commit 4ad14c6ac9ad91827636f197abe7c2b4e955eb31
Author: Janaki Lahorani
Date:   Mon Jun 18 12:02:33 2018 -0700

    HIVE-19940: PPD of predicates with deterministic UDFs with RBO

diff --git ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
index b01a9bad7e65ac20f466b26a0287e9afdb74e37d..1c662d7cfb5b4a999d66b93ded266c8d8860d25a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
@@ -104,7 +104,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
         return false;
       } else {
         if (exp instanceof ExprNodeGenericFuncDesc) {
-          isCandidate = false;
+          if (isDeterministic((ExprNodeGenericFuncDesc) exp)) {
+            isCandidate = true;
+          } else {
+            isCandidate = false;
+          }
         }
         if (exp instanceof ExprNodeColumnDesc && ci == null) {
           ExprNodeColumnDesc column = (ExprNodeColumnDesc)exp;
@@ -135,6 +139,30 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
     }
 
+    /**
+     *
+     * @param funcDesc function descriptor
+     * @return true if the function is deterministic, false otherwise
+     */
+    public static boolean isDeterministic(ExprNodeGenericFuncDesc funcDesc) {
+      if (FunctionRegistry.isConsistentWithinQuery(funcDesc.getGenericUDF())) {
+        // check whether the children are deterministic
+        for (ExprNodeDesc exprNodeDesc : funcDesc.getChildren()) {
+          if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) {
+            if (!isDeterministic((ExprNodeGenericFuncDesc) exprNodeDesc)) {
+              // some child is not deterministic - return false
+              return false;
+            }
+          }
+        }
+        // all children are deterministic - return true
+        return true;
+      }
+
+      // function is not deterministic - return false
+      return false;
+    }
+
     /**
      * FieldExprProcessor.
      *
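The isDeterministic() walk above defers to FunctionRegistry.isConsistentWithinQuery for the UDF at each node and recurses only into children that are themselves generic function descriptors; column references and constants are treated as deterministic leaves. Below is a minimal standalone sketch of the same recursion, using a hypothetical Expr tree in place of Hive's ExprNodeGenericFuncDesc and FunctionRegistry (Expr, column, and call are illustrative stand-ins, not Hive APIs):

import java.util.Arrays;
import java.util.List;

// Hypothetical stand-in for Hive's expression descriptors (illustrative only).
public final class Expr {
  final String name;
  final boolean isFunction;       // stand-in for "instanceof ExprNodeGenericFuncDesc"
  final boolean udfDeterministic; // stand-in for FunctionRegistry.isConsistentWithinQuery
  final List<Expr> children;

  private Expr(String name, boolean isFunction, boolean udfDeterministic, Expr... children) {
    this.name = name;
    this.isFunction = isFunction;
    this.udfDeterministic = udfDeterministic;
    this.children = Arrays.asList(children);
  }

  static Expr column(String name) {
    return new Expr(name, false, true);
  }

  static Expr call(String name, boolean deterministic, Expr... args) {
    return new Expr(name, true, deterministic, args);
  }

  // Mirrors the recursion in ExprWalkerProcFactory.isDeterministic: a function
  // expression qualifies only if its own UDF is deterministic and every child
  // that is itself a function expression qualifies too.
  static boolean isDeterministic(Expr func) {
    if (!func.udfDeterministic) {
      return false; // the function itself is not deterministic
    }
    for (Expr child : func.children) {
      if (child.isFunction && !isDeterministic(child)) {
        return false; // some nested function child is not deterministic
      }
    }
    return true; // the UDF and all function children are deterministic
  }

  public static void main(String[] args) {
    Expr upperKey = call("upper", true, column("key"));                  // upper(key)
    Expr withRand = call("concat", true, upperKey, call("rand", false)); // concat(upper(key), rand())
    System.out.println(isDeterministic(upperKey)); // true: predicate is a pushdown candidate
    System.out.println(isDeterministic(withRand)); // false: predicate stays above the scan
  }
}

As in the patch, a predicate such as upper(key) = 'X' becomes a pushdown candidate, while any expression containing rand() keeps its predicate above the scan; the .q tests that follow exercise exactly this distinction.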
diff --git ql/src/test/queries/clientpositive/ppd_deterministic_expr.q ql/src/test/queries/clientpositive/ppd_deterministic_expr.q
new file mode 100644
index 0000000000000000000000000000000000000000..47c88493c623b5aa4eb924efce03de9e1b1173fb
--- /dev/null
+++ ql/src/test/queries/clientpositive/ppd_deterministic_expr.q
@@ -0,0 +1,143 @@
+set hive.auto.convert.join=false;
+set hive.optimize.index.filter=true;
+set hive.cbo.enable=false;
+
+CREATE TABLE `testb`(
+  `cola` string COMMENT '',
+  `colb` string COMMENT '',
+  `colc` string COMMENT '')
+PARTITIONED BY (
+  `part1` string,
+  `part2` string,
+  `part3` string)
+
+STORED AS AVRO;
+
+CREATE TABLE `testa`(
+  `col1` string COMMENT '',
+  `col2` string COMMENT '',
+  `col3` string COMMENT '',
+  `col4` string COMMENT '',
+  `col5` string COMMENT '')
+PARTITIONED BY (
+  `part1` string,
+  `part2` string,
+  `part3` string)
+STORED AS AVRO;
+
+insert into testA partition (part1='US', part2='ABC', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='UK', part2='DEF', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='US', part2='DEF', part3='200')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='CA', part2='ABC', part3='300')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testB partition (part1='CA', part2='ABC', part3='300')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+insert into testB partition (part1='CA', part2='ABC', part3='400')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+insert into testB partition (part1='UK', part2='PQR', part3='500')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+insert into testB partition (part1='US', part2='DEF', part3='200')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+insert into testB partition (part1='US', part2='PQR', part3='123')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+-- views with deterministic functions
+create view viewDeterministicUDFA partitioned on (vpart1, vpart2, vpart3) as select
+  cast(col1 as decimal(38,18)) as vcol1,
+  cast(col2 as decimal(38,18)) as vcol2,
+  cast(col3 as decimal(38,18)) as vcol3,
+  cast(col4 as decimal(38,18)) as vcol4,
+  cast(col5 as char(10)) as vcol5,
+  cast(part1 as char(2)) as vpart1,
+  cast(part2 as char(3)) as vpart2,
+  cast(part3 as char(3)) as vpart3
+  from testa
+where part1 in ('US', 'CA');
+
+create view viewDeterministicUDFB partitioned on (vpart1, vpart2, vpart3) as select
+  cast(cola as decimal(38,18)) as vcolA,
+  cast(colb as decimal(38,18)) as vcolB,
+  cast(colc as char(10)) as vcolC,
+  cast(part1 as char(2)) as vpart1,
+  cast(part2 as char(3)) as vpart2,
+  cast(part3 as char(3)) as vpart3
+  from testb
+where part1 in ('US', 'CA');
+
+-- views without function reference
+create view viewNoUDFA partitioned on (part1, part2, part3) as select
+  cast(col1 as decimal(38,18)) as vcol1,
+  cast(col2 as decimal(38,18)) as vcol2,
+  cast(col3 as decimal(38,18)) as vcol3,
+  cast(col4 as decimal(38,18)) as vcol4,
+  cast(col5 as char(10)) as vcol5,
+  part1,
+  part2,
+  part3
+  from testa
+where part1 in ('US', 'CA');
+
+create view viewNoUDFB partitioned on (part1, part2, part3) as select
+  cast(cola as decimal(38,18)) as vcolA,
+  cast(colb as decimal(38,18)) as vcolB,
+  cast(colc as char(10)) as vcolC,
+  part1,
+  part2,
+  part3
+  from testb
+where part1 in ('US', 'CA');
+
+-- query referencing deterministic functions
+explain
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewDeterministicUDFA a inner join viewDeterministicUDFB b
+on a.vpart1 = b.vpart1
+and a.vpart2 = b.vpart2
+and a.vpart3 = b.vpart3
+and a.vpart1 = 'US'
+and a.vpart2 = 'DEF'
+and a.vpart3 = '200';
+
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewDeterministicUDFA a inner join viewDeterministicUDFB b
+on a.vpart1 = b.vpart1
+and a.vpart2 = b.vpart2
+and a.vpart3 = b.vpart3
+and a.vpart1 = 'US'
+and a.vpart2 = 'DEF'
+and a.vpart3 = '200';
+
+-- query with views referencing no udfs
+explain
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewNoUDFA a inner join viewNoUDFB b
+on a.part1 = b.part1
+and a.part2 = b.part2
+and a.part3 = b.part3
+and a.part1 = 'US'
+and a.part2 = 'DEF'
+and a.part3 = '200';
+
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewNoUDFA a inner join viewNoUDFB b
+on a.part1 = b.part1
+and a.part2 = b.part2
+and a.part3 = b.part3
+and a.part1 = 'US'
+and a.part2 = 'DEF'
+and a.part3 = '200';
diff --git ql/src/test/queries/clientpositive/ppd_udf_col.q ql/src/test/queries/clientpositive/ppd_udf_col.q
index 9c7d4fdc20dad955469c3baa688595c2961cad6b..ac2f861f4f3c6401b99d034810c3ed705e47e26b 100644
--- ql/src/test/queries/clientpositive/ppd_udf_col.q
+++ ql/src/test/queries/clientpositive/ppd_udf_col.q
@@ -48,3 +48,51 @@ EXPLAIN
 SELECT key,randum123, v10
 FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
 WHERE a.v10 <= 200;
+
+set hive.cbo.enable=false;
+
+EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1;
+
+EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20;
+
+EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3;
+
+EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200;
+
+set hive.ppd.remove.duplicatefilters=false;
+
+EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1;
+
+EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20;
+
+EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3;
+
+EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200;
diff --git ql/src/test/results/clientpositive/llap/check_constraint.q.out ql/src/test/results/clientpositive/llap/check_constraint.q.out
index 085b00315ccb4ad7fee2b3c43f1c7b9d78690a48..b1642314d748a11f4b352c2dd6b7b698f788055a 100644
--- ql/src/test/results/clientpositive/llap/check_constraint.q.out
+++ ql/src/test/results/clientpositive/llap/check_constraint.q.out
@@ -1819,20 +1819,17 @@ STAGE PLANS:
 alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data
size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key < 10) and enforce_constraint((CAST( key AS decimal(5,2)) is not null and (CAST( key AS decimal(5,2)) >= CAST( UDFToInteger(key) AS decimal(5,2))) is not false))) (type: boolean) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(key) (type: int), CAST( key AS decimal(5,2)) (type: decimal(5,2)), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 166 Data size: 34362 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: enforce_constraint((_col1 is not null and (_col1 >= CAST( _col0 AS decimal(5,2))) is not false)) (type: boolean) + Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: decimal(5,2)), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: decimal(5,2)), _col2 (type: string) Filter Operator predicate: ((key < 20) and (key > 10) and enforce_constraint(value is not null)) (type: boolean) Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index 7a880ddda7da57bc795eca963b417af99e5c7cf5..79b8cf3c650221f53e47d18f6a8f15c4f8870e66 100644 --- ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -3419,20 +3419,17 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key < 10) and enforce_constraint((UDFToInteger(key) is not null and value is not null))) (type: boolean) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(key) (type: int), CAST( key AS decimal(5,2)) (type: decimal(5,2)), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 166 Data size: 34362 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: enforce_constraint((_col0 is not null and _col2 is not null)) (type: boolean) + Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 83 Data size: 17181 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: decimal(5,2)), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: decimal(5,2)), _col2 (type: string) Filter Operator predicate: ((key < 20) and (key > 10) and enforce_constraint(value is not null)) 
(type: boolean) Statistics: Num rows: 27 Data size: 4806 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/subquery_in.q.out ql/src/test/results/clientpositive/llap/subquery_in.q.out index f961402551d48d7297328021df58dedbdd2bc2f2..3cf7eee986499b6f8a0ca36fa1d3dc7bcb0416fa 100644 --- ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -4511,23 +4511,19 @@ STAGE PLANS: predicate: (_col1 is not null and _col2 is not null) (type: boolean) Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: (_col1 / _col2) (type: double), _col0 (type: int) + expressions: _col0 (type: int), (_col1 / _col2) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: double) + Group By Operator + keys: _col0 (type: int), _col1 (type: double) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int), _col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: double) Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: double) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 8a5ccf5aff839f6ab795d0a29bfce0ff5dd58205..a5f7de50880ce207598425d2a12416adc4973c23 100644 --- ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -1411,7 +1411,7 @@ POSTHOOK: Input: default@t1_v POSTHOOK: Output: database:default POSTHOOK: Output: default@T2_v POSTHOOK: Lineage: T2_v.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) @@ -1472,7 +1472,7 @@ STAGE PLANS: Select Operator expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 15272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash @@ -1564,7 +1564,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) 
PREHOOK: type: QUERY @@ -2417,7 +2417,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 26 -Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type) @@ -2540,16 +2540,24 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Group By Operator - keys: _col0 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: double) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: floor(_col1) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: @@ -2587,7 +2595,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -3092,7 +3100,7 @@ STAGE PLANS: Select Operator expressions: (UDFToDouble(p_type) + 2.0D) (type: double), p_brand (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: double), _col1 (type: string) mode: hash @@ -3806,7 +3814,7 @@ STAGE PLANS: Select Operator expressions: concat('v', value) (type: string), key (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 135500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col1 (type: string), _col0 (type: string) mode: hash diff --git ql/src/test/results/clientpositive/masking_disablecbo_2.q.out ql/src/test/results/clientpositive/masking_disablecbo_2.q.out index bb58b0864f94094593dac385e888aaebe2cdd795..d62bdf80c10e8cf8b760381c775131fcd4363f5f 100644 --- ql/src/test/results/clientpositive/masking_disablecbo_2.q.out +++ 
ql/src/test/results/clientpositive/masking_disablecbo_2.q.out @@ -556,12 +556,12 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 > 0) (type: boolean) + Filter Operator + predicate: (UDFToInteger(key) > 0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1101,21 +1101,18 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToInteger(key) > 0) and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 > 0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col0 (type: int) TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1170,12 +1167,12 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: UDFToDouble(UDFToInteger(key)) is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(_col0) is not null (type: boolean) + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) @@ -1187,21 +1184,18 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(value) is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToInteger(key) > 0) and UDFToDouble(value) is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 
UDFToInteger(key) (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 > 0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col1) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col1) (type: double) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -1259,12 +1253,12 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 > 0) (type: boolean) + Filter Operator + predicate: (UDFToInteger(key) > 0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/perf/tez/query8.q.out ql/src/test/results/clientpositive/perf/tez/query8.q.out index e0742f2baca7a3f644ab1d1a7f42ba354283bfd5..cb1f5468f4f56ad6cfaea2ed7a8d84f0c551bbfb 100644 --- ql/src/test/results/clientpositive/perf/tez/query8.q.out +++ ql/src/test/results/clientpositive/perf/tez/query8.q.out @@ -231,127 +231,127 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_115] - Limit [LIM_114] (rows=100 width=88) + File Output Operator [FS_113] + Limit [LIM_112] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_113] (rows=348477374 width=88) + Select Operator [SEL_111] (rows=348477374 width=88) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] - Group By Operator [GBY_111] (rows=348477374 width=88) + SHUFFLE [RS_110] + Group By Operator [GBY_109] (rows=348477374 width=88) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col0 Group By Operator [GBY_56] (rows=696954748 width=88) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col8 - Merge Join Operator [MERGEJOIN_89] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_87] (rows=696954748 width=88) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col8"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_88] (rows=1874 width=1911) - Conds:RS_107.substr(_col0, 1, 2)=RS_110.substr(_col2, 1, 2)(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_86] (rows=1874 width=1911) + Conds:RS_105.substr(_col0, 1, 2)=RS_108.substr(_col2, 1, 2)(Inner),Output:["_col1","_col2"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] + SHUFFLE 
[RS_108] PartitionCols:substr(_col2, 1, 2) - Select Operator [SEL_109] (rows=1704 width=1910) + Select Operator [SEL_107] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_108] (rows=1704 width=1910) + Filter Operator [FIL_106] (rows=1704 width=1910) predicate:(s_store_sk is not null and substr(s_zip, 1, 2) is not null) TableScan [TS_42] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_zip"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] + SHUFFLE [RS_105] PartitionCols:substr(_col0, 1, 2) - Select Operator [SEL_106] (rows=1 width=1014) + Select Operator [SEL_104] (rows=1 width=1014) Output:["_col0"] - Filter Operator [FIL_105] (rows=1 width=1014) + Filter Operator [FIL_103] (rows=1 width=1014) predicate:(_col1 = 2L) - Group By Operator [GBY_104] (rows=6833333 width=1014) + Group By Operator [GBY_102] (rows=6833333 width=1014) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Union 9 [SIMPLE_EDGE] <-Reducer 15 [CONTAINS] vectorized - Reduce Output Operator [RS_136] + Reduce Output Operator [RS_134] PartitionCols:_col0 - Group By Operator [GBY_135] (rows=13666666 width=1014) + Group By Operator [GBY_133] (rows=13666666 width=1014) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_134] (rows=3666666 width=1014) + Group By Operator [GBY_132] (rows=3666666 width=1014) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_133] + SHUFFLE [RS_131] PartitionCols:_col0 - Group By Operator [GBY_132] (rows=7333333 width=1014) + Group By Operator [GBY_130] (rows=7333333 width=1014) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_131] (rows=7333333 width=1014) + Select Operator [SEL_129] (rows=7333333 width=1014) Output:["_col0"] - Filter Operator [FIL_130] (rows=7333333 width=1014) + Filter Operator [FIL_128] (rows=7333333 width=1014) predicate:(_col1 > 10L) - Group By Operator [GBY_129] (rows=22000000 width=1014) + Group By Operator [GBY_127] (rows=22000000 width=1014) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 Group By Operator [GBY_24] (rows=44000000 width=1014) Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 - Merge Join Operator [MERGEJOIN_87] (rows=44000000 width=1014) - Conds:RS_125._col0=RS_128._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_85] (rows=44000000 width=1014) + Conds:RS_123._col0=RS_126._col0(Inner),Output:["_col1"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_125] + SHUFFLE [RS_123] PartitionCols:_col0 - Select Operator [SEL_124] (rows=40000000 width=1014) + Select Operator [SEL_122] (rows=40000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_123] (rows=40000000 width=1014) + Filter Operator [FIL_121] (rows=40000000 width=1014) predicate:(ca_address_sk is not null and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_14] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_zip"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] + SHUFFLE [RS_126] PartitionCols:_col0 - Select Operator [SEL_127] (rows=40000000 width=860) + Select Operator [SEL_125] (rows=40000000 width=860) Output:["_col0"] - Filter Operator [FIL_126] (rows=40000000 width=860) + Filter Operator [FIL_124] (rows=40000000 
width=860) predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) TableScan [TS_17] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_current_addr_sk","c_preferred_cust_flag"] <-Reducer 8 [CONTAINS] vectorized - Reduce Output Operator [RS_122] + Reduce Output Operator [RS_120] PartitionCols:_col0 - Group By Operator [GBY_121] (rows=13666666 width=1014) + Group By Operator [GBY_119] (rows=13666666 width=1014) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_120] (rows=10000000 width=1014) + Group By Operator [GBY_118] (rows=10000000 width=1014) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_119] + SHUFFLE [RS_117] PartitionCols:_col0 - Group By Operator [GBY_118] (rows=20000000 width=1014) + Group By Operator [GBY_116] (rows=20000000 width=1014) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_117] (rows=20000000 width=1014) + Select Operator [SEL_115] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_116] (rows=20000000 width=1014) + Filter Operator [FIL_114] (rows=20000000 width=1014) predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', 
'52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_6] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_zip"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_86] (rows=633595212 width=88) - Conds:RS_100._col0=RS_103._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_84] (rows=633595212 width=88) + Conds:RS_98._col0=RS_101._col0(Inner),Output:["_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_100] + SHUFFLE [RS_98] PartitionCols:_col0 - Select Operator [SEL_99] (rows=575995635 width=88) + Select Operator [SEL_97] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_98] (rows=575995635 width=88) + Filter Operator [FIL_96] (rows=575995635 width=88) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_103] + SHUFFLE [RS_101] PartitionCols:_col0 - Select Operator [SEL_102] (rows=18262 width=1119) + Select Operator [SEL_100] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_101] (rows=18262 width=1119) + Filter Operator [FIL_99] (rows=18262 width=1119) predicate:((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] diff --git ql/src/test/results/clientpositive/ppd_deterministic_expr.q.out ql/src/test/results/clientpositive/ppd_deterministic_expr.q.out new file mode 100644 index 0000000000000000000000000000000000000000..b96a0e23c2418ba7f154915fbba7777e7fd3775e --- /dev/null +++ ql/src/test/results/clientpositive/ppd_deterministic_expr.q.out @@ -0,0 +1,553 @@ +PREHOOK: query: CREATE TABLE `testb`( + `cola` string COMMENT '', + `colb` string COMMENT '', + `colc` string COMMENT '') +PARTITIONED BY ( + `part1` string, + `part2` string, + `part3` string) + +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testb +POSTHOOK: query: 
CREATE TABLE `testb`( + `cola` string COMMENT '', + `colb` string COMMENT '', + `colc` string COMMENT '') +PARTITIONED BY ( + `part1` string, + `part2` string, + `part3` string) + +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testb +PREHOOK: query: CREATE TABLE `testa`( + `col1` string COMMENT '', + `col2` string COMMENT '', + `col3` string COMMENT '', + `col4` string COMMENT '', + `col5` string COMMENT '') +PARTITIONED BY ( + `part1` string, + `part2` string, + `part3` string) +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testa +POSTHOOK: query: CREATE TABLE `testa`( + `col1` string COMMENT '', + `col2` string COMMENT '', + `col3` string COMMENT '', + `col4` string COMMENT '', + `col5` string COMMENT '') +PARTITIONED BY ( + `part1` string, + `part2` string, + `part3` string) +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testa +PREHOOK: query: insert into testA partition (part1='US', part2='ABC', part3='123') +values ('12.34', '100', '200', '300', 'abc'), +('12.341', '1001', '2001', '3001', 'abcd') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@testa@part1=US/part2=ABC/part3=123 +POSTHOOK: query: insert into testA partition (part1='US', part2='ABC', part3='123') +values ('12.34', '100', '200', '300', 'abc'), +('12.341', '1001', '2001', '3001', 'abcd') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@testa@part1=US/part2=ABC/part3=123 +POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col1 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col2 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col3 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col4 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col5 SCRIPT [] +PREHOOK: query: insert into testA partition (part1='UK', part2='DEF', part3='123') +values ('12.34', '100', '200', '300', 'abc'), +('12.341', '1001', '2001', '3001', 'abcd') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@testa@part1=UK/part2=DEF/part3=123 +POSTHOOK: query: insert into testA partition (part1='UK', part2='DEF', part3='123') +values ('12.34', '100', '200', '300', 'abc'), +('12.341', '1001', '2001', '3001', 'abcd') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@testa@part1=UK/part2=DEF/part3=123 +POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col1 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col2 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col3 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col4 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col5 SCRIPT [] +PREHOOK: query: insert into testA partition (part1='US', part2='DEF', part3='200') +values ('12.34', '100', '200', '300', 'abc'), +('12.341', '1001', '2001', '3001', 'abcd') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@testa@part1=US/part2=DEF/part3=200 +POSTHOOK: query: insert into testA partition (part1='US', part2='DEF', part3='200') +values ('12.34', '100', '200', '300', 'abc'), +('12.341', '1001', '2001', '3001', 'abcd') +POSTHOOK: type: 
QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@testa@part1=US/part2=DEF/part3=200 +POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col1 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col2 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col3 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col4 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col5 SCRIPT [] +PREHOOK: query: insert into testA partition (part1='CA', part2='ABC', part3='300') +values ('12.34', '100', '200', '300', 'abc'), +('12.341', '1001', '2001', '3001', 'abcd') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@testa@part1=CA/part2=ABC/part3=300 +POSTHOOK: query: insert into testA partition (part1='CA', part2='ABC', part3='300') +values ('12.34', '100', '200', '300', 'abc'), +('12.341', '1001', '2001', '3001', 'abcd') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@testa@part1=CA/part2=ABC/part3=300 +POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col1 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col2 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col3 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col4 SCRIPT [] +POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col5 SCRIPT [] +PREHOOK: query: insert into testB partition (part1='CA', part2='ABC', part3='300') +values ('600', '700', 'abc'), ('601', '701', 'abcd') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@testb@part1=CA/part2=ABC/part3=300 +POSTHOOK: query: insert into testB partition (part1='CA', part2='ABC', part3='300') +values ('600', '700', 'abc'), ('601', '701', 'abcd') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@testb@part1=CA/part2=ABC/part3=300 +POSTHOOK: Lineage: testb PARTITION(part1=CA,part2=ABC,part3=300).cola SCRIPT [] +POSTHOOK: Lineage: testb PARTITION(part1=CA,part2=ABC,part3=300).colb SCRIPT [] +POSTHOOK: Lineage: testb PARTITION(part1=CA,part2=ABC,part3=300).colc SCRIPT [] +PREHOOK: query: insert into testB partition (part1='CA', part2='ABC', part3='400') +values ( '600', '700', 'abc'), ( '601', '701', 'abcd') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@testb@part1=CA/part2=ABC/part3=400 +POSTHOOK: query: insert into testB partition (part1='CA', part2='ABC', part3='400') +values ( '600', '700', 'abc'), ( '601', '701', 'abcd') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@testb@part1=CA/part2=ABC/part3=400 +POSTHOOK: Lineage: testb PARTITION(part1=CA,part2=ABC,part3=400).cola SCRIPT [] +POSTHOOK: Lineage: testb PARTITION(part1=CA,part2=ABC,part3=400).colb SCRIPT [] +POSTHOOK: Lineage: testb PARTITION(part1=CA,part2=ABC,part3=400).colc SCRIPT [] +PREHOOK: query: insert into testB partition (part1='UK', part2='PQR', part3='500') +values ('600', '700', 'abc'), ('601', '701', 'abcd') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@testb@part1=UK/part2=PQR/part3=500 +POSTHOOK: query: insert into testB partition (part1='UK', part2='PQR', part3='500') +values ('600', '700', 'abc'), ('601', '701', 'abcd') +POSTHOOK: type: QUERY +POSTHOOK: Input: 
_dummy_database@_dummy_table +POSTHOOK: Output: default@testb@part1=UK/part2=PQR/part3=500 +POSTHOOK: Lineage: testb PARTITION(part1=UK,part2=PQR,part3=500).cola SCRIPT [] +POSTHOOK: Lineage: testb PARTITION(part1=UK,part2=PQR,part3=500).colb SCRIPT [] +POSTHOOK: Lineage: testb PARTITION(part1=UK,part2=PQR,part3=500).colc SCRIPT [] +PREHOOK: query: insert into testB partition (part1='US', part2='DEF', part3='200') +values ( '600', '700', 'abc'), ('601', '701', 'abcd') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@testb@part1=US/part2=DEF/part3=200 +POSTHOOK: query: insert into testB partition (part1='US', part2='DEF', part3='200') +values ( '600', '700', 'abc'), ('601', '701', 'abcd') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@testb@part1=US/part2=DEF/part3=200 +POSTHOOK: Lineage: testb PARTITION(part1=US,part2=DEF,part3=200).cola SCRIPT [] +POSTHOOK: Lineage: testb PARTITION(part1=US,part2=DEF,part3=200).colb SCRIPT [] +POSTHOOK: Lineage: testb PARTITION(part1=US,part2=DEF,part3=200).colc SCRIPT [] +PREHOOK: query: insert into testB partition (part1='US', part2='PQR', part3='123') +values ( '600', '700', 'abc'), ('601', '701', 'abcd') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@testb@part1=US/part2=PQR/part3=123 +POSTHOOK: query: insert into testB partition (part1='US', part2='PQR', part3='123') +values ( '600', '700', 'abc'), ('601', '701', 'abcd') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@testb@part1=US/part2=PQR/part3=123 +POSTHOOK: Lineage: testb PARTITION(part1=US,part2=PQR,part3=123).cola SCRIPT [] +POSTHOOK: Lineage: testb PARTITION(part1=US,part2=PQR,part3=123).colb SCRIPT [] +POSTHOOK: Lineage: testb PARTITION(part1=US,part2=PQR,part3=123).colc SCRIPT [] +PREHOOK: query: create view viewDeterministicUDFA partitioned on (vpart1, vpart2, vpart3) as select + cast(col1 as decimal(38,18)) as vcol1, + cast(col2 as decimal(38,18)) as vcol2, + cast(col3 as decimal(38,18)) as vcol3, + cast(col4 as decimal(38,18)) as vcol4, + cast(col5 as char(10)) as vcol5, + cast(part1 as char(2)) as vpart1, + cast(part2 as char(3)) as vpart2, + cast(part3 as char(3)) as vpart3 + from testa +where part1 in ('US', 'CA') +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@testa +PREHOOK: Output: database:default +PREHOOK: Output: default@viewDeterministicUDFA +POSTHOOK: query: create view viewDeterministicUDFA partitioned on (vpart1, vpart2, vpart3) as select + cast(col1 as decimal(38,18)) as vcol1, + cast(col2 as decimal(38,18)) as vcol2, + cast(col3 as decimal(38,18)) as vcol3, + cast(col4 as decimal(38,18)) as vcol4, + cast(col5 as char(10)) as vcol5, + cast(part1 as char(2)) as vpart1, + cast(part2 as char(3)) as vpart2, + cast(part3 as char(3)) as vpart3 + from testa +where part1 in ('US', 'CA') +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@testa +POSTHOOK: Output: database:default +POSTHOOK: Output: default@viewDeterministicUDFA +POSTHOOK: Lineage: viewDeterministicUDFA.vcol1 EXPRESSION [(testa)testa.FieldSchema(name:col1, type:string, comment:), ] +POSTHOOK: Lineage: viewDeterministicUDFA.vcol2 EXPRESSION [(testa)testa.FieldSchema(name:col2, type:string, comment:), ] +POSTHOOK: Lineage: viewDeterministicUDFA.vcol3 EXPRESSION [(testa)testa.FieldSchema(name:col3, type:string, comment:), ] +POSTHOOK: Lineage: viewDeterministicUDFA.vcol4 EXPRESSION [(testa)testa.FieldSchema(name:col4, 
type:string, comment:), ] +POSTHOOK: Lineage: viewDeterministicUDFA.vcol5 EXPRESSION [(testa)testa.FieldSchema(name:col5, type:string, comment:), ] +PREHOOK: query: create view viewDeterministicUDFB partitioned on (vpart1, vpart2, vpart3) as select + cast(cola as decimal(38,18)) as vcolA, + cast(colb as decimal(38,18)) as vcolB, + cast(colc as char(10)) as vcolC, + cast(part1 as char(2)) as vpart1, + cast(part2 as char(3)) as vpart2, + cast(part3 as char(3)) as vpart3 + from testb +where part1 in ('US', 'CA') +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@testb +PREHOOK: Output: database:default +PREHOOK: Output: default@viewDeterministicUDFB +POSTHOOK: query: create view viewDeterministicUDFB partitioned on (vpart1, vpart2, vpart3) as select + cast(cola as decimal(38,18)) as vcolA, + cast(colb as decimal(38,18)) as vcolB, + cast(colc as char(10)) as vcolC, + cast(part1 as char(2)) as vpart1, + cast(part2 as char(3)) as vpart2, + cast(part3 as char(3)) as vpart3 + from testb +where part1 in ('US', 'CA') +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@testb +POSTHOOK: Output: database:default +POSTHOOK: Output: default@viewDeterministicUDFB +POSTHOOK: Lineage: viewDeterministicUDFB.vcola EXPRESSION [(testb)testb.FieldSchema(name:cola, type:string, comment:), ] +POSTHOOK: Lineage: viewDeterministicUDFB.vcolb EXPRESSION [(testb)testb.FieldSchema(name:colb, type:string, comment:), ] +POSTHOOK: Lineage: viewDeterministicUDFB.vcolc EXPRESSION [(testb)testb.FieldSchema(name:colc, type:string, comment:), ] +PREHOOK: query: create view viewNoUDFA partitioned on (part1, part2, part3) as select + cast(col1 as decimal(38,18)) as vcol1, + cast(col2 as decimal(38,18)) as vcol2, + cast(col3 as decimal(38,18)) as vcol3, + cast(col4 as decimal(38,18)) as vcol4, + cast(col5 as char(10)) as vcol5, + part1, + part2, + part3 + from testa +where part1 in ('US', 'CA') +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@testa +PREHOOK: Output: database:default +PREHOOK: Output: default@viewNoUDFA +POSTHOOK: query: create view viewNoUDFA partitioned on (part1, part2, part3) as select + cast(col1 as decimal(38,18)) as vcol1, + cast(col2 as decimal(38,18)) as vcol2, + cast(col3 as decimal(38,18)) as vcol3, + cast(col4 as decimal(38,18)) as vcol4, + cast(col5 as char(10)) as vcol5, + part1, + part2, + part3 + from testa +where part1 in ('US', 'CA') +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@testa +POSTHOOK: Output: database:default +POSTHOOK: Output: default@viewNoUDFA +POSTHOOK: Lineage: viewNoUDFA.vcol1 EXPRESSION [(testa)testa.FieldSchema(name:col1, type:string, comment:), ] +POSTHOOK: Lineage: viewNoUDFA.vcol2 EXPRESSION [(testa)testa.FieldSchema(name:col2, type:string, comment:), ] +POSTHOOK: Lineage: viewNoUDFA.vcol3 EXPRESSION [(testa)testa.FieldSchema(name:col3, type:string, comment:), ] +POSTHOOK: Lineage: viewNoUDFA.vcol4 EXPRESSION [(testa)testa.FieldSchema(name:col4, type:string, comment:), ] +POSTHOOK: Lineage: viewNoUDFA.vcol5 EXPRESSION [(testa)testa.FieldSchema(name:col5, type:string, comment:), ] +PREHOOK: query: create view viewNoUDFB partitioned on (part1, part2, part3) as select + cast(cola as decimal(38,18)) as vcolA, + cast(colb as decimal(38,18)) as vcolB, + cast(colc as char(10)) as vcolC, + part1, + part2, + part3 + from testb +where part1 in ('US', 'CA') +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@testb +PREHOOK: Output: database:default +PREHOOK: Output: default@viewNoUDFB +POSTHOOK: query: create view viewNoUDFB partitioned on (part1, part2, part3) as select + 
 cast(cola as decimal(38,18)) as vcolA,
+ cast(colb as decimal(38,18)) as vcolB,
+ cast(colc as char(10)) as vcolC,
+ part1,
+ part2,
+ part3
+ from testb
+where part1 in ('US', 'CA')
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@testb
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@viewNoUDFB
+POSTHOOK: Lineage: viewNoUDFB.vcola EXPRESSION [(testb)testb.FieldSchema(name:cola, type:string, comment:), ]
+POSTHOOK: Lineage: viewNoUDFB.vcolb EXPRESSION [(testb)testb.FieldSchema(name:colb, type:string, comment:), ]
+POSTHOOK: Lineage: viewNoUDFB.vcolc EXPRESSION [(testb)testb.FieldSchema(name:colc, type:string, comment:), ]
+PREHOOK: query: explain
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewDeterministicUDFA a inner join viewDeterministicUDFB b
+on a.vpart1 = b.vpart1
+and a.vpart2 = b.vpart2
+and a.vpart3 = b.vpart3
+and a.vpart1 = 'US'
+and a.vpart2 = 'DEF'
+and a.vpart3 = '200'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewDeterministicUDFA a inner join viewDeterministicUDFB b
+on a.vpart1 = b.vpart1
+and a.vpart2 = b.vpart2
+and a.vpart3 = b.vpart3
+and a.vpart1 = 'US'
+and a.vpart2 = 'DEF'
+and a.vpart3 = '200'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: testa
+            filterExpr: (part1) IN ('US', 'CA') (type: boolean)
+            Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: CAST( col1 AS decimal(38,18)) (type: decimal(38,18)), CAST( col2 AS decimal(38,18)) (type: decimal(38,18)), CAST( col3 AS decimal(38,18)) (type: decimal(38,18))
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: 'US' (type: char(2)), 'DEF' (type: char(3)), '200' (type: char(3))
+                sort order: +++
+                Map-reduce partition columns: 'US' (type: char(2)), 'DEF' (type: char(3)), '200' (type: char(3))
+                Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18))
+          TableScan
+            alias: testb
+            filterExpr: (part1) IN ('US', 'CA') (type: boolean)
+            Statistics: Num rows: 2 Data size: 3180 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: CAST( cola AS decimal(38,18)) (type: decimal(38,18)), CAST( colb AS decimal(38,18)) (type: decimal(38,18)), CAST( part1 AS CHAR(2)) (type: char(2)), CAST( part2 AS CHAR(3)) (type: char(3))
+              outputColumnNames: _col0, _col1, _col3, _col4
+              Statistics: Num rows: 2 Data size: 3180 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col3 (type: char(2)), _col4 (type: char(3)), '200' (type: char(3))
+                sort order: +++
+                Map-reduce partition columns: _col3 (type: char(2)), _col4 (type: char(3)), '200' (type: char(3))
+                Statistics: Num rows: 2 Data size: 3180 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18))
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col5 (type: char(2)), _col6 (type: char(3)), _col7 (type: char(3))
+            1 _col3 (type: char(2)), _col4 (type: char(3)), _col5 (type: char(3))
+          outputColumnNames: _col0, _col1, _col2, _col8, _col9
+          Statistics: Num rows: 2 Data size: 5038 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col8 (type: decimal(38,18)), _col9 (type: decimal(38,18))
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4
+            Statistics: Num rows: 2 Data size: 5038 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 2 Data size: 5038 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select vcol1, vcol2, vcol3, vcola, vcolb
+from viewDeterministicUDFA a inner join viewDeterministicUDFB b
+on a.vpart1 = b.vpart1
+and a.vpart2 = b.vpart2
+and a.vpart3 = b.vpart3
+and a.vpart1 = 'US'
+and a.vpart2 = 'DEF'
+and a.vpart3 = '200'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testa
+PREHOOK: Input: default@testa@part1=US/part2=DEF/part3=200
+PREHOOK: Input: default@testb
+PREHOOK: Input: default@testb@part1=US/part2=DEF/part3=200
+PREHOOK: Input: default@viewdeterministicudfa
+PREHOOK: Input: default@viewdeterministicudfb
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol1, vcol2, vcol3, vcola, vcolb
+from viewDeterministicUDFA a inner join viewDeterministicUDFB b
+on a.vpart1 = b.vpart1
+and a.vpart2 = b.vpart2
+and a.vpart3 = b.vpart3
+and a.vpart1 = 'US'
+and a.vpart2 = 'DEF'
+and a.vpart3 = '200'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testa
+POSTHOOK: Input: default@testa@part1=US/part2=DEF/part3=200
+POSTHOOK: Input: default@testb
+POSTHOOK: Input: default@testb@part1=US/part2=DEF/part3=200
+POSTHOOK: Input: default@viewdeterministicudfa
+POSTHOOK: Input: default@viewdeterministicudfb
+#### A masked pattern was here ####
+12.341000000000000000	1001.000000000000000000	2001.000000000000000000	601.000000000000000000	701.000000000000000000
+12.341000000000000000	1001.000000000000000000	2001.000000000000000000	600.000000000000000000	700.000000000000000000
+12.340000000000000000	100.000000000000000000	200.000000000000000000	601.000000000000000000	701.000000000000000000
+12.340000000000000000	100.000000000000000000	200.000000000000000000	600.000000000000000000	700.000000000000000000
+PREHOOK: query: explain
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewNoUDFA a inner join viewNoUDFB b
+on a.part1 = b.part1
+and a.part2 = b.part2
+and a.part3 = b.part3
+and a.part1 = 'US'
+and a.part2 = 'DEF'
+and a.part3 = '200'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewNoUDFA a inner join viewNoUDFB b
+on a.part1 = b.part1
+and a.part2 = b.part2
+and a.part3 = b.part3
+and a.part1 = 'US'
+and a.part2 = 'DEF'
+and a.part3 = '200'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: testa
+            filterExpr: ((part1) IN ('US', 'CA') and (part1 = 'US') and (part2 = 'DEF') and (part3 = '200')) (type: boolean)
+            Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: CAST( col1 AS decimal(38,18)) (type: decimal(38,18)), CAST( col2 AS decimal(38,18)) (type: decimal(38,18)), CAST( col3 AS decimal(38,18)) (type: decimal(38,18))
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: 'US' (type: string), 'DEF' (type: string), '200' (type: string)
+                sort order: +++
+                Map-reduce partition columns: 'US' (type: string), 'DEF' (type: string), '200' (type: string)
+                Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18))
+          TableScan
+            alias: testb
+            filterExpr: ((part1) IN ('US', 'CA') and (part3 = '200') and part1 is not null and part2 is not null) (type: boolean)
+            Statistics: Num rows: 2 Data size: 3180 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: CAST( cola AS decimal(38,18)) (type: decimal(38,18)), CAST( colb AS decimal(38,18)) (type: decimal(38,18)), part1 (type: string), part2 (type: string)
+              outputColumnNames: _col0, _col1, _col3, _col4
+              Statistics: Num rows: 2 Data size: 3180 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col3 (type: string), _col4 (type: string), '200' (type: string)
+                sort order: +++
+                Map-reduce partition columns: _col3 (type: string), _col4 (type: string), '200' (type: string)
+                Statistics: Num rows: 2 Data size: 3180 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18))
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col5 (type: string), _col6 (type: string), _col7 (type: string)
+            1 _col3 (type: string), _col4 (type: string), _col5 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col8, _col9
+          Statistics: Num rows: 2 Data size: 5038 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col8 (type: decimal(38,18)), _col9 (type: decimal(38,18))
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4
+            Statistics: Num rows: 2 Data size: 5038 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 2 Data size: 5038 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select vcol1, vcol2, vcol3, vcola, vcolb
+from viewNoUDFA a inner join viewNoUDFB b
+on a.part1 = b.part1
+and a.part2 = b.part2
+and a.part3 = b.part3
+and a.part1 = 'US'
+and a.part2 = 'DEF'
+and a.part3 = '200'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testa
+PREHOOK: Input: default@testa@part1=US/part2=DEF/part3=200
+PREHOOK: Input: default@testb
+PREHOOK: Input: default@testb@part1=US/part2=DEF/part3=200
+PREHOOK: Input: default@viewnoudfa
+PREHOOK: Input: default@viewnoudfb
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol1, vcol2, vcol3, vcola, vcolb
+from viewNoUDFA a inner join viewNoUDFB b
+on a.part1 = b.part1
+and a.part2 = b.part2
+and a.part3 = b.part3
+and a.part1 = 'US'
+and a.part2 = 'DEF'
+and a.part3 = '200'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testa
+POSTHOOK: Input: default@testa@part1=US/part2=DEF/part3=200
+POSTHOOK: Input: default@testb
+POSTHOOK: Input: default@testb@part1=US/part2=DEF/part3=200
+POSTHOOK: Input: default@viewnoudfa
+POSTHOOK: Input: default@viewnoudfb
+#### A masked pattern was here ####
+12.341000000000000000	1001.000000000000000000	2001.000000000000000000	601.000000000000000000	701.000000000000000000
+12.341000000000000000	1001.000000000000000000	2001.000000000000000000	600.000000000000000000	700.000000000000000000
+12.340000000000000000	100.000000000000000000	200.000000000000000000	601.000000000000000000	701.000000000000000000
+12.340000000000000000	100.000000000000000000	200.000000000000000000	600.000000000000000000	700.000000000000000000
diff --git ql/src/test/results/clientpositive/ppd_udf_col.q.out ql/src/test/results/clientpositive/ppd_udf_col.q.out
index 97ca3835930e050d517361f634b3f2e4a7d3842f..45aac1b560dc91fbeef21086752de76cc533c170 100644
--- ql/src/test/results/clientpositive/ppd_udf_col.q.out
+++ ql/src/test/results/clientpositive/ppd_udf_col.q.out
@@ -356,3 +356,406 @@ STAGE PLANS:
   Processor Tree:
     ListSink
 
+PREHOOK: query: EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 100) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 <= 0.1) (type: boolean)
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 100) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((_col2 <= 0.1) and (_col2 > 0.1)) (type: boolean)
+                  Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 20
+                      Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: false (type: boolean)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double), '4' (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((value * 10) <= 200.0D) and (key = 100)) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double), (value * 10) (type: double)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 100) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 <= 0.1) (type: boolean)
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 100) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 <= 0.1) (type: boolean)
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                    Filter Operator
+                      predicate: (_col1 > 0.1) (type: boolean)
+                      Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                      Limit
+                        Number of rows: 20
+                        Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((key = 100) and false) (type: boolean)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: false (type: boolean)
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double), '4' (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((value * 10) <= 200.0D) and (key = 100)) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double), (value * 10) (type: double)
+                outputColumnNames: _col0, _col2, _col3
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col3 <= 200.0D) (type: boolean)
+                  Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
diff --git ql/src/test/results/clientpositive/union_offcbo.q.out ql/src/test/results/clientpositive/union_offcbo.q.out
index a723f00f7fcab3fc9146e02bf41c85b6410335eb..404d7f0ad98b66a3a3a8b2bf0a1cfb1965612b5d 100644
--- ql/src/test/results/clientpositive/union_offcbo.q.out
+++ ql/src/test/results/clientpositive/union_offcbo.q.out
@@ -286,21 +286,18 @@ STAGE PLANS:
           outputColumnNames: _col8, _col9, _col10, _col12, _col13, _col16, _col17, _col18, _col19
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Filter Operator
-            predicate: (((NVL(_col8,-1) <> NVL(_col18,-1)) or (NVL(_col9,-1) <> NVL(_col19,-1))) and _col18 is not null) (type: boolean)
+            predicate: (((NVL(_col8,-1) <> NVL(_col18,-1)) or (NVL(_col9,-1) <> NVL(_col19,-1))) and (CASE WHEN ((_col18 is not null and _col8 is null and (_col12 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col18 is not null and _col8 is null and (_col12 <= '2016-02-05'))) THEN ('RET') WHEN (((_col18 = _col8) and (_col19 <> _col9))) THEN ('A_INS') ELSE ('NA') END <> 'RET') and _col18 is not null) (type: boolean)
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: _col10 (type: bigint), _col16 (type: string), _col17 (type: bigint), _col13 (type: string), _col18 (type: string), _col19 (type: string), CASE WHEN ((_col18 is not null and _col8 is null and (_col12 >= '2016-02-05'))) THEN ('DEL') WHEN ((_col18 is not null and _col8 is null and (_col12 <= '2016-02-05'))) THEN ('RET') WHEN (((_col18 = _col8) and (_col19 <> _col9))) THEN ('A_INS') ELSE ('NA') END (type: string)
              outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-              Filter Operator
-                predicate: (_col7 <> 'RET') (type: boolean)
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce
@@ -379,21 +376,18 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col6, _col7, _col8, _col9, _col11, _col18, _col19
           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
           Filter Operator
-            predicate: (((NVL(_col8,-1) <> NVL(_col18,-1)) or (NVL(_col9,-1) <> NVL(_col19,-1))) and _col8 is not null) (type: boolean)
+            predicate: (((NVL(_col8,-1) <> NVL(_col18,-1)) or (NVL(_col9,-1) <> NVL(_col19,-1))) and (CASE WHEN ((_col18 is not null and _col8 is null and (_col11 <= _col1))) THEN ('DEL') WHEN (((_col18 is null and _col8 is not null) or ((_col18 = _col8) and (_col19 <> _col9)))) THEN ('INS') ELSE ('NA') END <> 'RET') and _col8 is not null) (type: boolean)
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Select Operator
               expressions: _col0 (type: bigint), _col6 (type: string), _col7 (type: bigint), '2099-12-31' (type: string), _col8 (type: string), _col9 (type: string), CASE WHEN ((_col18 is not null and _col8 is null and (_col11 <= _col1))) THEN ('DEL') WHEN (((_col18 is null and _col8 is not null) or ((_col18 = _col8) and (_col19 <> _col9)))) THEN ('INS') ELSE ('NA') END (type: string)
               outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-              Filter Operator
-                predicate: (_col7 <> 'RET') (type: boolean)
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator
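
Note (editorial, not part of the patch): read together, the three .q.out diffs above capture the behavior change of HIVE-19940 under RBO. Predicates built only from deterministic UDFs (the cast(...) view columns, value*10) are now pushed through view projections down to the TableScan, so partition pruning applies (only part1=US/part2=DEF/part3=200 is scanned), while predicates over rand() are still evaluated above the Select Operator. A minimal interactive sketch of the same check, assuming a session where the tables and views from ppd_deterministic_expr.q above (and the standard src test table) already exist; all names are reused from the tests, and the expected plan shapes are the ones shown above:

set hive.auto.convert.join=false;
set hive.optimize.index.filter=true;
set hive.cbo.enable=false;

-- Deterministic UDFs only: one would expect the partition predicate to
-- reach the testa TableScan as filterExpr, mirroring the join plan above.
explain
select vcol1 from viewDeterministicUDFA
where vpart1 = 'US' and vpart2 = 'DEF' and vpart3 = '200';

-- Non-deterministic rand(): the filter must stay above the Select Operator,
-- as in the ppd_udf_col.q.out plans above.
explain
select key, randum123
from (select *, cast(rand() as double) as randum123 from src where key = 100) a
where randum123 <= 0.1;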