commit c301210fdf021d29cacd8faf8aaa8d47e753db4d
Author: Janaki Lahorani
Date:   Mon Jun 18 12:02:33 2018 -0700

    HIVE-19940: PPD of predicates with deterministic UDFs with RBO

diff --git ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
index b01a9bad7e65ac20f466b26a0287e9afdb74e37d..1c662d7cfb5b4a999d66b93ded266c8d8860d25a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
@@ -104,7 +104,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
         return false;
       } else {
         if (exp instanceof ExprNodeGenericFuncDesc) {
-          isCandidate = false;
+          if (isDeterministic((ExprNodeGenericFuncDesc) exp)) {
+            isCandidate = true;
+          } else {
+            isCandidate = false;
+          }
         }
         if (exp instanceof ExprNodeColumnDesc && ci == null) {
           ExprNodeColumnDesc column = (ExprNodeColumnDesc)exp;
@@ -135,6 +139,30 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
   }
 
+  /**
+   * Checks whether the given function and, recursively, all of its children are deterministic.
+   * @param funcDesc function descriptor
+   * @return true if the function is deterministic, false otherwise
+   */
+  public static boolean isDeterministic(ExprNodeGenericFuncDesc funcDesc) {
+    if (FunctionRegistry.isConsistentWithinQuery(funcDesc.getGenericUDF())) {
+      // check whether the children are deterministic
+      for (ExprNodeDesc exprNodeDesc : funcDesc.getChildren()) {
+        if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) {
+          if (!isDeterministic((ExprNodeGenericFuncDesc) exprNodeDesc)) {
+            // some child is not deterministic - return false
+            return false;
+          }
+        }
+      }
+      // all children are deterministic - return true
+      return true;
+    }
+
+    // function is not deterministic - return false
+    return false;
+  }
+
 /**
  * FieldExprProcessor.
  *
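Because the new helper is public and static, its behavior can be probed outside the predicate-pushdown walker. The following is a hypothetical usage sketch, not part of the patch; it assumes FunctionRegistry.getFunctionInfo() and ExprNodeGenericFuncDesc.newInstance() carry the signatures they have in the ql module of this era, so treat the exact calls as assumptions. The class name DeterminismCheckDemo is invented for illustration.

    import java.util.ArrayList;
    import java.util.Arrays;

    import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
    import org.apache.hadoop.hive.ql.ppd.ExprWalkerProcFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class DeterminismCheckDemo {
      public static void main(String[] args) throws Exception {
        // upper(col1): upper is deterministic and its only child is a plain column,
        // so the recursive check succeeds and a predicate over it stays a PPD candidate.
        ExprNodeDesc col =
            new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "col1", "testa", false);
        ExprNodeGenericFuncDesc upperCall = ExprNodeGenericFuncDesc.newInstance(
            FunctionRegistry.getFunctionInfo("upper").getGenericUDF(), Arrays.asList(col));
        System.out.println(ExprWalkerProcFactory.isDeterministic(upperCall)); // true

        // rand() is annotated non-deterministic, so the check fails at the root.
        ExprNodeGenericFuncDesc randCall = ExprNodeGenericFuncDesc.newInstance(
            FunctionRegistry.getFunctionInfo("rand").getGenericUDF(),
            new ArrayList<ExprNodeDesc>());
        System.out.println(ExprWalkerProcFactory.isDeterministic(randCall)); // false
      }
    }

The recursion is the point of the patch: candidacy is decided per expression tree, so upper(rand()) would fail the child check even though upper itself is consistent within a query.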
diff --git ql/src/test/queries/clientpositive/ppd_deterministic_expr.q ql/src/test/queries/clientpositive/ppd_deterministic_expr.q
new file mode 100644
index 0000000000000000000000000000000000000000..47c88493c623b5aa4eb924efce03de9e1b1173fb
--- /dev/null
+++ ql/src/test/queries/clientpositive/ppd_deterministic_expr.q
@@ -0,0 +1,143 @@
+set hive.auto.convert.join=false;
+set hive.optimize.index.filter=true;
+set hive.cbo.enable=false;
+
+CREATE TABLE `testb`(
+  `cola` string COMMENT '',
+  `colb` string COMMENT '',
+  `colc` string COMMENT '')
+PARTITIONED BY (
+  `part1` string,
+  `part2` string,
+  `part3` string)
+
+STORED AS AVRO;
+
+CREATE TABLE `testa`(
+  `col1` string COMMENT '',
+  `col2` string COMMENT '',
+  `col3` string COMMENT '',
+  `col4` string COMMENT '',
+  `col5` string COMMENT '')
+PARTITIONED BY (
+  `part1` string,
+  `part2` string,
+  `part3` string)
+STORED AS AVRO;
+
+insert into testA partition (part1='US', part2='ABC', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='UK', part2='DEF', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='US', part2='DEF', part3='200')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='CA', part2='ABC', part3='300')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testB partition (part1='CA', part2='ABC', part3='300')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+insert into testB partition (part1='CA', part2='ABC', part3='400')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+insert into testB partition (part1='UK', part2='PQR', part3='500')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+insert into testB partition (part1='US', part2='DEF', part3='200')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+insert into testB partition (part1='US', part2='PQR', part3='123')
+values ('600', '700', 'abc'), ('601', '701', 'abcd');
+
+-- views with deterministic functions
+create view viewDeterministicUDFA partitioned on (vpart1, vpart2, vpart3) as select
+  cast(col1 as decimal(38,18)) as vcol1,
+  cast(col2 as decimal(38,18)) as vcol2,
+  cast(col3 as decimal(38,18)) as vcol3,
+  cast(col4 as decimal(38,18)) as vcol4,
+  cast(col5 as char(10)) as vcol5,
+  cast(part1 as char(2)) as vpart1,
+  cast(part2 as char(3)) as vpart2,
+  cast(part3 as char(3)) as vpart3
+  from testa
+where part1 in ('US', 'CA');
+
+create view viewDeterministicUDFB partitioned on (vpart1, vpart2, vpart3) as select
+  cast(cola as decimal(38,18)) as vcolA,
+  cast(colb as decimal(38,18)) as vcolB,
+  cast(colc as char(10)) as vcolC,
+  cast(part1 as char(2)) as vpart1,
+  cast(part2 as char(3)) as vpart2,
+  cast(part3 as char(3)) as vpart3
+  from testb
+where part1 in ('US', 'CA');
+
+-- views without function reference
+create view viewNoUDFA partitioned on (part1, part2, part3) as select
+  cast(col1 as decimal(38,18)) as vcol1,
+  cast(col2 as decimal(38,18)) as vcol2,
+  cast(col3 as decimal(38,18)) as vcol3,
+  cast(col4 as decimal(38,18)) as vcol4,
+  cast(col5 as char(10)) as vcol5,
+  part1,
+  part2,
+  part3
+  from testa
+where part1 in ('US', 'CA');
+
+create view viewNoUDFB partitioned on (part1, part2, part3) as select
+  cast(cola as decimal(38,18)) as vcolA,
+  cast(colb as decimal(38,18)) as vcolB,
+  cast(colc as char(10)) as vcolC,
+  part1,
+  part2,
+  part3
+  from testb
+where part1 in ('US', 'CA');
+
+-- query referencing deterministic functions
+explain
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewDeterministicUDFA a inner join viewDeterministicUDFB b
+on a.vpart1 = b.vpart1
+and a.vpart2 = b.vpart2
+and a.vpart3 = b.vpart3
+and a.vpart1 = 'US'
+and a.vpart2 = 'DEF'
+and a.vpart3 = '200';
+
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewDeterministicUDFA a inner join viewDeterministicUDFB b
+on a.vpart1 = b.vpart1
+and a.vpart2 = b.vpart2
+and a.vpart3 = b.vpart3
+and a.vpart1 = 'US'
+and a.vpart2 = 'DEF'
+and a.vpart3 = '200';
+
+-- query with views referencing no udfs
+explain
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewNoUDFA a inner join viewNoUDFB b
+on a.part1 = b.part1
+and a.part2 = b.part2
+and a.part3 = b.part3
+and a.part1 = 'US'
+and a.part2 = 'DEF'
+and a.part3 = '200';
+
+select vcol1, vcol2, vcol3, vcola, vcolb
+from viewNoUDFA a inner join viewNoUDFB b
+on a.part1 = b.part1
+and a.part2 = b.part2
+and a.part3 = b.part3
+and a.part1 = 'US'
+and a.part2 = 'DEF'
+and a.part3 = '200';
diff --git ql/src/test/queries/clientpositive/ppd_udf_col.q ql/src/test/queries/clientpositive/ppd_udf_col.q
index 9c7d4fdc20dad955469c3baa688595c2961cad6b..ac2f861f4f3c6401b99d034810c3ed705e47e26b 100644
--- ql/src/test/queries/clientpositive/ppd_udf_col.q
+++ ql/src/test/queries/clientpositive/ppd_udf_col.q
@@ -48,3 +48,51 @@ EXPLAIN
 SELECT key,randum123, v10
 FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
 WHERE a.v10 <= 200;
+
+set hive.cbo.enable=false;
+
+EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1;
+
+EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20;
+
+EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3;
+
+EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200;
+
+set hive.ppd.remove.duplicatefilters=false;
+
+EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1;
+
+EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20;
+
+EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3;
+
+EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200;
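The queries above deliberately mix rand() (non-deterministic) with hex(4) and value*10 (deterministic), so the golden output below shows which predicates may move below the Select that computes them. Determinism is declared on the UDF class itself: FunctionRegistry.isConsistentWithinQuery() consults Hive's @UDFType annotation (the UDF must be deterministic and not stateful). The following is a hedged sketch of a rand()-like UDF, not part of this patch; the class name GenericUDFNoise is invented for illustration, and the GenericUDF method signatures are assumed from this era of Hive.

    import java.util.Random;

    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.udf.UDFType;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    // deterministic = false is what keeps predicates over this UDF from being pushed
    // down; flipping it to true would make them candidates under this commit's change.
    @UDFType(deterministic = false)
    public class GenericUDFNoise extends GenericUDF {
      private final Random random = new Random();

      @Override
      public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 0) {
          throw new UDFArgumentException("noise() takes no arguments");
        }
        return PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
      }

      @Override
      public Object evaluate(DeferredObject[] arguments) throws HiveException {
        return random.nextDouble(); // a fresh value on every call: not deterministic
      }

      @Override
      public String getDisplayString(String[] children) {
        return getStandardDisplayString("noise", children);
      }
    }
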
diff --git ql/src/test/results/clientpositive/ppd_udf_col.q.out ql/src/test/results/clientpositive/ppd_udf_col.q.out
index 97ca3835930e050d517361f634b3f2e4a7d3842f..45aac1b560dc91fbeef21086752de76cc533c170 100644
--- ql/src/test/results/clientpositive/ppd_udf_col.q.out
+++ ql/src/test/results/clientpositive/ppd_udf_col.q.out
@@ -356,3 +356,406 @@ STAGE PLANS:
     Processor Tree:
       ListSink
 
+PREHOOK: query: EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 100) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 <= 0.1) (type: boolean)
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 100) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((_col2 <= 0.1) and (_col2 > 0.1)) (type: boolean)
+                  Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 20
+                      Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: false (type: boolean)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double), '4' (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((value * 10) <= 200.0D) and (key = 100)) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double), (value * 10) (type: double)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 100) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 <= 0.1) (type: boolean)
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT * FROM
+(
+SELECT key, randum123
+FROM (SELECT *, cast(rand() as double) AS randum123 FROM src WHERE key = 100) a
+WHERE randum123 <=0.1)s WHERE s.randum123>0.1 LIMIT 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 100) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 <= 0.1) (type: boolean)
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                    Filter Operator
+                      predicate: (_col1 > 0.1) (type: boolean)
+                      Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                      Limit
+                        Number of rows: 20
+                        Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key,randum123, h4
+FROM (SELECT *, cast(rand() as double) AS randum123, hex(4) AS h4 FROM src WHERE key = 100) a
+WHERE a.h4 <= 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((key = 100) and false) (type: boolean)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: false (type: boolean)
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double), '4' (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key,randum123, v10
+FROM (SELECT *, cast(rand() as double) AS randum123, value*10 AS v10 FROM src WHERE key = 100) a
+WHERE a.v10 <= 200
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((value * 10) <= 200.0D) and (key = 100)) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), rand() (type: double), (value * 10) (type: double)
+                outputColumnNames: _col0, _col2, _col3
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col3 <= 200.0D) (type: boolean)
+                  Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double), _col3 (type: double)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
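Taken together, the new golden output shows the intended split: deterministic sub-expressions are pushed into the TableScan's Filter Operator ((value * 10) <= 200.0D lands next to (key = 100), and the constant-folded hex(4) comparison collapses to false), while every predicate on randum123 stays above the Select Operator that computes rand(). With hive.ppd.remove.duplicatefilters=false, both the pushed copy and the residual copy of a deterministic filter appear, which is why the last two plans carry an extra inner Filter Operator.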