diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index ffd47a2..89f6ee1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -211,11 +211,38 @@ private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions(); } } - if (!aggressive && !bypassFilter) { + + boolean onlyPruningFilter = bypassFilter; + Operator op = ts; + while (onlyPruningFilter) { + if (op instanceof FileSinkOperator || op.getChildOperators() == null) { + break; + } else if (op.getChildOperators().size() != 1) { + onlyPruningFilter = false; + break; + } else { + op = op.getChildOperators().get(0); + } + + if (op instanceof FilterOperator) { + ExprNodeDesc predicate = ((FilterOperator) op).getConf().getPredicate(); + if (predicate instanceof ExprNodeConstantDesc + && "boolean".equals(predicate.getTypeInfo().getTypeName())) { + continue; + } else if (PartitionPruner.onlyContainsPartnCols(table, predicate)) { + continue; + } else { + onlyPruningFilter = false; + } + } + } + + if (!aggressive && !onlyPruningFilter) { return null; } + PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts); - FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, bypassFilter); + FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, onlyPruningFilter); return checkOperators(fetch, aggressive, bypassFilter); } diff --git ql/src/test/queries/clientpositive/nonmr_fetch.q ql/src/test/queries/clientpositive/nonmr_fetch.q index 1b5ab56..5b7e61e 100644 --- ql/src/test/queries/clientpositive/nonmr_fetch.q +++ ql/src/test/queries/clientpositive/nonmr_fetch.q @@ -15,6 +15,10 @@ select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; explain select key from src limit 10; select key from src limit 10; +-- negative, filter on partition column and non-partition column +explain select * from srcpart where ds='2008-04-08' AND key > 100 limit 10; +select * from srcpart where ds='2008-04-08' AND key > 100 limit 10; + -- negative, filter on non-partition column explain select * from srcpart where key > 100 limit 10; select * from srcpart where key > 100 limit 10; diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index 29ef214..bafc6de 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -441,73 +441,112 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select locid from loc_orc_n4 where locid>0 and year='2001' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n4 + filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: loc_orc_n4 - filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (locid > 0) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: locid (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: explain select locid,year from loc_orc_n4 where locid>0 and year='2001' PREHOOK: type: QUERY POSTHOOK: query: explain select locid,year from loc_orc_n4 where locid>0 and year='2001' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n4 + filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int), '2001' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: loc_orc_n4 - filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (locid > 0) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: locid (type: int), '2001' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: explain select * from (select locid,year from loc_orc_n4) test where locid>0 and year='2001' PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select locid,year from loc_orc_n4) test where locid>0 and year='2001' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc_n4 + filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 0) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: locid (type: int), '2001' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: loc_orc_n4 - filterExpr: ((locid > 0) and (year = '2001')) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (locid > 0) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: locid (type: int), '2001' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink diff --git ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out index d12b5f6..2e7d796 100644 --- ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out +++ ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out @@ -746,25 +746,38 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = c_int) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = c_int) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = c_int) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) @@ -773,25 +786,38 @@ POSTHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = (2 * c_int)) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = (2 * c_int)) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is 0 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) @@ -800,25 +826,38 @@ POSTHOOK: query: -- c_int is 0 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = c_int) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = c_int) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = c_int) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) @@ -827,25 +866,38 @@ POSTHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = null) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = null) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = null) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = null) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- rewrite to NULL EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) @@ -854,25 +906,37 @@ POSTHOOK: query: -- rewrite to NULL EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- no rewrite EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) @@ -881,25 +945,37 @@ POSTHOOK: query: -- no rewrite EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int) IN (c_int, 0) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, 0) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: key, value, c_int, c_float, c_boolean, dt + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int) IN (c_int, 0) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int) IN (c_int, 0) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- no rewrite diff --git ql/src/test/results/clientpositive/cbo_simple_select.q.out ql/src/test/results/clientpositive/cbo_simple_select.q.out index 588d924..33f0e71 100644 --- ql/src/test/results/clientpositive/cbo_simple_select.q.out +++ ql/src/test/results/clientpositive/cbo_simple_select.q.out @@ -746,25 +746,38 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = c_int) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = c_int) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = c_int) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) @@ -773,25 +786,38 @@ POSTHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (2*cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = (2 * c_int)) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = (2 * c_int)) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is 0 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) @@ -800,25 +826,38 @@ POSTHOOK: query: -- c_int is 0 EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) = (cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = c_int) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = c_int) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = c_int) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = c_int) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) @@ -827,25 +866,38 @@ POSTHOOK: query: -- c_int is not null EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (NULL) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int = null) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int = null) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int = null) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int = null) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), null (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3624 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- rewrite to NULL EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) @@ -854,25 +906,37 @@ POSTHOOK: query: -- rewrite to NULL EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- no rewrite EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) @@ -881,25 +945,37 @@ POSTHOOK: query: -- no rewrite EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 0) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t2 + filterExpr: (c_int) IN (c_int, 0) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (c_int) IN (c_int, 0) (type: boolean) + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: cbo_t2 - filterExpr: (c_int) IN (c_int, 0) (type: boolean) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (c_int) IN (c_int, 0) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE - ListSink + ListSink PREHOOK: query: -- no rewrite diff --git ql/src/test/results/clientpositive/filter_in_or_dup.q.out ql/src/test/results/clientpositive/filter_in_or_dup.q.out index b821717..b50027d 100644 --- ql/src/test/results/clientpositive/filter_in_or_dup.q.out +++ ql/src/test/results/clientpositive/filter_in_or_dup.q.out @@ -11,25 +11,38 @@ WHERE (f.key = '1' OR f.key='2') AND f.key IN ('1', '2') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + filterExpr: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: f - filterExpr: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: EXPLAIN SELECT f.key @@ -44,25 +57,38 @@ WHERE (f.key = '1' OR f.key = '2') AND f.key IN ('1', '2', '3') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + filterExpr: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: f - filterExpr: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: EXPLAIN SELECT f.key @@ -77,23 +103,36 @@ WHERE (f.key = '1' OR f.key='2' OR f.key='3') AND f.key IN ('1', '2') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + filterExpr: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key) IN ('1', '2') (type: boolean) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: f - filterExpr: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key) IN ('1', '2') (type: boolean) - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink diff --git ql/src/test/results/clientpositive/input42.q.out ql/src/test/results/clientpositive/input42.q.out index df98800..98c2fd0 100644 --- ql/src/test/results/clientpositive/input42.q.out +++ ql/src/test/results/clientpositive/input42.q.out @@ -1143,14 +1143,55 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `key` < 200 STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) < 200.0D)) (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (UDFToDouble(key) < 200.0D) (type: boolean) + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1197,7 +1238,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart +#### A masked pattern was here #### Partition + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1244,21 +1287,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: a - filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) < 200.0D)) (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (UDFToDouble(key) < 200.0D) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from srcpart a where a.ds='2008-04-08' and key < 200 PREHOOK: type: QUERY @@ -1660,14 +1697,54 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND RAND(100) < 0.1 STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (rand(100) < 0.1D) (type: boolean) + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1714,7 +1791,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart +#### A masked pattern was here #### Partition + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1761,20 +1840,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: a - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (rand(100) < 0.1D) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from srcpart a where a.ds='2008-04-08' and rand(100) < 0.1 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/input_part9.q.out ql/src/test/results/clientpositive/input_part9.q.out index 9440167..5ecaeb1 100644 --- ql/src/test/results/clientpositive/input_part9.q.out +++ ql/src/test/results/clientpositive/input_part9.q.out @@ -8,14 +8,55 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + filterExpr: ((ds = '2008-04-08') and key is not null) (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -62,7 +103,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart +#### A masked pattern was here #### Partition + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -109,21 +152,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [x] + /srcpart/ds=2008-04-08/hr=12 [x] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: x - filterExpr: ((ds = '2008-04-08') and key is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT x.* FROM SRCPART x WHERE key IS NOT NULL AND ds = '2008-04-08' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_dml_1.q.out ql/src/test/results/clientpositive/list_bucket_dml_1.q.out index d13edd6..226e778 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_1.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_1.q.out @@ -409,14 +409,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, `value` FROM `default`.`list_bucketing_dynamic_part_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_dynamic_part_n0 + filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = '484')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '484') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: key=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -463,21 +504,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.list_bucketing_dynamic_part_n0 name: default.list_bucketing_dynamic_part_n0 + Truncated Path -> Alias: + /list_bucketing_dynamic_part_n0/ds=2008-04-08/hr=11/key=484 [list_bucketing_dynamic_part_n0] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_dynamic_part_n0 - filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = '484')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '484') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select key, value from list_bucketing_dynamic_part_n0 where ds='2008-04-08' and hr='11' and key = "484" PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_dml_11.q.out ql/src/test/results/clientpositive/list_bucket_dml_11.q.out index 44b712b..11f7dc1 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_11.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_11.q.out @@ -290,14 +290,55 @@ OPTIMIZED SQL: SELECT `key`, CAST('val_466' AS STRING) AS `value` FROM `default`.`list_bucketing_static_part_n3` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `value` = 'val_466' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_static_part_n3 + filterExpr: ((ds = '2008-04-08') and (hr = '11') and (value = 'val_466')) (type: boolean) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 'val_466') (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), 'val_466' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: value=val_466 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -344,21 +385,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part_n3 name: default.list_bucketing_static_part_n3 + Truncated Path -> Alias: + /list_bucketing_static_part_n3/ds=2008-04-08/hr=11/value=val_466 [list_bucketing_static_part_n3] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_static_part_n3 - filterExpr: ((ds = '2008-04-08') and (hr = '11') and (value = 'val_466')) (type: boolean) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 'val_466') (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), 'val_466' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select key, value from list_bucketing_static_part_n3 where ds='2008-04-08' and hr='11' and value = "val_466" PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_dml_12.q.out ql/src/test/results/clientpositive/list_bucket_dml_12.q.out index f5e643e..f1c20e5 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_12.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_12.q.out @@ -298,14 +298,55 @@ OPTIMIZED SQL: SELECT `col1`, CAST('466' AS STRING) AS `col2`, `col3`, CAST('val FROM `default`.`list_bucketing_mul_col_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `col2` = '466' AND `col4` = 'val_466' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_mul_col_n0 + filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:string:string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: col4=val_466 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -352,21 +393,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_mul_col_n0 name: default.list_bucketing_mul_col_n0 + Truncated Path -> Alias: + /list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/col2=466/col4=val_466 [list_bucketing_mul_col_n0] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_mul_col_n0 - filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from list_bucketing_mul_col_n0 where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" @@ -395,14 +429,55 @@ OPTIMIZED SQL: SELECT `col1`, CAST('382' AS STRING) AS `col2`, `col3`, CAST('val FROM `default`.`list_bucketing_mul_col_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `col2` = '382' AND `col4` = 'val_382' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_mul_col_n0 + filterExpr: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:string:string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -449,21 +524,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_mul_col_n0 name: default.list_bucketing_mul_col_n0 + Truncated Path -> Alias: + /list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [list_bucketing_mul_col_n0] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_mul_col_n0 - filterExpr: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from list_bucketing_mul_col_n0 where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" diff --git ql/src/test/results/clientpositive/list_bucket_dml_13.q.out ql/src/test/results/clientpositive/list_bucket_dml_13.q.out index a43a0b7..b9f658a 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_13.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_13.q.out @@ -298,14 +298,55 @@ OPTIMIZED SQL: SELECT `col1`, CAST('466' AS STRING) AS `col2`, `col3`, CAST('val FROM `default`.`list_bucketing_mul_col` WHERE `ds` = '2008-04-08' AND `hr` = '2013-01-23+18:00:99' AND `col2` = '466' AND `col4` = 'val_466' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_mul_col + filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:string:string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: col4=val_466 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -352,21 +393,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_mul_col name: default.list_bucketing_mul_col + Truncated Path -> Alias: + /list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466 [list_bucketing_mul_col] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_mul_col - filterExpr: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from list_bucketing_mul_col where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" diff --git ql/src/test/results/clientpositive/list_bucket_dml_2.q.out ql/src/test/results/clientpositive/list_bucket_dml_2.q.out index 5750fdd..bd8e215 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_2.q.out @@ -365,14 +365,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) FROM `default`.`list_bucketing_static_part_n4` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_static_part_n4 + filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: value=val_484 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -419,21 +460,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part_n4 name: default.list_bucketing_static_part_n4 + Truncated Path -> Alias: + /list_bucketing_static_part_n4/ds=2008-04-08/hr=11/key=484/value=val_484 [list_bucketing_static_part_n4] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_static_part_n4 - filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from list_bucketing_static_part_n4 where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_dml_3.q.out ql/src/test/results/clientpositive/list_bucket_dml_3.q.out index affbdf5..5ba8948 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_3.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_3.q.out @@ -357,14 +357,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, `value` FROM `default`.`list_bucketing_static_part_n1` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_static_part_n1 + filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = '484')) (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '484') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: key=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -411,21 +452,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.list_bucketing_static_part_n1 name: default.list_bucketing_static_part_n1 + Truncated Path -> Alias: + /list_bucketing_static_part_n1/ds=2008-04-08/hr=11/key=484 [list_bucketing_static_part_n1] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_static_part_n1 - filterExpr: ((ds = '2008-04-08') and (hr = '11') and (key = '484')) (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '484') (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select key, value from list_bucketing_static_part_n1 where ds='2008-04-08' and hr='11' and key = "484" PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_dml_4.q.out ql/src/test/results/clientpositive/list_bucket_dml_4.q.out index 4ddd112..520d48e 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_4.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_4.q.out @@ -808,14 +808,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) FROM `default`.`list_bucketing_static_part_n2` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_static_part_n2 + filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: value=val_484 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -862,21 +903,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part_n2 name: default.list_bucketing_static_part_n2 + Truncated Path -> Alias: + /list_bucketing_static_part_n2/ds=2008-04-08/hr=11/key=484/value=val_484 [list_bucketing_static_part_n2] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_static_part_n2 - filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from list_bucketing_static_part_n2 where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_dml_5.q.out ql/src/test/results/clientpositive/list_bucket_dml_5.q.out index 06e2a45..47c9e24 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_5.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_5.q.out @@ -414,14 +414,55 @@ OPTIMIZED SQL: SELECT CAST('103' AS STRING) AS `key`, CAST('val_103' AS STRING) FROM `default`.`list_bucketing_dynamic_part_n1` WHERE `ds` = '2008-04-08' AND `key` = '103' AND `value` = 'val_103' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_dynamic_part_n1 + filterExpr: ((key = '103') and (value = 'val_103')) (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '103') and (value = 'val_103')) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '103' (type: string), 'val_103' (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: value=val_103 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -468,7 +509,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.list_bucketing_dynamic_part_n1 name: default.list_bucketing_dynamic_part_n1 +#### A masked pattern was here #### Partition + base file name: value=val_103 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -515,21 +558,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.list_bucketing_dynamic_part_n1 name: default.list_bucketing_dynamic_part_n1 + Truncated Path -> Alias: + /list_bucketing_dynamic_part_n1/ds=2008-04-08/hr=11/key=103/value=val_103 [list_bucketing_dynamic_part_n1] + /list_bucketing_dynamic_part_n1/ds=2008-04-08/hr=12/key=103/value=val_103 [list_bucketing_dynamic_part_n1] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_dynamic_part_n1 - filterExpr: ((key = '103') and (value = 'val_103')) (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '103') and (value = 'val_103')) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '103' (type: string), 'val_103' (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select key, value, ds, hr from list_bucketing_dynamic_part_n1 where ds='2008-04-08' and key = "103" and value ="val_103" PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_dml_6.q.out ql/src/test/results/clientpositive/list_bucket_dml_6.q.out index a3089a7..aad1458 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_6.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_6.q.out @@ -904,14 +904,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) FROM `default`.`list_bucketing_dynamic_part_n3` WHERE `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_dynamic_part_n3 + filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=a1 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -958,7 +999,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part_n3 name: default.list_bucketing_dynamic_part_n3 +#### A masked pattern was here #### Partition + base file name: value=val_484 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -1005,21 +1048,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part_n3 name: default.list_bucketing_dynamic_part_n3 + Truncated Path -> Alias: + /list_bucketing_dynamic_part_n3/ds=2008-04-08/hr=a1 [list_bucketing_dynamic_part_n3] + /list_bucketing_dynamic_part_n3/ds=2008-04-08/hr=b1/key=484/value=val_484 [list_bucketing_dynamic_part_n3] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_dynamic_part_n3 - filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from list_bucketing_dynamic_part_n3 where key = '484' and value = 'val_484' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_dml_7.q.out ql/src/test/results/clientpositive/list_bucket_dml_7.q.out index faef7a9..7232e79 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_7.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_7.q.out @@ -904,14 +904,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) FROM `default`.`list_bucketing_dynamic_part` WHERE `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_dynamic_part + filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=a1 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -958,7 +999,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part name: default.list_bucketing_dynamic_part +#### A masked pattern was here #### Partition + base file name: hr=b1 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -1005,21 +1048,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part name: default.list_bucketing_dynamic_part + Truncated Path -> Alias: + /list_bucketing_dynamic_part/ds=2008-04-08/hr=a1 [list_bucketing_dynamic_part] + /list_bucketing_dynamic_part/ds=2008-04-08/hr=b1 [list_bucketing_dynamic_part] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_dynamic_part - filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_dml_8.q.out ql/src/test/results/clientpositive/list_bucket_dml_8.q.out index eac6407..337f9dc 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_8.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -464,14 +464,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) FROM `default`.`list_bucketing_dynamic_part_n2` WHERE `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_dynamic_part_n2 + filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=a1 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -518,7 +559,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part_n2 name: default.list_bucketing_dynamic_part_n2 +#### A masked pattern was here #### Partition + base file name: hr=b1 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -564,21 +607,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_dynamic_part_n2 name: default.list_bucketing_dynamic_part_n2 + Truncated Path -> Alias: + /list_bucketing_dynamic_part_n2/ds=2008-04-08/hr=a1 [list_bucketing_dynamic_part_n2] + /list_bucketing_dynamic_part_n2/ds=2008-04-08/hr=b1 [list_bucketing_dynamic_part_n2] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_dynamic_part_n2 - filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from list_bucketing_dynamic_part_n2 where key = '484' and value = 'val_484' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_dml_9.q.out ql/src/test/results/clientpositive/list_bucket_dml_9.q.out index 1e1fc13..fbd4fde 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_9.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_9.q.out @@ -808,14 +808,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) FROM `default`.`list_bucketing_static_part_n0` WHERE `ds` = '2008-04-08' AND `hr` = '11' AND `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: list_bucketing_static_part_n0 + filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: key=484 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -862,21 +903,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.list_bucketing_static_part_n0 name: default.list_bucketing_static_part_n0 + Truncated Path -> Alias: + /list_bucketing_static_part_n0/ds=2008-04-08/hr=11/key=484 [list_bucketing_static_part_n0] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: list_bucketing_static_part_n0 - filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from list_bucketing_static_part_n0 where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out index a09e007..e324cab 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out @@ -84,14 +84,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key` FROM `default`.`fact_daily` WHERE `ds` = '1' AND `hr` = '4' AND `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: fact_daily + filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: value=val_484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -138,21 +179,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily + Truncated Path -> Alias: + /fact_daily/ds=1/hr=4/key=484/value=val_484 [fact_daily] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: fact_daily - filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (key='484' and value= 'val_484') PREHOOK: type: QUERY @@ -173,14 +207,55 @@ OPTIMIZED SQL: SELECT CAST('238' AS STRING) AS `key`, CAST('val_238' AS STRING) FROM `default`.`fact_daily` WHERE `ds` = '1' AND `hr` = '4' AND `key` = '238' AND `value` = 'val_238' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: fact_daily + filterExpr: ((key = '238') and (value = 'val_238')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '238') and (value = 'val_238')) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '238' (type: string), 'val_238' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: value=val_238 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -227,21 +302,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily + Truncated Path -> Alias: + /fact_daily/ds=1/hr=4/key=238/value=val_238 [fact_daily] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: fact_daily - filterExpr: ((key = '238') and (value = 'val_238')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '238') and (value = 'val_238')) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '238' (type: string), 'val_238' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and (key='238' and value= 'val_238') PREHOOK: type: QUERY @@ -263,14 +331,55 @@ OPTIMIZED SQL: SELECT `key` FROM `default`.`fact_daily` WHERE `ds` = '1' AND `hr` = '4' AND `value` = '3' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: fact_daily + filterExpr: ((ds = '1') and (hr = '4') and (value = '3')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = '3') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -317,21 +426,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily + Truncated Path -> Alias: + /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: fact_daily - filterExpr: ((ds = '1') and (hr = '4') and (value = '3')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = '3') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (value = "3") PREHOOK: type: QUERY @@ -351,14 +453,55 @@ OPTIMIZED SQL: SELECT CAST('495' AS STRING) AS `key`, `value` FROM `default`.`fact_daily` WHERE `ds` = '1' AND `hr` = '4' AND `key` = '495' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: fact_daily + filterExpr: ((ds = '1') and (hr = '4') and (key = '495')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '495') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '495' (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -405,21 +548,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily + Truncated Path -> Alias: + /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: fact_daily - filterExpr: ((ds = '1') and (hr = '4') and (key = '495')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '495') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '495' (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and key = '369' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index 6217adb..98ad365 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -84,14 +84,104 @@ OPTIMIZED SQL: SELECT `key`, CAST('val_484' AS STRING) AS `value` FROM `default`.`fact_daily_n2` WHERE `ds` = '1' AND `hr` = '4' AND `value` = 'val_484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: fact_daily_n2 + filterExpr: ((ds = '1') and (hr = '4') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 'val_484') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), 'val_484' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 4 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.fact_daily_n2 + numFiles 3 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct fact_daily_n2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.fact_daily_n2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct fact_daily_n2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.fact_daily_n2 + name: default.fact_daily_n2 +#### A masked pattern was here #### Partition + base file name: value=val_484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -138,21 +228,15 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily_n2 name: default.fact_daily_n2 + Truncated Path -> Alias: + /fact_daily_n2/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily_n2] + /fact_daily_n2/ds=1/hr=4/key=484/value=val_484 [fact_daily_n2] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: fact_daily_n2 - filterExpr: ((ds = '1') and (hr = '4') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 'val_484') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), 'val_484' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT key, value FROM fact_daily_n2 WHERE ds='1' and hr='4' and value= 'val_484' PREHOOK: type: QUERY @@ -173,14 +257,55 @@ OPTIMIZED SQL: SELECT CAST('406' AS STRING) AS `key` FROM `default`.`fact_daily_n2` WHERE `ds` = '1' AND `hr` = '4' AND `key` = '406' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: fact_daily_n2 + filterExpr: ((ds = '1') and (hr = '4') and (key = '406')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '406') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '406' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -227,21 +352,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily_n2 name: default.fact_daily_n2 + Truncated Path -> Alias: + /fact_daily_n2/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily_n2] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: fact_daily_n2 - filterExpr: ((ds = '1') and (hr = '4') and (key = '406')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '406') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '406' (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT key, value FROM fact_daily_n2 WHERE ds='1' and hr='4' and key= '406' PREHOOK: type: QUERY @@ -265,14 +383,55 @@ OPTIMIZED SQL: SELECT `key`, `value` FROM `default`.`fact_daily_n2` WHERE `ds` = '1' AND `hr` = '4' AND (`key` = '484' AND `value` = 'val_484' OR `key` = '238' AND `value` = 'val_238') STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: fact_daily_n2 + filterExpr: ((ds = '1') and (hr = '4') and (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238')))) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((key = '238') and (value = 'val_238')) or ((key = '484') and (value = 'val_484'))) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: value=val_238 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -319,21 +478,64 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily_n2 name: default.fact_daily_n2 +#### A masked pattern was here #### + Partition + base file name: value=val_484 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 4 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.fact_daily_n2 + numFiles 3 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct fact_daily_n2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.fact_daily_n2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct fact_daily_n2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.fact_daily_n2 + name: default.fact_daily_n2 + Truncated Path -> Alias: + /fact_daily_n2/ds=1/hr=4/key=238/value=val_238 [fact_daily_n2] + /fact_daily_n2/ds=1/hr=4/key=484/value=val_484 [fact_daily_n2] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: fact_daily_n2 - filterExpr: ((ds = '1') and (hr = '4') and (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238')))) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (((key = '238') and (value = 'val_238')) or ((key = '484') and (value = 'val_484'))) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT key, value FROM fact_daily_n2 WHERE ds='1' and hr='4' and ( (key='484' and value ='val_484') or (key='238' and value= 'val_238')) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out index b256f4f..889f23c 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out @@ -194,14 +194,55 @@ OPTIMIZED SQL: SELECT CAST('145' AS STRING) AS `key`, `value`, CAST('1' AS STRIN FROM `default`.`fact_daily_n3` WHERE `ds` = '1' AND `hr` = '1' AND `key` = '145' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: fact_daily_n3 + filterExpr: ((ds = '1') and (hr = '1') and (key = '145')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '145') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '145' (type: string), value (type: string), '1' (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -248,21 +289,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily_n3 name: default.fact_daily_n3 + Truncated Path -> Alias: + /fact_daily_n3/ds=1/hr=1 [fact_daily_n3] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: fact_daily_n3 - filterExpr: ((ds = '1') and (hr = '1') and (key = '145')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '145') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '145' (type: string), value (type: string), '1' (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from fact_daily_n3 where ds = '1' and hr='1' and key='145' PREHOOK: type: QUERY @@ -313,14 +347,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, CAST('val_484' AS STRING) FROM `default`.`fact_daily_n3` WHERE `ds` = '1' AND `hr` = '2' AND `key` = '484' AND `value` = 'val_484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: fact_daily_n3 + filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), '1' (type: string), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: value=val_484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -367,21 +442,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily_n3 name: default.fact_daily_n3 + Truncated Path -> Alias: + /fact_daily_n3/ds=1/hr=2/key=484/value=val_484 [fact_daily_n3] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: fact_daily_n3 - filterExpr: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '1' (type: string), '2' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='2' and (key='484' and value='val_484') PREHOOK: type: QUERY @@ -404,14 +472,55 @@ OPTIMIZED SQL: SELECT CAST('327' AS STRING) AS `key`, CAST('val_327' AS STRING) FROM `default`.`fact_daily_n3` WHERE `ds` = '1' AND `hr` = '3' AND `key` = '327' AND `value` = 'val_327' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: fact_daily_n3 + filterExpr: ((key = '327') and (value = 'val_327')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '327') and (value = 'val_327')) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '327' (type: string), 'val_327' (type: string), '1' (type: string), '3' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: value=val_327 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -458,21 +567,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily_n3 name: default.fact_daily_n3 + Truncated Path -> Alias: + /fact_daily_n3/ds=1/hr=3/key=327/value=val_327 [fact_daily_n3] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: fact_daily_n3 - filterExpr: ((key = '327') and (value = 'val_327')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '327') and (value = 'val_327')) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '327' (type: string), 'val_327' (type: string), '1' (type: string), '3' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='3' and (key='327' and value='val_327') PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/nonmr_fetch.q.out ql/src/test/results/clientpositive/nonmr_fetch.q.out index cd8d78b..366dc1e 100644 --- ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -127,33 +127,109 @@ POSTHOOK: Input: default@src 278 98 484 +PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND key > 100 limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND key > 100 limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + filterExpr: ((ds = '2008-04-08') and (UDFToDouble(key) > 100.0D)) (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select * from srcpart where ds='2008-04-08' AND key > 100 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where ds='2008-04-08' AND key > 100 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +238 val_238 2008-04-08 11 +311 val_311 2008-04-08 11 +165 val_165 2008-04-08 11 +409 val_409 2008-04-08 11 +255 val_255 2008-04-08 11 +278 val_278 2008-04-08 11 +484 val_484 2008-04-08 11 +265 val_265 2008-04-08 11 +193 val_193 2008-04-08 11 +401 val_401 2008-04-08 11 PREHOOK: query: explain select * from srcpart where key > 100 limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain select * from srcpart where key > 100 limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 100.0D) (type: boolean) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: srcpart - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from srcpart where key > 100 limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/ppr_pushdown3.q.out ql/src/test/results/clientpositive/ppr_pushdown3.q.out index 56e90d7..f802ff7 100644 --- ql/src/test/results/clientpositive/ppr_pushdown3.q.out +++ ql/src/test/results/clientpositive/ppr_pushdown3.q.out @@ -3,25 +3,38 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from srcpart where key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) < 10.0D) (type: boolean) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: srcpart - filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from srcpart where key < 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/rand_partitionpruner3.q.out ql/src/test/results/clientpositive/rand_partitionpruner3.q.out index 232d188..3f6efd9 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner3.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner3.q.out @@ -6,14 +6,54 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE RAND(1) < 0.1 AND `ds` = '2008-04-08' AND `key` <= 50 AND `key` >= 10 AND `hr` LIKE '%2' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D) and (rand(1) < 0.1D)) (type: boolean) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -60,20 +100,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=12 [a] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D) and (rand(1) < 0.1D)) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' PREHOOK: type: QUERY @@ -98,14 +132,55 @@ OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` FROM `default`.`srcpart` WHERE `ds` = '2008-04-08' AND `key` <= 50 AND `key` >= 10 AND `hr` LIKE '%2' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -152,21 +227,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=12 [a] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: a - filterExpr: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out index 27d21f3..60e5cd8 100644 --- ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out +++ ql/src/test/results/clientpositive/truncate_column_list_bucket.q.out @@ -57,14 +57,55 @@ OPTIMIZED SQL: SELECT CAST('484' AS STRING) AS `key`, `value`, CAST('1' AS STRIN FROM `default`.`test_tab_n3` WHERE `part` = '1' AND `key` = '484' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_tab_n3 + filterExpr: ((part = '1') and (key = '484')) (type: boolean) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '484') (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), value (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: key=484 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -109,21 +150,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.test_tab_n3 name: default.test_tab_n3 + Truncated Path -> Alias: + /test_tab_n3/part=1/key=484 [test_tab_n3] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: test_tab_n3 - filterExpr: ((part = '1') and (key = '484')) (type: boolean) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '484') (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), value (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT * FROM test_tab_n3 WHERE part = '1' AND key = '484' PREHOOK: type: QUERY @@ -144,14 +178,55 @@ OPTIMIZED SQL: SELECT CAST('0' AS STRING) AS `key`, `value`, CAST('1' AS STRING) FROM `default`.`test_tab_n3` WHERE `part` = '1' AND `key` = '0' STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_tab_n3 + filterExpr: ((part = '1') and (key = '0')) (type: boolean) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = '0') (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '0' (type: string), value (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat partition values: @@ -196,21 +271,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.test_tab_n3 name: default.test_tab_n3 + Truncated Path -> Alias: + /test_tab_n3/part=1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [test_tab_n3] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: test_tab_n3 - filterExpr: ((part = '1') and (key = '0')) (type: boolean) - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key = '0') (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '0' (type: string), value (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: SELECT * FROM test_tab_n3 WHERE part = '1' AND key = '0' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/union_view.q.out ql/src/test/results/clientpositive/union_view.q.out index ba98ef0..8d844b8 100644 --- ql/src/test/results/clientpositive/union_view.q.out +++ ql/src/test/results/clientpositive/union_view.q.out @@ -23,67 +23,106 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@src_union_3_n0 STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_union_1_n0 + filterExpr: ((key = 86) and (ds = '1')) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 86 (type: int), value (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src_union_1_n0 - filterExpr: ((key = 86) and (ds = '1')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key = 86) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 86 (type: int), value (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_union_2_n0 + filterExpr: ((key = 86) and (ds = '2')) (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 86 (type: int), value (type: string), '2' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src_union_2_n0 - filterExpr: ((key = 86) and (ds = '2')) (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key = 86) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 86 (type: int), value (type: string), '2' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src_union_3_n0 + filterExpr: ((key = 86) and (ds = '3')) (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = 86) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 86 (type: int), value (type: string), '3' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src_union_3_n0 - filterExpr: ((key = 86) and (ds = '3')) (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key = 86) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 86 (type: int), value (type: string), '3' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink 86 val_86 1 86 val_86 2