diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 5d4774db1f..4019f132d3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; @@ -572,7 +573,14 @@ private boolean checkConvertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcCont int numBuckets = bigTableRS.getParentOperators().get(0).getOpTraits().getNumBuckets(); int size = -1; + boolean shouldCheckExternalTables = + context.conf.getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS); + StringBuilder sb = new StringBuilder(); for (Operator parentOp : joinOp.getParentOperators()) { + if (shouldCheckExternalTables && hasExternalTableAncestor(parentOp, sb)) { + LOG.debug("External table {} found in join - disabling SMB join.", sb.toString()); + return false; + } // each side better have 0 or more RS. if either side is unbalanced, cannot convert. // This is a workaround for now. Right fix would be to refactor code in the // MapRecordProcessor and ReduceRecordProcessor with respect to the sources. @@ -690,6 +698,18 @@ private boolean checkConvertJoinBucketMapJoin(JoinOperator joinOp, return false; } + boolean shouldCheckExternalTables = tezBucketJoinProcCtx.getConf() + .getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS); + if (shouldCheckExternalTables) { + StringBuilder sb = new StringBuilder(); + for (Operator parentOp : joinOp.getParentOperators()) { + if (hasExternalTableAncestor(parentOp, sb)) { + LOG.debug("External table {} found in join - disabling bucket map join.", sb.toString()); + return false; + } + } + } + /* * this is the case when the big table is a sub-query and is probably already bucketed by the * join column in say a group by operation @@ -1357,4 +1377,17 @@ private static long estimateNDV(long numRows, List columnStats) { // to go a few % over. return Math.min(Math.round(v), numRows); } + + private static boolean hasExternalTableAncestor(Operator op, StringBuilder sb) { + boolean result = false; + Operator ancestor = OperatorUtils.findSingleOperatorUpstream(op, TableScanOperator.class); + if (ancestor != null) { + TableScanOperator ts = (TableScanOperator) ancestor; + if (MetaStoreUtils.isExternalTable(ts.getConf().getTableMetadata().getTTable())) { + sb.append(ts.getConf().getTableMetadata().getFullyQualifiedName()); + return true; + } + } + return result; + } } diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q index 1361e32c1a..0ee49fca0b 100644 --- a/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q +++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q @@ -119,4 +119,22 @@ insert into small values (1),(2),(3),(4),(5),(6); insert into big partition(k=1) values(1),(3),(5),(7),(9); insert into big partition(k=2) values(0),(2),(4),(6),(8); explain select small.i, big.i from small,big where small.i=big.i; -select small.i, big.i from small,big where small.i=big.i order by small.i, big.i; \ No newline at end of file +select small.i, big.i from small,big where small.i=big.i order by small.i, big.i; + +-- Bucket map join disabled for external tables +-- Create external table equivalent of tab_part +CREATE EXTERNAL TABLE tab_part_ext (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab_part_ext partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part; +analyze table tab_part_ext compute statistics for columns; + +set hive.auto.convert.join.noconditionaltask.size=1500; +set hive.convert.join.bucket.mapjoin.tez = true; +set hive.disable.unsafe.external.table.operations=true; +set test.comment=Bucket map join should work here; +set test.comment; +explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value; + +set test.comment=External tables, bucket map join should be disabled; +set test.comment; +explain select a.key, b.key from tab_part_ext a join tab_part_ext c on a.key = c.key join tab_part_ext b on a.value = b.value; diff --git a/ql/src/test/queries/clientpositive/tez_smb_1.q b/ql/src/test/queries/clientpositive/tez_smb_1.q index 0f8f22fef9..e121d52fd0 100644 --- a/ql/src/test/queries/clientpositive/tez_smb_1.q +++ b/ql/src/test/queries/clientpositive/tez_smb_1.q @@ -108,3 +108,16 @@ join (select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2 where vt1.id=vt2.id; +-- SMB disabled for external tables +set hive.disable.unsafe.external.table.operations=true; +CREATE EXTERNAL TABLE tab_ext(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab_ext partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +set hive.convert.join.bucket.mapjoin.tez = true; +set hive.auto.convert.sortmerge.join = true; +set hive.auto.convert.join.noconditionaltask.size=500; +set test.comment=SMB disabled for external tables; +set test.comment; +explain +select count(*) from tab_ext s1 join tab_ext s3 on s1.key=s3.key; diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out index 5aa247840a..d9aa25eb77 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out @@ -1843,3 +1843,271 @@ POSTHOOK: Input: default@small 4 4 5 5 6 6 +PREHOOK: query: CREATE EXTERNAL TABLE tab_part_ext (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab_part_ext +POSTHOOK: query: CREATE EXTERNAL TABLE tab_part_ext (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_part_ext +PREHOOK: query: insert overwrite table tab_part_ext partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part_ext@ds=2008-04-08 +POSTHOOK: query: insert overwrite table tab_part_ext partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part_ext@ds=2008-04-08 +POSTHOOK: Lineage: tab_part_ext PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_part_ext PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: analyze table tab_part_ext compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@tab_part_ext +PREHOOK: Input: default@tab_part_ext@ds=2008-04-08 +PREHOOK: Output: default@tab_part_ext +PREHOOK: Output: default@tab_part_ext@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table tab_part_ext compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab_part_ext +POSTHOOK: Input: default@tab_part_ext@ds=2008-04-08 +POSTHOOK: Output: default@tab_part_ext +POSTHOOK: Output: default@tab_part_ext@ds=2008-04-08 +#### A masked pattern was here #### +test.comment=Bucket map join should work here +PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +test.comment=External tables, bucket map join should be disabled +PREHOOK: query: explain select a.key, b.key from tab_part_ext a join tab_part_ext c on a.key = c.key join tab_part_ext b on a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.key, b.key from tab_part_ext a join tab_part_ext c on a.key = c.key join tab_part_ext b on a.value = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out index 4e625e4c9e..c5b765a053 100644 --- a/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out @@ -130,40 +130,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s3 - Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 253 Data size: 3647 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 382 Data size: 3056 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -173,10 +173,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -222,37 +222,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vt1 - Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -264,15 +264,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -281,10 +281,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -295,12 +295,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -367,37 +367,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: vt1 - Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -406,12 +406,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -421,15 +421,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -438,10 +438,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -513,36 +513,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 242 Data size: 45994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 4 @@ -550,28 +550,28 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -580,10 +580,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -659,36 +659,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 242 Data size: 45994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -697,12 +697,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -712,15 +712,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -729,10 +729,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -743,12 +743,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -783,3 +783,124 @@ POSTHOOK: Input: default@tab_part POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### 480 +PREHOOK: query: CREATE EXTERNAL TABLE tab_ext(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab_ext +POSTHOOK: query: CREATE EXTERNAL TABLE tab_ext(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_ext +PREHOOK: query: insert overwrite table tab_ext partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@tab_ext@ds=2008-04-08 +POSTHOOK: query: insert overwrite table tab_ext partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@tab_ext@ds=2008-04-08 +POSTHOOK: Lineage: tab_ext PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_ext PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +test.comment=SMB disabled for external tables +PREHOOK: query: explain +select count(*) from tab_ext s1 join tab_ext s3 on s1.key=s3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from tab_ext s1 join tab_ext s3 on s1.key=s3.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s3 + Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 253 Data size: 3647 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +