diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java index d9a1e0a..e5df744 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java @@ -303,7 +303,7 @@ protected boolean checkConvertBucketMapJoin( // The number of files for the table should be same as number of buckets. int bucketCount = p.getBucketCount(); - if (fileNames.size() != bucketCount) { + if (fileNames.size() != 0 && fileNames.size() != bucketCount) { String msg = "The number of buckets for table " + tbl.getTableName() + " partition " + p.getName() + " is " + p.getBucketCount() + ", whereas the number of files is " + fileNames.size(); @@ -333,7 +333,7 @@ protected boolean checkConvertBucketMapJoin( Integer num = new Integer(tbl.getNumBuckets()); // The number of files for the table should be same as number of buckets. - if (fileNames.size() != num) { + if (fileNames.size() != 0 && fileNames.size() != num) { String msg = "The number of buckets for table " + tbl.getTableName() + " is " + tbl.getNumBuckets() + ", whereas the number of files is " + fileNames.size(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java index 7d99c2e..5320143 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java @@ -48,7 +48,7 @@ public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp) throws SemanticException { int bigTablePos = 0; - long maxSize = 0; + long maxSize = -1; int numPartitionsCurrentBigTable = 0; // number of partitions for the chosen big table HiveConf conf = parseCtx.getConf(); @@ -79,7 +79,7 @@ public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp) for (Partition part : partsList.getNotDeniedPartns()) { totalSize += getSize(conf, part); } - averageSize = totalSize/numPartitions; + averageSize = numPartitions == 0 ? 0 : totalSize/numPartitions; } if (averageSize > maxSize) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java index 5c4de0a..b882f87 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java @@ -41,7 +41,7 @@ public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp) throws SemanticException { int bigTablePos = 0; - long maxSize = 0; + long maxSize = -1; HiveConf conf = parseCtx.getConf(); try { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java index 2b3131f..8139f4a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java @@ -466,7 +466,7 @@ private void mergeMapJoinTaskWithMapReduceTask(MapRedTask mapJoinTask, Configura HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); boolean bigTableFound = false; - long largestBigTableCandidateSize = 0; + long largestBigTableCandidateSize = -1; long sumTableSizes = 0; for (String alias : aliasToWork.keySet()) { int tablePosition = getPosition(currWork, joinOp, alias); diff --git ql/src/test/queries/clientpositive/auto_join32.q ql/src/test/queries/clientpositive/auto_join32.q new file mode 100644 index 0000000..289bfbc --- /dev/null +++ ql/src/test/queries/clientpositive/auto_join32.q @@ -0,0 +1,78 @@ +set hive.auto.convert.join=true; + +-- empty tables +create table studenttab10k (name string, age int, gpa double); +create table votertab10k (name string, age int, registration string, contributions float); + +explain select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name; + +select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name; + +set hive.optimize.bucketmapjoin=true; +set hive.optimize.bucketmapjoin.sortedmerge=true; +set hive.auto.convert.sortmerge.join=true; + +-- smb +create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets; +create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets; + +explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name; + +select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name; + +load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb; +load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb; +load data local inpath '../data/files/empty1.txt' into table votertab10k_smb; +load data local inpath '../data/files/empty2.txt' into table votertab10k_smb; + +explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name; + +select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name; + +-- smb + partitions +create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets; +create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets; + +load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo'); +load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo'); +load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo'); +load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo'); + +explain select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name; + +select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name; + +drop table studenttab10k; +drop table votertab10k; +drop table studenttab10k_smb; +drop table votertab10k_smb; +drop table studenttab10k_part; +drop table votertab10k_part; \ No newline at end of file diff --git ql/src/test/results/clientpositive/auto_join32.q.out ql/src/test/results/clientpositive/auto_join32.q.out new file mode 100644 index 0000000..312664a --- /dev/null +++ ql/src/test/results/clientpositive/auto_join32.q.out @@ -0,0 +1,633 @@ +PREHOOK: query: -- empty tables +create table studenttab10k (name string, age int, gpa double) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- empty tables +create table studenttab10k (name string, age int, gpa double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@studenttab10k +PREHOOK: query: create table votertab10k (name string, age int, registration string, contributions float) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table votertab10k (name string, age int, registration string, contributions float) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@votertab10k +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k) s) (TOK_TABREF (TOK_TABNAME votertab10k) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name)))) + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-4 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + s + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + s + TableScan + alias: s + HashTable Sink Operator + condition expressions: + 0 {name} + 1 {registration} + handleSkewJoin: false + keys: + 0 [Column[name]] + 1 [Column[name]] + Position of Big Table: 1 + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + v + TableScan + alias: v + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {name} + 1 {registration} + handleSkewJoin: false + keys: + 0 [Column[name]] + 1 [Column[name]] + outputColumnNames: _col0, _col7 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col7 + type: string + outputColumnNames: _col0, _col7 + Group By Operator + aggregations: + expr: count(DISTINCT _col7) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col7 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k +PREHOOK: Input: default@votertab10k +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k +POSTHOOK: Input: default@votertab10k +#### A masked pattern was here #### +PREHOOK: query: -- smb +create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- smb +create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_smb) s) (TOK_TABREF (TOK_TABNAME votertab10k_smb) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + s + TableScan + alias: s + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {name} + 1 {registration} + handleSkewJoin: false + keys: + 0 [Column[name]] + 1 [Column[name]] + outputColumnNames: _col0, _col7 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col7 + type: string + outputColumnNames: _col0, _col7 + Group By Operator + aggregations: + expr: count(DISTINCT _col7) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col7 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb +PREHOOK: type: LOAD +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb +POSTHOOK: type: LOAD +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb +PREHOOK: type: LOAD +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb +POSTHOOK: type: LOAD +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_smb +PREHOOK: type: LOAD +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_smb +POSTHOOK: type: LOAD +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_smb +PREHOOK: type: LOAD +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_smb +POSTHOOK: type: LOAD +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_smb) s) (TOK_TABREF (TOK_TABNAME votertab10k_smb) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + s + TableScan + alias: s + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {name} + 1 {registration} + handleSkewJoin: false + keys: + 0 [Column[name]] + 1 [Column[name]] + outputColumnNames: _col0, _col7 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col7 + type: string + outputColumnNames: _col0, _col7 + Group By Operator + aggregations: + expr: count(DISTINCT _col7) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col7 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +PREHOOK: query: -- smb + partitions +create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- smb + partitions +create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@studenttab10k_part +PREHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@votertab10k_part +PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo') +PREHOOK: type: LOAD +PREHOOK: Output: default@studenttab10k_part +POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@studenttab10k_part +POSTHOOK: Output: default@studenttab10k_part@p=foo +PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo') +PREHOOK: type: LOAD +PREHOOK: Output: default@studenttab10k_part@p=foo +POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@studenttab10k_part@p=foo +PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo') +PREHOOK: type: LOAD +PREHOOK: Output: default@votertab10k_part +POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@votertab10k_part +POSTHOOK: Output: default@votertab10k_part@p=foo +PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo') +PREHOOK: type: LOAD +PREHOOK: Output: default@votertab10k_part@p=foo +POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@votertab10k_part@p=foo +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_part) s) (TOK_TABREF (TOK_TABNAME votertab10k_part) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL s) p) 'bar') (= (. (TOK_TABLE_OR_COL v) p) 'bar'))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + s + TableScan + alias: s + Filter Operator + predicate: + expr: (p = 'bar') + type: boolean + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {name} + 1 {registration} + handleSkewJoin: false + keys: + 0 [Column[name]] + 1 [Column[name]] + outputColumnNames: _col0, _col8 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col8 + type: string + outputColumnNames: _col0, _col8 + Group By Operator + aggregations: + expr: count(DISTINCT _col8) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col8 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_part +PREHOOK: Input: default@votertab10k_part +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_part +POSTHOOK: Input: default@votertab10k_part +#### A masked pattern was here #### +PREHOOK: query: drop table studenttab10k +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k +PREHOOK: Output: default@studenttab10k +POSTHOOK: query: drop table studenttab10k +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k +POSTHOOK: Output: default@studenttab10k +PREHOOK: query: drop table votertab10k +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k +PREHOOK: Output: default@votertab10k +POSTHOOK: query: drop table votertab10k +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k +POSTHOOK: Output: default@votertab10k +PREHOOK: query: drop table studenttab10k_smb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: drop table studenttab10k_smb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: drop table votertab10k_smb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k_smb +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: drop table votertab10k_smb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k_smb +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: drop table studenttab10k_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k_part +PREHOOK: Output: default@studenttab10k_part +POSTHOOK: query: drop table studenttab10k_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k_part +POSTHOOK: Output: default@studenttab10k_part +PREHOOK: query: drop table votertab10k_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k_part +PREHOOK: Output: default@votertab10k_part +POSTHOOK: query: drop table votertab10k_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k_part +POSTHOOK: Output: default@votertab10k_part