Index: conf/hive-default.xml.template
===================================================================
--- conf/hive-default.xml.template (revision 1462986)
+++ conf/hive-default.xml.template (working copy)
@@ -933,6 +933,16 @@
+<property>
+  <name>hive.optimize.bucketingsorting</name>
+  <value>true</value>
+  <description>If hive.enforce.bucketing or hive.enforce.sorting is true, don't create a reducer for enforcing
+    bucketing/sorting for queries of the form:
+    insert overwrite table T2 select * from T1;
+    where T1 and T2 are bucketed/sorted by the same keys into the same number of buckets.
+  </description>
+</property>
+
<property>
  <name>hive.enforce.sortmergebucketmapjoin</name>
  <value>false</value>
  <description>If the user asked for sort-merge bucketed map-side join, and it cannot be performed,
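For reference, a minimal Java sketch (not part of the patch) of reading and toggling the new flag through HiveConf. The demo class and main() are illustrative only; getBoolVar/setBoolVar and the HIVEOPTIMIZEBUCKETINGSORTING constant come from the HiveConf change below.

import org.apache.hadoop.hive.conf.HiveConf;

public class BucketingSortingFlagDemo {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();

    // Defaults to true per the template entry above.
    boolean enabled =
        conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEBUCKETINGSORTING);
    System.out.println("hive.optimize.bucketingsorting = " + enabled);

    // Turn it off to force the enforcing reducer back into such plans.
    conf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEBUCKETINGSORTING, false);
  }
}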
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1462986)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -513,6 +513,7 @@
HIVEENFORCEBUCKETING("hive.enforce.bucketing", false),
HIVEENFORCESORTING("hive.enforce.sorting", false),
+ HIVEOPTIMIZEBUCKETINGSORTING("hive.optimize.bucketingsorting", true),
HIVEPARTITIONER("hive.mapred.partitioner", "org.apache.hadoop.hive.ql.io.DefaultHivePartitioner"),
HIVEENFORCESORTMERGEBUCKETMAPJOIN("hive.enforce.sortmergebucketmapjoin", false),
HIVEENFORCEBUCKETMAPJOIN("hive.enforce.bucketmapjoin", false),
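A hedged sketch of the guard an optimizer rule would plausibly apply before dropping the enforcing reducer; the class and method names here are hypothetical, only the ConfVars constants are from the code above.

import org.apache.hadoop.hive.conf.HiveConf;

public class BucketingSortingGuardSketch {
  // Hypothetical pre-check: the reducer can only be considered for removal
  // when enforcement is requested and the optimization is not disabled.
  static boolean mayRemoveEnforcingReducer(HiveConf conf) {
    boolean enforced =
        conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETING)
        || conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCESORTING);
    boolean optimize =
        conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEBUCKETINGSORTING);
    return enforced && optimize;
  }
}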
Index: common/src/java/org/apache/hadoop/hive/common/ObjectPair.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/common/ObjectPair.java (revision 1462986)
+++ common/src/java/org/apache/hadoop/hive/common/ObjectPair.java (working copy)
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.common;
+
+
public class ObjectPair<F, S> {
  private F first;
  private S second;
@@ -44,4 +46,24 @@
  public void setSecond(S second) {
    this.second = second;
  }
+
+  @Override
+  public boolean equals(Object that) {
+    if (that == null) {
+      return false;
+    }
+    if (that instanceof ObjectPair) {
+      return this.equals((ObjectPair<F, S>) that);
+    }
+    return false;
+  }
+
+  public boolean equals(ObjectPair<F, S> that) {
+    if (that == null) {
+      return false;
+    }
+
+    return this.getFirst().equals(that.getFirst()) &&
+        this.getSecond().equals(that.getSecond());
+  }
}
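A quick usage sketch of the new equality methods. The two-argument constructor is assumed from the existing class (it is not shown in this hunk); only the equals() overloads are introduced by the patch. The Object overload delegates to the typed overload after an instanceof check, and both components are dereferenced, so callers are expected to supply non-null values.

import org.apache.hadoop.hive.common.ObjectPair;

public class ObjectPairEqualsDemo {
  public static void main(String[] args) {
    // Assumed constructor: ObjectPair(F first, S second).
    ObjectPair<String, Integer> a = new ObjectPair<String, Integer>("bucket", 16);
    ObjectPair<String, Integer> b = new ObjectPair<String, Integer>("bucket", 16);

    System.out.println(a.equals(b));          // true: both components match
    System.out.println(a.equals((Object) b)); // true: delegates to typed overload
    System.out.println(a.equals(null));       // false: null is guarded explicitly
  }
}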
Index: ql/src/test/results/clientpositive/smb_mapjoin_19.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_19.q.out (revision 0)
+++ ql/src/test/results/clientpositive/smb_mapjoin_19.q.out (working copy)
@@ -0,0 +1,315 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table1 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+36
+PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+40
+PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 12
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 12
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+29
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+36
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 6 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 6 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+40
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 13 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 13 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+29
+PREHOOK: query: select count(*) from test_table2 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+36
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+40
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 12
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 12
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+29
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+36
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+40
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 13 out of 16) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 13 out of 16) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+29
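The per-bucket counts above (36, 40, 29) match between test_table1, test_table2, and the corresponding TABLESAMPLE reads because the map-only plan preserves the source bucketing: each source bucket lands in the same-numbered target bucket and rows are never rehashed. A rough sketch of the routing that the hash(key) % 16 predicates mirror, assuming Hive's default hash-mod bucket assignment where the hash of an int is the value itself:

public class BucketAssignmentSketch {
  // Bucket index for an int key; the mask keeps the result non-negative.
  static int bucketFor(int key, int numBuckets) {
    return (key & Integer.MAX_VALUE) % numBuckets;
  }

  public static void main(String[] args) {
    // TABLESAMPLE (BUCKET 1 OUT OF 16) reads bucket index 0, so its count
    // matches the hash(key) % 16 = 0 query above (36 rows here).
    System.out.println(bucketFor(0, 16));  // 0
    System.out.println(bucketFor(86, 16)); // 6
  }
}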
Index: ql/src/test/results/clientpositive/smb_mapjoin_20.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_20.q.out (revision 0)
+++ ql/src/test/results/clientpositive/smb_mapjoin_20.q.out (working copy)
@@ -0,0 +1,454 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key STRING, value1 STRING, value2 string) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key STRING, value1 STRING, value2 string) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- with different datatypes. This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- with different datatypes. This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: UDFToString(_col0)
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: UDFToString(_col0)
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table2 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+242
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+258
+PREHOOK: query: CREATE TABLE test_table3 (key STRING, value1 int, value2 string) PARTITIONED BY (ds STRING)
+CLUSTERED BY (value1) SORTED BY (value1) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table3 (key STRING, value1 int, value2 string) PARTITIONED BY (ds STRING)
+CLUSTERED BY (value1) SORTED BY (value1) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table3
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, although the bucketing positions don't match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, although the bucketing positions don't match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: value
+ type: string
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table3 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table3 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: select count(*) from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- However, since an expression is being selected, it should involve a reducer
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key+a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- However, since an expression is being selected, it should involve a reducer
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key+a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL a) key))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: (key + key)
+ type: int
+ expr: value
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: UDFToString(_col0)
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: UDFToString(_col0)
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 2
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
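The smb_mapjoin_20 output above exercises both sides of the type check: casting the bucketing column (int key into string key via UDFToString) keeps the enforcing reducer, while reordering columns into test_table3, where the int key still feeds the int bucketing column value1, stays map-only. A hypothetical sketch of that decision; the method is illustrative, not the patch's actual API:

public class BucketColumnCastSketch {
  // If the expression feeding the target's bucketing column needs a cast,
  // source and target bucket hashes can disagree, so the reducer must stay.
  static boolean needsEnforcingReducer(String srcColType, String destColType) {
    return !srcColType.equalsIgnoreCase(destColType);
  }

  public static void main(String[] args) {
    System.out.println(needsEnforcingReducer("int", "string")); // true
    System.out.println(needsEnforcingReducer("int", "int"));    // false
  }
}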
Index: ql/src/test/results/clientpositive/smb_mapjoin_18.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_18.q.out (revision 0)
+++ ql/src/test/results/clientpositive/smb_mapjoin_18.q.out (working copy)
@@ -0,0 +1,591 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table1 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: select count(*) from test_table2 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation; one of the buckets should be empty
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation; one of the buckets should be empty
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL a) key) 238)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: (key = 238)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 2
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=2
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table2 where ds = '2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+2
+PREHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+2
+PREHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+0
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+2
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+0
+PREHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3')
+SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3')
+SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '3')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '2'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 3
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=2
+PREHOOK: Output: default@test_table2@ds=2
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=2
+POSTHOOK: Output: default@test_table2@ds=2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table2 where ds = '3'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '3'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+0
+PREHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+0
+PREHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+0
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '3'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '3'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+0
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '3'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '3'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+0
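Editor's note: the golden file above exercises the map-only insert path these tests cover: when the source and destination tables share the same bucketing and sorting spec, Stage-1 carries no Reduce Output Operator, and the TABLESAMPLE counts confirm rows still land in the correct buckets. A minimal sketch of the scenario follows; t1 and t2 are illustrative placeholder names, not part of this patch:

    SET hive.enforce.bucketing=true;
    SET hive.enforce.sorting=true;
    SET hive.optimize.bucketingsorting=true;  -- the optimization under test; on by default

    CREATE TABLE t1 (key INT, value STRING)
    CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS;
    CREATE TABLE t2 (key INT, value STRING)
    CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS;

    -- Identical bucketing and sorting on both sides: the reducer that would
    -- otherwise enforce them can be dropped, so the plan should be map-only
    -- (compare the Stage-1 plans in the output above).
    EXPLAIN INSERT OVERWRITE TABLE t2 SELECT * FROM t1;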
Index: ql/src/test/results/clientpositive/smb_mapjoin_22.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_22.q.out (revision 0)
+++ ql/src/test/results/clientpositive/smb_mapjoin_22.q.out (working copy)
@@ -0,0 +1,360 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ test_table1
+ TableScan
+ alias: test_table1
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: select count(*) from test_table2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: drop table test_table1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_table1
+PREHOOK: Output: default@test_table1
+POSTHOOK: query: drop table test_table1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Output: default@test_table1
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: drop table test_table2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_table2
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: drop table test_table2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ test_table1
+ TableScan
+ alias: test_table1
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: select count(*) from test_table2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ]
+253
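Editor's note: throughout these golden files, bucket placement is cross-checked two ways: TABLESAMPLE (BUCKET x OUT OF y) reads the physical bucket files, while hash(key) % y recomputes the expected bucket from the data. When the map-only insert preserves bucketing, the two counts agree (253 rows in bucket 2 of 2 above, matching the 253 rows with hash(key) % 2 = 1 in the earlier file). A sketch of the check, with t2 again a placeholder name:

    -- Physical check: read only the second of two bucket files.
    SELECT count(*) FROM t2 TABLESAMPLE (BUCKET 2 OUT OF 2) s;

    -- Logical check: recompute which rows ought to land in that bucket.
    SELECT count(*) FROM t2 WHERE hash(key) % 2 = 1;

    -- If the insert kept the table properly bucketed, both counts match.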
Index: ql/src/test/results/clientpositive/smb_mapjoin_21.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_21.q.out (revision 0)
+++ ql/src/test/results/clientpositive/smb_mapjoin_21.q.out (working copy)
@@ -0,0 +1,568 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: drop table test_table2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_table2
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: drop table test_table2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort orders do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort orders do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: -
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: drop table test_table2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_table2
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: drop table test_table2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key, value) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key, value) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort columns do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort columns do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: drop table test_table2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_table2
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: drop table test_table2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (value) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (value) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort columns do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort columns do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: drop table test_table2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_table2
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: drop table test_table2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the number of buckets does not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the number of buckets does not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: drop table test_table2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_table2
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: drop table test_table2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort columns do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort columns do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
Index: ql/src/test/queries/clientpositive/smb_mapjoin_20.q
===================================================================
--- ql/src/test/queries/clientpositive/smb_mapjoin_20.q (revision 0)
+++ ql/src/test/queries/clientpositive/smb_mapjoin_20.q (working copy)
@@ -0,0 +1,53 @@
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key STRING, value1 STRING, value2 string) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- with different datatypes. This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1';
+
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1';
+
+select count(*) from test_table2 where ds = '1';
+select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0;
+select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1;
+
+CREATE TABLE test_table3 (key STRING, value1 int, value2 string) PARTITIONED BY (ds STRING)
+CLUSTERED BY (value1) SORTED BY (value1) INTO 2 BUCKETS;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, although the bucketing positions don't match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1';
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1';
+
+select count(*) from test_table3 where ds = '1';
+select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 0;
+select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 1;
+select count(*) from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select count(*) from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- However, since an expression is being selected, it should involve a reducer
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key+a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1';
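
The final EXPLAIN above keeps the reducer because the select list contains the
expression a.key+a.key rather than a bare column. As a standalone sketch of that
guard (simplified stand-ins for Hive's expression descriptors; not the patch code
itself):

    import java.util.Arrays;
    import java.util.List;

    public class ReduceKeyCheckSketch {
      // Simplified stand-ins for Hive's ExprNodeDesc hierarchy.
      interface ExprNodeDesc {}
      static class ExprNodeColumnDesc implements ExprNodeDesc {}       // a bare column
      static class ExprNodeGenericFuncDesc implements ExprNodeDesc {}  // e.g. key + key

      // The optimizer bails out unless every reduce key is a bare column,
      // since an expression can change which bucket a row belongs to.
      static boolean canRemoveReducer(List<ExprNodeDesc> reduceKeys) {
        for (ExprNodeDesc key : reduceKeys) {
          if (!(key instanceof ExprNodeColumnDesc)) {
            return false;
          }
        }
        return true;
      }

      public static void main(String[] args) {
        System.out.println(canRemoveReducer(
            Arrays.<ExprNodeDesc>asList(new ExprNodeColumnDesc())));      // true: map-only
        System.out.println(canRemoveReducer(
            Arrays.<ExprNodeDesc>asList(new ExprNodeGenericFuncDesc()))); // false: reducer stays
      }
    }
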
Index: ql/src/test/queries/clientpositive/smb_mapjoin_21.q
===================================================================
--- ql/src/test/queries/clientpositive/smb_mapjoin_21.q (revision 0)
+++ ql/src/test/queries/clientpositive/smb_mapjoin_21.q (working copy)
@@ -0,0 +1,77 @@
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1';
+
+drop table test_table2;
+
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort orders do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1';
+
+drop table test_table2;
+
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key, value) INTO 2 BUCKETS;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort columns do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1';
+
+drop table test_table2;
+
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (value) INTO 2 BUCKETS;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort columns do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1';
+
+drop table test_table2;
+
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the number of buckets does not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1';
+
+drop table test_table2;
+
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation since the sort columns do not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1';
Index: ql/src/test/queries/clientpositive/smb_mapjoin_18.q
===================================================================
--- ql/src/test/queries/clientpositive/smb_mapjoin_18.q (revision 0)
+++ ql/src/test/queries/clientpositive/smb_mapjoin_18.q (working copy)
@@ -0,0 +1,65 @@
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1';
+
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1';
+
+select count(*) from test_table1 where ds = '1';
+select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 0;
+select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 1;
+select count(*) from test_table1 tablesample (bucket 1 out of 2) s where ds = '1';
+select count(*) from test_table1 tablesample (bucket 2 out of 2) s where ds = '1';
+
+select count(*) from test_table2 where ds = '1';
+select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0;
+select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1;
+select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1';
+select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation; one of the buckets should be empty
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238;
+
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238;
+
+select count(*) from test_table2 where ds = '2';
+select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 0;
+select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 1;
+select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '2';
+select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '2';
+
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3')
+SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2';
+
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3')
+SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2';
+
+select count(*) from test_table2 where ds = '3';
+select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 0;
+select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 1;
+select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '3';
+select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '3';
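
The hash(key) % 2 queries above assert the bucketing invariant: every row in
bucket file N hashes to N. A minimal sketch of that check, assuming Hive's
default assignment of (hash & Integer.MAX_VALUE) % numBuckets with hash(int)
being the value itself (a simplification, not Hive's ObjectInspectorUtils code):

    public class BucketCheckSketch {
      static int bucketFor(int key, int numBuckets) {
        return (key & Integer.MAX_VALUE) % numBuckets;
      }

      public static void main(String[] args) {
        // Key 238 (the only key inserted for ds = '2') lands in bucket 0 of 2,
        // which is why the other bucket file is expected to be empty.
        System.out.println(bucketFor(238, 2)); // 0
      }
    }
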
Index: ql/src/test/queries/clientpositive/smb_mapjoin_22.q
===================================================================
--- ql/src/test/queries/clientpositive/smb_mapjoin_22.q (revision 0)
+++ ql/src/test/queries/clientpositive/smb_mapjoin_22.q (working copy)
@@ -0,0 +1,55 @@
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 SELECT *;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1;
+
+INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1;
+
+select count(*) from test_table1;
+select count(*) from test_table1 tablesample (bucket 2 out of 2) s;
+
+select count(*) from test_table2;
+select count(*) from test_table2 tablesample (bucket 2 out of 2) s;
+
+drop table test_table1;
+drop table test_table2;
+
+CREATE TABLE test_table1 (key INT, value STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 SELECT *;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1;
+
+INSERT OVERWRITE TABLE test_table2
+SELECT * FROM test_table1;
+
+select count(*) from test_table1;
+select count(*) from test_table1 tablesample (bucket 2 out of 2) s;
+
+select count(*) from test_table2;
+select count(*) from test_table2 tablesample (bucket 2 out of 2) s;
Index: ql/src/test/queries/clientpositive/smb_mapjoin_19.q
===================================================================
--- ql/src/test/queries/clientpositive/smb_mapjoin_19.q (revision 0)
+++ ql/src/test/queries/clientpositive/smb_mapjoin_19.q (working copy)
@@ -0,0 +1,41 @@
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1';
+
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1';
+
+select count(*) from test_table1 where ds = '1';
+select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 0;
+select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 5;
+select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 12;
+select count(*) from test_table1 tablesample (bucket 1 out of 16) s where ds = '1';
+select count(*) from test_table1 tablesample (bucket 6 out of 16) s where ds = '1';
+select count(*) from test_table1 tablesample (bucket 13 out of 16) s where ds = '1';
+
+select count(*) from test_table2 where ds = '1';
+select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 0;
+select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 5;
+select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 12;
+select count(*) from test_table2 tablesample (bucket 1 out of 16) s where ds = '1';
+select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1';
+select count(*) from test_table2 tablesample (bucket 13 out of 16) s where ds = '1';
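
The tablesample queries above pair with the hash checks: TABLESAMPLE (BUCKET x
OUT OF 16) on a table bucketed into 16 buckets should return exactly the rows
with hash(key) % 16 == x - 1, since sampling is 1-based while bucket hashing is
0-based. A one-line sketch of that correspondence (illustrative only):

    public class TableSampleSketch {
      // BUCKET x OUT OF y corresponds to the rows with hash % y == x - 1.
      static int sampledRemainder(int x) {
        return x - 1;
      }

      public static void main(String[] args) {
        System.out.println(sampledRemainder(6));  // 5, matching hash(key) % 16 = 5
        System.out.println(sampledRemainder(13)); // 12, matching hash(key) % 16 = 12
      }
    }
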
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (working copy)
@@ -0,0 +1,399 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+
+/**
+ * This transformation optimizes away the reducer used to enforce bucketing and sorting.
+ * For a query of the form:
+ * insert overwrite table T1 select * from T2;
+ * where T1 and T2 are bucketed/sorted on the same keys, we don't need a reducer to
+ * enforce bucketing and sorting.
+ */
+public class BucketingSortingReduceSinkOptimizer implements Transform {
+
+ private static final Log LOG = LogFactory.getLog(BucketingSortingReduceSinkOptimizer.class
+ .getName());
+
+ public BucketingSortingReduceSinkOptimizer() {
+ }
+
+ @Override
+ public ParseContext transform(ParseContext pctx) throws SemanticException {
+
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ HiveConf conf = pctx.getConf();
+
+ // process the ReduceSink - Extract - FileSink pattern added for enforcing bucketing/sorting
+ opRules.put(new RuleRegExp("R1",
+ ReduceSinkOperator.getOperatorName() + "%" +
+ ExtractOperator.getOperatorName() + "%" +
+ FileSinkOperator.getOperatorName() + "%"),
+ getBucketSortReduceSinkProc(pctx));
+
+ // The dispatcher fires the processor corresponding to the closest matching rule
+ Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, null);
+ GraphWalker ogw = new DefaultGraphWalker(disp);
+
+ // Create a list of topop nodes
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(pctx.getTopOps().values());
+ ogw.startWalking(topNodes, null);
+
+ return pctx;
+ }
+
+ private NodeProcessor getDefaultProc() {
+ return new NodeProcessor() {
+ @Override
+ public Object process(Node nd, Stack stack,
+ NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
+ return null;
+ }
+ };
+ }
+
+ private NodeProcessor getBucketSortReduceSinkProc(ParseContext pctx) {
+ return new BucketSortReduceSinkProcessor(pctx);
+ }
+
+ /**
+ * BucketSortReduceSinkProcessor.
+ *
+ */
+ public class BucketSortReduceSinkProcessor implements NodeProcessor {
+
+ protected ParseContext pGraphContext;
+
+ public BucketSortReduceSinkProcessor(ParseContext pGraphContext) {
+ this.pGraphContext = pGraphContext;
+ }
+
+ private List<Integer> getBucketPositions(List<String> tabBucketCols,
+ List<FieldSchema> tabCols) {
+ List<Integer> posns = new ArrayList<Integer>();
+ for (String bucketCol : tabBucketCols) {
+ int pos = 0;
+ for (FieldSchema tabCol : tabCols) {
+ if (bucketCol.equals(tabCol.getName())) {
+ posns.add(pos);
+ break;
+ }
+ pos++;
+ }
+ }
+ return posns;
+ }
+
+ private List<ObjectPair<Integer, Integer>> getSortPositions(List<Order> tabSortCols,
+ List<FieldSchema> tabCols) {
+ List<ObjectPair<Integer, Integer>> posns = new ArrayList<ObjectPair<Integer, Integer>>();
+ for (Order sortCol : tabSortCols) {
+ int pos = 0;
+ for (FieldSchema tabCol : tabCols) {
+ if (sortCol.getCol().equals(tabCol.getName())) {
+ posns.add(new ObjectPair<Integer, Integer>(pos, sortCol.getOrder()));
+ break;
+ }
+ pos++;
+ }
+ }
+ return posns;
+ }
+
+ private boolean checkPartition(Partition partition,
+ List<Integer> bucketPositionsDest,
+ List<ObjectPair<Integer, Integer>> sortPositionsDest,
+ int numBucketsDest) {
+ // The bucketing and sorting positions should exactly match
+ int numBuckets = partition.getBucketCount();
+ if (numBucketsDest != numBuckets) {
+ return false;
+ }
+
+ List<Integer> partnBucketPositions =
+ getBucketPositions(partition.getBucketCols(), partition.getTable().getCols());
+ List<ObjectPair<Integer, Integer>> partnSortPositions =
+ getSortPositions(partition.getSortCols(), partition.getTable().getCols());
+ return bucketPositionsDest.equals(partnBucketPositions) &&
+ sortPositionsDest.equals(partnSortPositions);
+ }
+
+ private boolean checkTable(Table table,
+ List<Integer> bucketPositionsDest,
+ List<ObjectPair<Integer, Integer>> sortPositionsDest,
+ int numBucketsDest) {
+ // The bucketing and sorting positions should exactly match
+ int numBuckets = table.getNumBuckets();
+ if (numBucketsDest != numBuckets) {
+ return false;
+ }
+
+ List<Integer> tableBucketPositions =
+ getBucketPositions(table.getBucketCols(), table.getCols());
+ List<ObjectPair<Integer, Integer>> tableSortPositions =
+ getSortPositions(table.getSortCols(), table.getCols());
+ return bucketPositionsDest.equals(tableBucketPositions) &&
+ sortPositionsDest.equals(tableSortPositions);
+ }
+
+ private void removeReduceSink(ReduceSinkOperator rsOp,
+ TableScanOperator tsOp,
+ FileSinkOperator fsOp,
+ Table table) {
+ removeReduceSink(rsOp, tsOp, fsOp);
+ FileStatus[] srcs = table.getSortedPaths();
+ if (srcs == null) {
+ return;
+ }
+ Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>();
+ for (int pos = 0; pos < srcs.length; pos++) {
+ bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos);
+ }
+ tsOp.getConf().setBucketFileNameMapping(bucketFileNameMapping);
+ }
+
+ private void removeReduceSink(ReduceSinkOperator rsOp,
+ TableScanOperator tsOp,
+ FileSinkOperator fsOp,
+ Partition partition) {
+ removeReduceSink(rsOp, tsOp, fsOp);
+ FileStatus[] srcs = partition.getSortedPaths();
+ if (srcs == null) {
+ return;
+ }
+ Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>();
+ for (int pos = 0; pos < srcs.length; pos++) {
+ bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos);
+ }
+ tsOp.getConf().setBucketFileNameMapping(bucketFileNameMapping);
+ }
+
+ private void removeReduceSink(ReduceSinkOperator rsOp,
+ TableScanOperator tsOp,
+ FileSinkOperator fsOp) {
+ Operator<? extends OperatorDesc> parRSOp = rsOp.getParentOperators().get(0);
+ parRSOp.getChildOperators().set(0, fsOp);
+ fsOp.getParentOperators().set(0, parRSOp);
+ fsOp.getConf().setMultiFileSpray(false);
+ fsOp.getConf().setTotalFiles(1);
+ fsOp.getConf().setNumFiles(1);
+ tsOp.setUseBucketizedHiveInputFormat(true);
+ }
+
+ private int findColumnPosition(List<FieldSchema> cols, String colName) {
+ int pos = 0;
+ for (FieldSchema col : cols) {
+ if (colName.equals(col.getName())) {
+ return pos;
+ }
+ pos++;
+ }
+ return -1;
+ }
+
+ @Override
+ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ // If the reduce sink has not been introduced due to bucketing/sorting, ignore it
+ FileSinkOperator fsOp = (FileSinkOperator) nd;
+ ExtractOperator exOp = (ExtractOperator) fsOp.getParentOperators().get(0);
+ ReduceSinkOperator rsOp = (ReduceSinkOperator) exOp.getParentOperators().get(0);
+
+ List<ReduceSinkOperator> rsOps = pGraphContext
+ .getReduceSinkOperatorsAddedByEnforceBucketingSorting();
+ // nothing to do
+ if ((rsOps != null) && (!rsOps.contains(rsOp))) {
+ return null;
+ }
+
+ // Support for dynamic partitions can be added later
+ if (fsOp.getConf().getDynPartCtx() != null) {
+ return null;
+ }
+
+ // No conversion is possible for the reduce keys
+ for (ExprNodeDesc keyCol : rsOp.getConf().getKeyCols()) {
+ if (!(keyCol instanceof ExprNodeColumnDesc)) {
+ return null;
+ }
+ }
+
+ Table destTable = pGraphContext.getFsopToTable().get(fsOp);
+ if (destTable == null) {
+ return null;
+ }
+
+ // Get the positions for sorted and bucketed columns
+ List<Integer> bucketPositions =
+ getBucketPositions(destTable.getBucketCols(), destTable.getCols());
+ List<ObjectPair<Integer, Integer>> sortPositions =
+ getSortPositions(destTable.getSortCols(), destTable.getCols());
+
+ // Only selects and filters are allowed
+ Operator<? extends OperatorDesc> op = rsOp;
+ // TableScan will also be followed by a Select Operator. Find the expressions for the
+ // bucketed columns for the destination table
+ List<ExprNodeColumnDesc> sourceTableBucketCols = new ArrayList<ExprNodeColumnDesc>();
+ List<ExprNodeColumnDesc> sourceTableSortCols = new ArrayList<ExprNodeColumnDesc>();
+
+ while (true) {
+ if (op.getParentOperators().size() > 1) {
+ return null;
+ }
+
+ op = op.getParentOperators().get(0);
+ if (!(op instanceof TableScanOperator) &&
+ !(op instanceof FilterOperator) &&
+ !(op instanceof SelectOperator)) {
+ return null;
+ }
+
+ // nothing to be done for filters - the output schema does not change.
+ if (op instanceof TableScanOperator) {
+ Table srcTable = pGraphContext.getTopToTable().get(op);
+
+ // Find the positions of the columns in the table corresponding to the select list.
+ List<Integer> newBucketPositions = new ArrayList<Integer>();
+ for (int pos = 0; pos < bucketPositions.size(); pos++) {
+ ExprNodeColumnDesc col = sourceTableBucketCols.get(pos);
+ String colName = col.getColumn();
+ int bucketPos = findColumnPosition(srcTable.getCols(), colName);
+ if (bucketPos < 0) {
+ return null;
+ }
+ newBucketPositions.add(bucketPos);
+ }
+
+ // Find the positions of the columns in the table corresponding to the select list.
+ List<ObjectPair<Integer, Integer>> newSortPositions =
+ new ArrayList<ObjectPair<Integer, Integer>>();
+ for (int pos = 0; pos < sortPositions.size(); pos++) {
+ ExprNodeColumnDesc col = sourceTableSortCols.get(pos);
+ String colName = col.getColumn();
+ int sortPos = findColumnPosition(srcTable.getCols(), colName);
+ if (sortPos < 0) {
+ return null;
+ }
+ newSortPositions.add(
+ new ObjectPair<Integer, Integer>(sortPos, sortPositions.get(pos).getSecond()));
+ }
+
+
+ if (srcTable.isPartitioned()) {
+ PrunedPartitionList prunedParts = pGraphContext.getOpToPartList().get(op);
+ List<Partition> partitions = prunedParts.getNotDeniedPartns();
+
+ // Support for dynamic partitions can be added later
+ // Don't optimize the case:
+ // insert overwrite table T1 partition (ds='1', hr) select key, value, hr from T2 where ds = '1';
+ // where T1 and T2 are bucketed by the same keys and partitioned by ds, hr
+ if ((partitions == null) || (partitions.isEmpty()) || (partitions.size() > 1)) {
+ return null;
+ }
+ for (Partition partition : partitions) {
+ if (!checkPartition(partition, newBucketPositions, newSortPositions,
+ pGraphContext.getFsopToTable().get(fsOp).getNumBuckets())) {
+ return null;
+ }
+ }
+
+ removeReduceSink(rsOp, (TableScanOperator) op, fsOp,
+ partitions.get(0));
+ return null;
+ }
+ else {
+ if (!checkTable(srcTable, newBucketPositions, newSortPositions,
+ pGraphContext.getFsopToTable().get(fsOp).getNumBuckets())) {
+ return null;
+ }
+
+ removeReduceSink(rsOp, (TableScanOperator) op, fsOp, srcTable);
+ return null;
+ }
+ }
+ // None of these operators changes the column positions
+ else if (op instanceof SelectOperator) {
+ SelectOperator selectOp = (SelectOperator) op;
+ SelectDesc selectDesc = selectOp.getConf();
+
+ sourceTableBucketCols.clear();
+ sourceTableSortCols.clear();
+
+ // Only columns can be selected for both sorted and bucketed positions
+ for (int pos : bucketPositions) {
+ ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
+ if (!(selectColList instanceof ExprNodeColumnDesc)) {
+ return null;
+ }
+ sourceTableBucketCols.add((ExprNodeColumnDesc) selectColList);
+ }
+
+ for (ObjectPair<Integer, Integer> pos : sortPositions) {
+ ExprNodeDesc selectColList = selectDesc.getColList().get(pos.getFirst());
+ if (!(selectColList instanceof ExprNodeColumnDesc)) {
+ return null;
+ }
+ sourceTableSortCols.add((ExprNodeColumnDesc) selectColList);
+ }
+ }
+ }
+ }
+ }
+}
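
To make the position matching above concrete: bucket and sort columns are
compared by position, traced back through the select list, not by name. A
self-contained sketch of the smb_mapjoin_20.q test_table3 case (a hypothetical
simplification of getBucketPositions/findColumnPosition):

    import java.util.Arrays;
    import java.util.List;

    public class PositionMatchSketch {
      public static void main(String[] args) {
        // Destination test_table3(key, value1, value2) is CLUSTERED BY (value1).
        List<String> destCols = Arrays.asList("key", "value1", "value2");
        int destBucketPos = destCols.indexOf("value1"); // 1
        // SELECT a.value, a.key, a.value puts source column "key" at position 1.
        List<String> selectList = Arrays.asList("value", "key", "value");
        String tracedCol = selectList.get(destBucketPos); // "key"
        // Source test_table1(key, value) is CLUSTERED BY (key).
        List<String> srcCols = Arrays.asList("key", "value");
        boolean matches = srcCols.indexOf(tracedCol) == srcCols.indexOf("key");
        System.out.println(matches); // true -> the reducer can be removed
      }
    }
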
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (revision 1462986)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (working copy)
@@ -58,6 +58,9 @@
/* Add list bucketing pruner. */
transformations.add(new ListBucketingPruner());
}
+ if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEBUCKETINGSORTING)) {
+ transformations.add(new BucketingSortingReduceSinkOptimizer());
+ }
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCP)) {
transformations.add(new ColumnPruner());
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (revision 1462986)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (working copy)
@@ -384,11 +384,10 @@
}
/**
- * mapping from bucket number to bucket path
+ * get all paths for this partition in a sorted manner
*/
- // TODO: add test case and clean it up
@SuppressWarnings("nls")
- public Path getBucketPath(int bucketNum) {
+ public FileStatus[] getSortedPaths() {
try {
// Previously, this got the filesystem of the Table, which could be
// different from the filesystem of the partition.
@@ -407,14 +406,26 @@
if (srcs.length == 0) {
return null;
}
- return srcs[bucketNum].getPath();
+ return srcs;
} catch (Exception e) {
- throw new RuntimeException("Cannot get bucket path for bucket "
- + bucketNum, e);
+ throw new RuntimeException("Cannot get path ", e);
}
}
+ /**
+ * mapping from bucket number to bucket path
+ */
+ // TODO: add test case and clean it up
@SuppressWarnings("nls")
+ public Path getBucketPath(int bucketNum) {
+ FileStatus srcs[] = getSortedPaths();
+ if (srcs == null) {
+ return null;
+ }
+ return srcs[bucketNum].getPath();
+ }
+
+ @SuppressWarnings("nls")
public Path[] getPath(Sample s) throws HiveException {
if (s == null) {
return getPath();
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (revision 1462986)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (working copy)
@@ -22,6 +22,7 @@
import java.io.Serializable;
import java.net.URI;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
@@ -31,6 +32,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.JavaUtils;
@@ -925,4 +927,30 @@
Hive hive = Hive.get();
return hive.getIndexes(getTTable().getDbName(), getTTable().getTableName(), max);
}
+
+ @SuppressWarnings("nls")
+ public FileStatus[] getSortedPaths() {
+ try {
+ // Previously, this got the filesystem of the Table, which could be
+ // different from the filesystem of the partition.
+ FileSystem fs = FileSystem.get(getPath().toUri(), Hive.get()
+ .getConf());
+ String pathPattern = getPath().toString();
+ if (getNumBuckets() > 0) {
+ pathPattern = pathPattern + "/*";
+ }
+ LOG.info("Path pattern = " + pathPattern);
+ FileStatus srcs[] = fs.globStatus(new Path(pathPattern));
+ Arrays.sort(srcs);
+ for (FileStatus src : srcs) {
+ LOG.info("Got file: " + src.getPath());
+ }
+ if (srcs.length == 0) {
+ return null;
+ }
+ return srcs;
+ } catch (Exception e) {
+ throw new RuntimeException("Cannot get path ", e);
+ }
+ }
};
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (revision 1462986)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (working copy)
@@ -94,6 +94,19 @@
@Override
public void cleanUpInputFileChangedOp() throws HiveException {
inputFileChanged = true;
+ // If the file name to bucket number mapping is maintained, store the bucket number
+ // in the execution context. This is needed for the following scenario:
+ // insert overwrite table T1 select * from T2;
+ // where T1 and T2 are sorted/bucketed by the same keys into the same number of buckets
+ // Although one mapper per file is used (BucketizedHiveInputFormat), it is possible that
+ // any mapper can pick up any file (depending on the size of the files). The bucket number
+ // corresponding to the input file is stored to name the output bucket file appropriately.
+ Map<String, Integer> bucketNameMapping = conf != null ? conf.getBucketFileNameMapping() : null;
+ if ((bucketNameMapping != null) && (!bucketNameMapping.isEmpty())) {
+ String currentInputFile = getExecContext().getCurrentInputFile();
+ getExecContext().setFileId(Integer.toString(bucketNameMapping.get(
+ Utilities.getFileNameFromDirName(currentInputFile))));
+ }
}
private void gatherStats(Object row) {
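
A small sketch of the mapping consulted above, assuming bucket files sort
lexically by name ("000000_0", "000001_0", ...; file names illustrative). Any
mapper may be handed any bucket file, so the output file id must come from the
input file's position among the sorted bucket files, not from mapper order:

    import java.util.HashMap;
    import java.util.Map;

    public class BucketFileNameMappingSketch {
      public static void main(String[] args) {
        String[] sortedBucketFiles = {"000000_0", "000001_0"}; // as from getSortedPaths()
        Map<String, Integer> mapping = new HashMap<String, Integer>();
        for (int pos = 0; pos < sortedBucketFiles.length; pos++) {
          mapping.put(sortedBucketFiles[pos], pos);
        }
        String currentInputFile = "000001_0"; // whichever file this mapper reads
        System.out.println(mapping.get(currentInputFile)); // 1 -> names the output bucket
      }
    }
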
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java (revision 1462986)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java (working copy)
@@ -20,6 +20,7 @@
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
@@ -46,7 +47,7 @@
/**
* Used for split sampling (row count per split)
* For example,
- * select count(1) from ss_src2 tablesample(10 ROWS);
+ * select count(1) from ss_src2 tablesample (10 ROWS) s;
* provides first 10 rows from all input splits
*/
private int rowLimit = -1;
@@ -67,6 +68,9 @@
public static final String FILTER_TEXT_CONF_STR =
"hive.io.filter.text";
+ // mapping from input file name to bucket number
+ private Map bucketFileNameMapping;
+
@SuppressWarnings("nls")
public TableScanDesc() {
}
@@ -170,4 +174,12 @@
public Integer getRowLimitExplain() {
return rowLimit >= 0 ? rowLimit : null;
}
+
+ public Map<String, Integer> getBucketFileNameMapping() {
+ return bucketFileNameMapping;
+ }
+
+ public void setBucketFileNameMapping(Map<String, Integer> bucketFileNameMapping) {
+ this.bucketFileNameMapping = bucketFileNameMapping;
+ }
}
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1462986)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -198,6 +198,8 @@
private Map<JoinOperator, QBJoinTree> joinContext;
private Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
private final HashMap<TableScanOperator, Table> topToTable;
+ private final Map<FileSinkOperator, Table> fsopToTable;
+ private final List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting;
private QB qb;
private ASTNode ast;
private int destTableId;
@@ -259,6 +261,8 @@
joinContext = new HashMap<JoinOperator, QBJoinTree>();
smbMapJoinContext = new HashMap<SMBMapJoinOperator, QBJoinTree>();
topToTable = new HashMap<TableScanOperator, Table>();
+ fsopToTable = new HashMap<FileSinkOperator, Table>();
+ reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList<ReduceSinkOperator>();
destTableId = 1;
uCtx = null;
listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
@@ -317,11 +321,13 @@
public ParseContext getParseContext() {
return new ParseContext(conf, qb, ast, opToPartPruner, opToPartList, topOps,
- topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable, loadTableWork,
+ topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable,
+ fsopToTable, loadTableWork,
loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
- opToPartToSkewedPruner, viewAliasToInput);
+ opToPartToSkewedPruner, viewAliasToInput,
+ reduceSinkOperatorsAddedByEnforceBucketingSorting);
}
@SuppressWarnings("nls")
@@ -5180,6 +5186,7 @@
+ dest_path + " row schema: " + inputRR.toString());
}
+ fsopToTable.put((FileSinkOperator) output, dest_tab);
return output;
}
@@ -5587,6 +5594,7 @@
partitionCols, order.toString(), numReducers),
new RowSchema(inputRR.getColumnInfos()), input), inputRR);
interim.setColumnExprMap(colExprMap);
+ reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator) interim);
// Add the extract operator to get the value fields
RowResolver out_rwsch = new RowResolver();
@@ -5609,6 +5617,7 @@
LOG.debug("Created ReduceSink Plan for table: " + tab.getTableName() +
" row schema: " + out_rwsch.toString());
}
+
return output;
}
@@ -8515,11 +8524,12 @@
ParseContext pCtx = new ParseContext(conf, qb, child, opToPartPruner,
opToPartList, topOps, topSelOps, opParseCtx, joinContext, smbMapJoinContext,
- topToTable,
+ topToTable, fsopToTable,
loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
- opToPartToSkewedPruner, viewAliasToInput);
+ opToPartToSkewedPruner, viewAliasToInput,
+ reduceSinkOperatorsAddedByEnforceBucketingSorting);
// Generate table access stats if required
if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == true) {
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 1462986)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy)
@@ -30,10 +30,12 @@
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
@@ -74,6 +76,8 @@
private Map<MapJoinOperator, QBJoinTree> mapJoinContext;
private Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
private HashMap<TableScanOperator, Table> topToTable;
+ private Map<FileSinkOperator, Table> fsopToTable;
+ private List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting;
private HashMap<String, SplitSample> nameToSplitSample;
private List<LoadTableDesc> loadTableWork;
private List<LoadFileDesc> loadFileWork;
@@ -164,6 +168,7 @@
Map<JoinOperator, QBJoinTree> joinContext,
Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext,
HashMap<TableScanOperator, Table> topToTable,
+ Map<FileSinkOperator, Table> fsopToTable,
List<LoadTableDesc> loadTableWork, List<LoadFileDesc> loadFileWork,
Context ctx, HashMap<String, String> idToTableNameMap, int destTableId,
UnionProcContext uCtx, List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer,
@@ -174,7 +179,8 @@
HashMap<String, SplitSample> nameToSplitSample,
HashSet<ReadEntity> semanticInputs, List<Task<? extends Serializable>> rootTasks,
Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner,
- Map<String, ReadEntity> viewAliasToInput) {
+ Map<String, ReadEntity> viewAliasToInput,
+ List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting) {
this.conf = conf;
this.qb = qb;
this.ast = ast;
@@ -183,6 +189,7 @@
this.joinContext = joinContext;
this.smbMapJoinContext = smbMapJoinContext;
this.topToTable = topToTable;
+ this.fsopToTable = fsopToTable;
this.loadFileWork = loadFileWork;
this.loadTableWork = loadTableWork;
this.opParseCtx = opParseCtx;
@@ -203,6 +210,8 @@
this.rootTasks = rootTasks;
this.opToPartToSkewedPruner = opToPartToSkewedPruner;
this.viewAliasToInput = viewAliasToInput;
+ this.reduceSinkOperatorsAddedByEnforceBucketingSorting =
+ reduceSinkOperatorsAddedByEnforceBucketingSorting;
}
/**
@@ -304,6 +313,24 @@
this.topToTable = topToTable;
}
+ public Map<FileSinkOperator, Table> getFsopToTable() {
+ return fsopToTable;
+ }
+
+ public void setFsopToTable(Map<FileSinkOperator, Table> fsopToTable) {
+ this.fsopToTable = fsopToTable;
+ }
+
+ public List<ReduceSinkOperator> getReduceSinkOperatorsAddedByEnforceBucketingSorting() {
+ return reduceSinkOperatorsAddedByEnforceBucketingSorting;
+ }
+
+ public void setReduceSinkOperatorsAddedByEnforceBucketingSorting(
+ List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting) {
+ this.reduceSinkOperatorsAddedByEnforceBucketingSorting =
+ reduceSinkOperatorsAddedByEnforceBucketingSorting;
+ }
+
/**
* @return the topOps
*/
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java (revision 1462986)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java (working copy)
@@ -31,7 +31,7 @@
public class PrunedPartitionList {
// source table
- private Table source;
+ private final Table source;
// confirmed partitions - satisfy the partition criteria
private Set<Partition> confirmedPartns;
@@ -44,7 +44,7 @@
/**
* @param confirmedPartns
- * confirmed paritions
+ * confirmed partitions
* @param unknownPartns
* unknown partitions
*/
@@ -62,7 +62,7 @@
/**
* get confirmed partitions.
- *
+ *
* @return confirmedPartns confirmed paritions
*/
public Set<Partition> getConfirmedPartns() {
@@ -71,7 +71,7 @@
/**
* get unknown partitions.
- *
+ *
* @return unknownPartns unknown paritions
*/
public Set<Partition> getUnknownPartns() {
@@ -80,7 +80,7 @@
/**
* get denied partitions.
- *
+ *
* @return deniedPartns denied paritions
*/
public Set<Partition> getDeniedPartns() {
@@ -99,7 +99,7 @@
/**
* set confirmed partitions.
- *
+ *
* @param confirmedPartns
* confirmed paritions
*/
@@ -109,7 +109,7 @@
/**
* set unknown partitions.
- *
+ *
* @param unknownPartns
* unknown partitions
*/