Index: build.xml
===================================================================
--- build.xml (revision 1467161)
+++ build.xml (working copy)
@@ -136,7 +136,6 @@
-
@@ -510,14 +509,6 @@
-
-
-
-
-
-
-
-
@@ -766,9 +756,6 @@
-
-
@@ -962,7 +949,6 @@
-
@@ -979,7 +965,6 @@
-
@@ -1081,8 +1066,6 @@
todir="${mvn.jar.dir}" />
-
-
-
-
-
@@ -1373,16 +1351,6 @@
output.file="${mvn.jar.dir}/hive-metastore-${version}.pom.asc"
gpg.passphrase="${gpg.passphrase}"/>
-
-
-
-
-
@@ -198,7 +197,6 @@
-
Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out (revision 0)
+++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out (working copy)
@@ -0,0 +1,446 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: CREATE TABLE test_table3 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key2) SORTED BY (key2) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table3 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key2) SORTED BY (key2) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table3
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, since the insert is happening on the bucketing position
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, since the insert is happening on the bucketing position
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col6
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col6)
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+2 2 val_2val_2 1
+4 4 val_4val_4 1
+8 8 val_8val_8 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+9 9 val_9val_9 1
+PREHOOK: query: DROP TABLE test_table3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_table3
+PREHOOK: Output: default@test_table3
+POSTHOOK: query: DROP TABLE test_table3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Output: default@test_table3
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table3
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation, since the insert is happening on a non-bucketing position
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation, since the insert is happening on a non-bucketing position
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+0 val_0 1
+0 val_0 1
+0 val_0 1
+0 val_0 1
+0 val_0 1
+0 val_0 1
+0 val_0 1
+0 val_0 1
+0 val_0 1
+2 val_2 1
+4 val_4 1
+8 val_8 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+5 val_5 1
+5 val_5 1
+5 val_5 1
+5 val_5 1
+5 val_5 1
+5 val_5 1
+5 val_5 1
+5 val_5 1
+5 val_5 1
+9 val_9 1
+PREHOOK: query: DROP TABLE test_table3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_table3
+PREHOOK: Output: default@test_table3
+POSTHOOK: query: DROP TABLE test_table3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Output: default@test_table3
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out (revision 0)
+++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out (working copy)
@@ -0,0 +1,389 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: CREATE TABLE test_table3 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table3 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table3
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, b.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, b.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col5
+ type: int
+ expr: concat(_col1, _col6)
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, b.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, b.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table2)b.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table2)b.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+2 2 val_2val_2 1
+4 4 val_4val_4 1
+8 8 val_8val_8 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table2)b.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+9 9 val_9val_9 1
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT b.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT b.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table2)b.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col5
+ type: int
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col6)
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT b.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT b.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table2)b.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table2)b.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table2)b.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table2)b.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+0 0 val_0val_0 1
+2 2 val_2val_2 1
+4 4 val_4val_4 1
+8 8 val_8val_8 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table2)b.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table2)b.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+5 5 val_5val_5 1
+9 9 val_9val_9 1
Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out (revision 0)
+++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out (working copy)
@@ -0,0 +1,326 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (value STRING, key INT) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (value STRING, key INT) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- The bucketing positions dont match - although the actual bucketing do.
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.value, x.key from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- The bucketing positions dont match - although the actual bucketing do.
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.value, x.key from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ x:a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: value
+ type: string
+ expr: key
+ type: int
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.value, x.key from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.value, x.key from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table2 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table3
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- The bucketing positions dont match - this should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- The bucketing positions dont match - this should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ x:a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: UDFToInteger(_col1)
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: UDFToInteger(_col1)
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: UDFToInteger(_col1)
+ type: int
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table2 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+0
Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out (revision 0)
+++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out (working copy)
@@ -0,0 +1,592 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table3
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+and (a.key = 0 or a.key = 5)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+and (a.key = 0 or a.key = 5)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1')) (or (= (. (TOK_TABLE_OR_COL a) key) 0) (= (. (TOK_TABLE_OR_COL a) key) 5))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: ((key = 0) or (key = 5))
+ type: boolean
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col6
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col6)
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+and (a.key = 0 or a.key = 5)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+and (a.key = 0 or a.key = 5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+PREHOOK: query: -- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and (key = 0 or key = 5)) a
+JOIN
+(select key, value from test_table2 where ds = '1' and (key = 0 or key = 5)) b
+ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and (key = 0 or key = 5)) a
+JOIN
+(select key, value from test_table2 where ds = '1' and (key = 0 or key = 5)) b
+ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (or (= (TOK_TABLE_OR_COL key) 0) (= (TOK_TABLE_OR_COL key) 5)))))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (or (= (TOK_TABLE_OR_COL key) 0) (= (TOK_TABLE_OR_COL key) 5)))))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:test_table1
+ TableScan
+ alias: test_table1
+ Filter Operator
+ predicate:
+ expr: ((key = 0) or (key = 5))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col3)
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and (key = 0 or key = 5)) a
+JOIN
+(select key, value from test_table2 where ds = '1' and (key = 0 or key = 5)) b
+ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and (key = 0 or key = 5)) a
+JOIN
+(select key, value from test_table2 where ds = '1' and (key = 0 or key = 5)) b
+ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+PREHOOK: query: -- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and key < 8) a
+JOIN
+(select key, value from test_table2 where ds = '1' and key < 8) b
+ON a.key = b.key
+WHERE a.key = 0 or a.key = 5
+PREHOOK: type: QUERY
+POSTHOOK: query: -- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and key < 8) a
+JOIN
+(select key, value from test_table2 where ds = '1' and key < 8) b
+ON a.key = b.key
+WHERE a.key = 0 or a.key = 5
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (< (TOK_TABLE_OR_COL key) 8))))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (< (TOK_TABLE_OR_COL key) 8))))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (or (= (. (TOK_TABLE_OR_COL a) key) 0) (= (. (TOK_TABLE_OR_COL a) key) 5)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:test_table1
+ TableScan
+ alias: test_table1
+ Filter Operator
+ predicate:
+ expr: ((key < 8) and ((key = 0) or (key = 5)))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col3)
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and key < 8) a
+JOIN
+(select key, value from test_table2 where ds = '1' and key < 8) b
+ON a.key = b.key
+WHERE a.key = 0 or a.key = 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and key < 8) a
+JOIN
+(select key, value from test_table2 where ds = '1' and key < 8) b
+ON a.key = b.key
+WHERE a.key = 0 or a.key = 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out (revision 0)
+++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out (working copy)
@@ -0,0 +1,1384 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table3
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '2') SELECT * where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=2
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '2') SELECT * where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT * where key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table2@ds=2
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT * where key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table2@ds=2
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col6
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col6)
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+2 val_2val_2 1
+4 val_4val_4 1
+8 val_8val_8 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+9 val_9val_9 1
+PREHOOK: query: -- Since more than one partition of 'a' (the big table) is being selected,
+-- it should be a map-reduce job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds is not null and b.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since more than one partition of 'a' (the big table) is being selected,
+-- it should be a map-reduce job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds is not null and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) ds)) (= (. (TOK_TABLE_OR_COL b) ds) '1')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col6
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col6)
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds is not null and b.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table1@ds=2
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds is not null and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table1@ds=2
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+2 val_2val_2 1
+2 val_2val_2 1
+4 val_4val_4 1
+4 val_4val_4 1
+8 val_8val_8 1
+8 val_8val_8 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+9 val_9val_9 1
+9 val_9val_9 1
+PREHOOK: query: -- Since a single partition of the big table ('a') is being selected, it should be a map-only
+-- job even though multiple partitions of 'b' are being selected
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds is not null
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since a single partition of the big table ('a') is being selected, it should be a map-only
+-- job even though multiple partitions of 'b' are being selected
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) ds))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col6
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col6)
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Input: default@test_table2@ds=2
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Input: default@test_table2@ds=2
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+2 val_2val_2 1
+2 val_2val_2 1
+4 val_4val_4 1
+4 val_4val_4 1
+8 val_8val_8 1
+8 val_8val_8 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+9 val_9val_9 1
+9 val_9val_9 1
+PREHOOK: query: -- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:test_table1
+ TableScan
+ alias: test_table1
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col3)
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+2 val_2val_2 1
+4 val_4val_4 1
+8 val_8val_8 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+9 val_9val_9 1
+PREHOOK: query: -- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.v1, b.v2)
+FROM
+(select key, concat(value, value) as v1 from test_table1 where ds = '1') a
+JOIN
+(select key, concat(value, value) as v2 from test_table2 where ds = '1') b
+ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.v1, b.v2)
+FROM
+(select key, concat(value, value) as v1 from test_table1 where ds = '1') a
+JOIN
+(select key, concat(value, value) as v2 from test_table2 where ds = '1') b
+ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value)) v1)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL value)) v2)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) v1) (. (TOK_TABLE_OR_COL b) v2))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:test_table1
+ TableScan
+ alias: test_table1
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: concat(value, value)
+ type: string
+ outputColumnNames: _col0, _col1
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col3)
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.v1, b.v2)
+FROM
+(select key, concat(value, value) as v1 from test_table1 where ds = '1') a
+JOIN
+(select key, concat(value, value) as v2 from test_table2 where ds = '1') b
+ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.v1, b.v2)
+FROM
+(select key, concat(value, value) as v1 from test_table1 where ds = '1') a
+JOIN
+(select key, concat(value, value) as v2 from test_table2 where ds = '1') b
+ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 val_0val_0val_0val_0 1
+0 val_0val_0val_0val_0 1
+0 val_0val_0val_0val_0 1
+0 val_0val_0val_0val_0 1
+0 val_0val_0val_0val_0 1
+0 val_0val_0val_0val_0 1
+0 val_0val_0val_0val_0 1
+0 val_0val_0val_0val_0 1
+0 val_0val_0val_0val_0 1
+2 val_2val_2val_2val_2 1
+4 val_4val_4val_4val_4 1
+8 val_8val_8val_8val_8 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+5 val_5val_5val_5val_5 1
+5 val_5val_5val_5val_5 1
+5 val_5val_5val_5val_5 1
+5 val_5val_5val_5val_5 1
+5 val_5val_5val_5val_5 1
+5 val_5val_5val_5val_5 1
+5 val_5val_5val_5val_5 1
+5 val_5val_5val_5val_5 1
+5 val_5val_5val_5val_5 1
+9 val_9val_9val_9val_9 1
+PREHOOK: query: -- This should be a map-reduce job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key+a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- This should be a map-reduce job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key+a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL a) key))) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:test_table1
+ TableScan
+ alias: test_table1
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: (_col0 + _col0)
+ type: int
+ expr: concat(_col1, _col3)
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key+a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key+a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key EXPRESSION [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key EXPRESSION [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+4 val_2val_2 1
+8 val_4val_4 1
+10 val_5val_5 1
+10 val_5val_5 1
+10 val_5val_5 1
+10 val_5val_5 1
+10 val_5val_5 1
+10 val_5val_5 1
+10 val_5val_5 1
+10 val_5val_5 1
+10 val_5val_5 1
+16 val_8val_8 1
+18 val_9val_9 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key EXPRESSION [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out (revision 0)
+++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out (working copy)
@@ -0,0 +1,1254 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key ASC, key2 DESC) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key ASC, key2 DESC) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key ASC, key2 DESC) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key ASC, key2 DESC) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: CREATE TABLE test_table3 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key ASC, key2 DESC) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table3 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key ASC, key2 DESC) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table3
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT key, key+1, value where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT key, key+1, value where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT key, key+1, value where key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT key, key+1, value where key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, since the sort-order matches
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key2, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, since the sort-order matches
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key2, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {key2} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key], Column[key2]]
+ 1 [Column[key], Column[key2]]
+ outputColumnNames: _col0, _col1, _col2, _col8
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: concat(_col2, _col8)
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key2, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key2, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+2 3 val_2val_2 1
+4 5 val_4val_4 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+8 9 val_8val_8 1
+9 10 val_9val_9 1
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, since the sort-order matches
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq1.key, subq1.key2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, since the sort-order matches
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq1.key, subq1.key2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)) value)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq1:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {key2} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key], Column[key2]]
+ 1 [Column[key], Column[key2]]
+ outputColumnNames: _col0, _col1, _col2, _col8
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: concat(_col2, _col8)
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq1.key, subq1.key2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq1.key, subq1.key2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+2 3 val_2val_2 1
+4 5 val_4val_4 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+8 9 val_8val_8 1
+9 10 val_9val_9 1
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key2, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key2, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {key2} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key], Column[key2]]
+ 1 [Column[key], Column[key2]]
+ outputColumnNames: _col0, _col1, _col2, _col8
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col1
+ type: int
+ expr: _col0
+ type: int
+ expr: concat(_col2, _col8)
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ sort order: +-
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq1.key2, subq1.key, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq1.key2, subq1.key, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)) value)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq1:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {key2} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key], Column[key2]]
+ 1 [Column[key], Column[key2]]
+ outputColumnNames: _col0, _col1, _col2, _col8
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col1
+ type: int
+ expr: _col0
+ type: int
+ expr: concat(_col2, _col8)
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ sort order: +-
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.key, subq2.key2, subq2.value from
+(
+SELECT subq1.key2, subq1.key, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.key, subq2.key2, subq2.value from
+(
+SELECT subq1.key2, subq1.key, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)) value)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value))))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq2:subq1:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {key2} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key], Column[key2]]
+ 1 [Column[key], Column[key2]]
+ outputColumnNames: _col0, _col1, _col2, _col8
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: concat(_col2, _col8)
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.key, subq2.key2, subq2.value from
+(
+SELECT subq1.key2, subq1.key, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.key, subq2.key2, subq2.value from
+(
+SELECT subq1.key2, subq1.key, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+2 3 val_2val_2 1
+4 5 val_4val_4 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+8 9 val_8val_8 1
+9 10 val_9val_9 1
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.k2, subq2.k1, subq2.value from
+(
+SELECT subq1.key2 as k1, subq1.key as k2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.k2, subq2.k1, subq2.value from
+(
+SELECT subq1.key2 as k1, subq1.key as k2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)) value)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key2) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value))))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) k2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) k1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq2:subq1:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {key2} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key], Column[key2]]
+ 1 [Column[key], Column[key2]]
+ outputColumnNames: _col0, _col1, _col2, _col8
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: concat(_col2, _col8)
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.k2, subq2.k1, subq2.value from
+(
+SELECT subq1.key2 as k1, subq1.key as k2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.k2, subq2.k1, subq2.value from
+(
+SELECT subq1.key2 as k1, subq1.key as k2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+0 1 val_0val_0 1
+2 3 val_2val_2 1
+4 5 val_4val_4 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+5 6 val_5val_5 1
+8 9 val_8val_8 1
+9 10 val_9val_9 1
+PREHOOK: query: CREATE TABLE test_table4 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key DESC, key2 DESC) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table4 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key DESC, key2 DESC) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table4
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table4 PARTITION (ds = '1')
+SELECT subq2.k2, subq2.k1, subq2.value from
+(
+SELECT subq1.key2 as k1, subq1.key as k2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table4 PARTITION (ds = '1')
+SELECT subq2.k2, subq2.k1, subq2.value from
+(
+SELECT subq1.key2 as k1, subq1.key as k2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key2 SIMPLE [(test_table1)a.FieldSchema(name:key2, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key2)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)) value)) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1'))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key2) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value))))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table4) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) k2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) k1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq2:subq1:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {key2} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key], Column[key2]]
+ 1 [Column[key], Column[key2]]
+ outputColumnNames: _col0, _col1, _col2, _col8
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: concat(_col2, _col8)
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ sort order: --
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table4
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table4
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out (revision 0)
+++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out (working copy)
@@ -0,0 +1,525 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ x:a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table2 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT * from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT * from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ x:a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT * from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT * from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from test_table2 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+500
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+247
+PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+253
+PREHOOK: query: -- it should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, concat(x.value, x.value) from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+PREHOOK: type: QUERY
+POSTHOOK: query: -- it should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, concat(x.value, x.value) from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL x) value))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ x:a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: concat(value, value)
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: -- it should be a map-reduce job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key+x.key, x.value from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+PREHOOK: type: QUERY
+POSTHOOK: query: -- it should be a map-reduce job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key+x.key, x.value from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL x) key))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ x:a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: (key + key)
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: -- it should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.k1, concat(x.v1, x.v1) from
+(
+SELECT a.key as k1, a.value as v1 FROM test_table1 a WHERE a.ds = '1'
+)x
+PREHOOK: type: QUERY
+POSTHOOK: query: -- it should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.k1, concat(x.v1, x.v1) from
+(
+SELECT a.key as k1, a.value as v1 FROM test_table1 a WHERE a.ds = '1'
+)x
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) v1)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL a) ds) '1')))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table2) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) k1)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL x) v1) (. (TOK_TABLE_OR_COL x) v1))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ x:a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: concat(value, value)
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
Index: ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out (revision 0)
+++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out (working copy)
@@ -0,0 +1,424 @@
+PREHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table3
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table2@ds=1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table2@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation, since the sort-order does not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation, since the sort-order does not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '1')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col6
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col6)
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: -
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+8 val_8val_8 1
+4 val_4val_4 1
+2 val_2val_2 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+9 val_9val_9 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+PREHOOK: query: -- This should be a map-reduce job since the sort order does not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- This should be a map-reduce job since the sort order does not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test_table2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_table3) (TOK_PARTSPEC (TOK_PARTVAL ds '1')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:test_table1
+ TableScan
+ alias: test_table1
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: concat(_col1, _col3)
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: -
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_table3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table1@ds=1
+PREHOOK: Input: default@test_table2
+PREHOOK: Input: default@test_table2@ds=1
+PREHOOK: Output: default@test_table3@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table1@ds=1
+POSTHOOK: Input: default@test_table2
+POSTHOOK: Input: default@test_table2@ds=1
+POSTHOOK: Output: default@test_table3@ds=1
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+8 val_8val_8 1
+4 val_4val_4 1
+2 val_2val_2 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+0 val_0val_0 1
+PREHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table3
+PREHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table3
+POSTHOOK: Input: default@test_table3@ds=1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ]
+9 val_9val_9 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
+5 val_5val_5 1
Index: ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q
===================================================================
--- ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q (revision 0)
+++ ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q (working copy)
@@ -0,0 +1,141 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10;
+
+FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '2') SELECT * where key < 10;
+
+FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT * where key < 100;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- Since more than one partition of 'a' (the big table) is being selected,
+-- it should be a map-reduce job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds is not null and b.ds = '1';
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds is not null and b.ds = '1';
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- Since a single partition of the big table ('a') is being selected, it should be a map-only
+-- job even though multiple partitions of 'b' are being selected
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds is not null;
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds is not null;
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key;
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key;
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.v1, b.v2)
+FROM
+(select key, concat(value, value) as v1 from test_table1 where ds = '1') a
+JOIN
+(select key, concat(value, value) as v2 from test_table2 where ds = '1') b
+ON a.key = b.key;
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.v1, b.v2)
+FROM
+(select key, concat(value, value) as v1 from test_table1 where ds = '1') a
+JOIN
+(select key, concat(value, value) as v2 from test_table2 where ds = '1') b
+ON a.key = b.key;
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- This should be a map-reduce job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key+a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key;
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key+a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key;
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
Index: ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q
===================================================================
--- ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q (revision 0)
+++ ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q (working copy)
@@ -0,0 +1,154 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key ASC, key2 DESC) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key ASC, key2 DESC) INTO 2 BUCKETS;
+CREATE TABLE test_table3 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key ASC, key2 DESC) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT key, key+1, value where key < 10;
+
+FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT key, key+1, value where key < 100;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, since the sort-order matches
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key2, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1';
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key2, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1';
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, since the sort-order matches
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq1.key, subq1.key2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1;
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq1.key, subq1.key2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1;
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key2, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1';
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq1.key2, subq1.key, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.key, subq2.key2, subq2.value from
+(
+SELECT subq1.key2, subq1.key, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2;
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.key, subq2.key2, subq2.value from
+(
+SELECT subq1.key2, subq1.key, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2;
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.k2, subq2.k1, subq2.value from
+(
+SELECT subq1.key2 as k1, subq1.key as k2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2;
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT subq2.k2, subq2.k1, subq2.value from
+(
+SELECT subq1.key2 as k1, subq1.key as k2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2;
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+CREATE TABLE test_table4 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key, key2) SORTED BY (key DESC, key2 DESC) INTO 2 BUCKETS;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table4 PARTITION (ds = '1')
+SELECT subq2.k2, subq2.k1, subq2.value from
+(
+SELECT subq1.key2 as k1, subq1.key as k2, subq1.value from
+(
+SELECT a.key, a.key2, concat(a.value, b.value) as value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1'
+)subq1
+)subq2;
Index: ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q
===================================================================
--- ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q (revision 0)
+++ ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q (working copy)
@@ -0,0 +1,50 @@
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (value STRING, key INT) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- The bucketing positions dont match - although the actual bucketing do.
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.value, x.key from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x;
+
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.value, x.key from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x;
+
+select count(*) from test_table2 where ds = '1';
+select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1';
+select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1';
+
+CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- The bucketing positions dont match - this should be a map-reduce operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x;
+
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x;
+
+select count(*) from test_table2 where ds = '1';
+select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1';
+select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1';
Index: ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q
===================================================================
--- ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q (revision 0)
+++ ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q (working copy)
@@ -0,0 +1,86 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10;
+
+FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+and (a.key = 0 or a.key = 5);
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
+and (a.key = 0 or a.key = 5);
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and (key = 0 or key = 5)) a
+JOIN
+(select key, value from test_table2 where ds = '1' and (key = 0 or key = 5)) b
+ON a.key = b.key;
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and (key = 0 or key = 5)) a
+JOIN
+(select key, value from test_table2 where ds = '1' and (key = 0 or key = 5)) b
+ON a.key = b.key;
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- This should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and key < 8) a
+JOIN
+(select key, value from test_table2 where ds = '1' and key < 8) b
+ON a.key = b.key
+WHERE a.key = 0 or a.key = 5;
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1' and key < 8) a
+JOIN
+(select key, value from test_table2 where ds = '1' and key < 8) b
+ON a.key = b.key
+WHERE a.key = 0 or a.key = 5;
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
Index: ql/src/test/queries/clientpositive/bucketsortoptimize_insert_4.q
===================================================================
--- ql/src/test/queries/clientpositive/bucketsortoptimize_insert_4.q (revision 0)
+++ ql/src/test/queries/clientpositive/bucketsortoptimize_insert_4.q (working copy)
@@ -0,0 +1,63 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table3 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key2) SORTED BY (key2) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10;
+
+FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation, since the insert is happening on the bucketing position
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+DROP TABLE test_table3;
+
+CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation, since the insert is happening on a non-bucketing position
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, a.value
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+DROP TABLE test_table3;
Index: ql/src/test/queries/clientpositive/bucketsortoptimize_insert_8.q
===================================================================
--- ql/src/test/queries/clientpositive/bucketsortoptimize_insert_8.q (revision 0)
+++ ql/src/test/queries/clientpositive/bucketsortoptimize_insert_8.q (working copy)
@@ -0,0 +1,56 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table3 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10;
+
+FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, b.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, b.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT b.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT b.key, a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
Index: ql/src/test/queries/clientpositive/bucketsortoptimize_insert_1.q
===================================================================
--- ql/src/test/queries/clientpositive/bucketsortoptimize_insert_1.q (revision 0)
+++ ql/src/test/queries/clientpositive/bucketsortoptimize_insert_1.q (working copy)
@@ -0,0 +1,76 @@
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-only operation
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x;
+
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, x.value from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x;
+
+select count(*) from test_table2 where ds = '1';
+select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1';
+select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1';
+
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT * from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x;
+
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT * from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x;
+
+select count(*) from test_table2 where ds = '1';
+select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1';
+select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- it should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key, concat(x.value, x.value) from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x;
+
+-- it should be a map-reduce job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.key+x.key, x.value from
+(
+SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
+)x;
+
+-- it should be a map-only job
+EXPLAIN
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
+SELECT x.k1, concat(x.v1, x.v1) from
+(
+SELECT a.key as k1, a.value as v1 FROM test_table1 a WHERE a.ds = '1'
+)x;
Index: ql/src/test/queries/clientpositive/bucketsortoptimize_insert_5.q
===================================================================
--- ql/src/test/queries/clientpositive/bucketsortoptimize_insert_5.q (revision 0)
+++ ql/src/test/queries/clientpositive/bucketsortoptimize_insert_5.q (working copy)
@@ -0,0 +1,61 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+
+-- Create two bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING)
+CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * where key < 10;
+
+FROM src
+INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * where key < 100;
+
+-- Insert data into the bucketed table by selecting from another bucketed table
+-- This should be a map-reduce operation, since the sort-order does not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM test_table1 a JOIN test_table2 b
+ON a.key = b.key WHERE a.ds = '1' and b.ds = '1';
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
+
+-- This should be a map-reduce job since the sort order does not match
+EXPLAIN
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key;
+
+INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
+SELECT a.key, concat(a.value, b.value)
+FROM
+(select key, value from test_table1 where ds = '1') a
+JOIN
+(select key, value from test_table2 where ds = '1') b
+ON a.key = b.key;
+
+select * from test_table3 tablesample (bucket 1 out of 2) s where ds = '1';
+select * from test_table3 tablesample (bucket 2 out of 2) s where ds = '1';
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (revision 1467161)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (working copy)
@@ -25,11 +25,8 @@
import java.util.Map;
import java.util.Stack;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.common.ObjectPair;
-import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.ql.exec.ExtractOperator;
@@ -37,6 +34,7 @@
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -56,6 +54,7 @@
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
/**
@@ -64,12 +63,15 @@
* insert overwrite table T1 select * from T2;
* where T1 and T2 are bucketized/sorted on the same keys, we don't need a reducer to
* enforce bucketing and sorting.
+ *
+ * It also optimizes queries of the form:
+ * insert overwrite table T1
+ * select * from T1 join T2 on T1.key = T2.key
+ * where T1, T2 and T3 are bucketized/sorted on the same key 'key', we don't need a reducer
+ * to enforce bucketing and sorting
*/
public class BucketingSortingReduceSinkOptimizer implements Transform {
- private static final Log LOG = LogFactory.getLog(BucketingSortingReduceSinkOptimizer.class
- .getName());
-
public BucketingSortingReduceSinkOptimizer() {
}
@@ -77,7 +79,6 @@
public ParseContext transform(ParseContext pctx) throws SemanticException {
Map opRules = new LinkedHashMap();
- HiveConf conf = pctx.getConf();
// process reduce sink added by hive.enforce.bucketing or hive.enforce.sorting
opRules.put(new RuleRegExp("R1",
@@ -90,7 +91,7 @@
Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, null);
GraphWalker ogw = new DefaultGraphWalker(disp);
- // Create a list of topop nodes
+ // Create a list of top nodes
ArrayList topNodes = new ArrayList();
topNodes.addAll(pctx.getTopOps().values());
ogw.startWalking(topNodes, null);
@@ -117,7 +118,6 @@
*
*/
public class BucketSortReduceSinkProcessor implements NodeProcessor {
-
protected ParseContext pGraphContext;
public BucketSortReduceSinkProcessor(ParseContext pGraphContext) {
@@ -142,28 +142,33 @@
}
// Get the sort positions and sort order for the table
- private List> getSortPositions(List tabSortCols,
+ // The sort order contains whether the sorting is happening ascending or descending
+ private ObjectPair, List> getSortPositionsOrder(
+ List tabSortCols,
List tabCols) {
- List> posns = new ArrayList>();
+ List sortPositions = new ArrayList();
+ List sortOrders = new ArrayList();
for (Order sortCol : tabSortCols) {
int pos = 0;
for (FieldSchema tabCol : tabCols) {
if (sortCol.getCol().equals(tabCol.getName())) {
- posns.add(new ObjectPair(pos, sortCol.getOrder()));
+ sortPositions.add(pos);
+ sortOrders.add(sortCol.getOrder());
break;
}
pos++;
}
}
- return posns;
+ return new ObjectPair, List>(sortPositions, sortOrders);
}
- // Return true if the parition is bucketed/sorted by the specified positions
+ // Return true if the partition is bucketed/sorted by the specified positions
// The number of buckets, the sort order should also match along with the
// columns which are bucketed/sorted
private boolean checkPartition(Partition partition,
List bucketPositionsDest,
- List> sortPositionsDest,
+ List sortPositionsDest,
+ List sortOrderDest,
int numBucketsDest) {
// The bucketing and sorting positions should exactly match
int numBuckets = partition.getBucketCount();
@@ -173,10 +178,11 @@
List partnBucketPositions =
getBucketPositions(partition.getBucketCols(), partition.getTable().getCols());
- List> partnSortPositions =
- getSortPositions(partition.getSortCols(), partition.getTable().getCols());
+ ObjectPair, List> partnSortPositionsOrder =
+ getSortPositionsOrder(partition.getSortCols(), partition.getTable().getCols());
return bucketPositionsDest.equals(partnBucketPositions) &&
- sortPositionsDest.equals(partnSortPositions);
+ sortPositionsDest.equals(partnSortPositionsOrder.getFirst()) &&
+ sortOrderDest.equals(partnSortPositionsOrder.getSecond());
}
// Return true if the table is bucketed/sorted by the specified positions
@@ -184,7 +190,8 @@
// columns which are bucketed/sorted
private boolean checkTable(Table table,
List bucketPositionsDest,
- List> sortPositionsDest,
+ List sortPositionsDest,
+ List sortOrderDest,
int numBucketsDest) {
// The bucketing and sorting positions should exactly match
int numBuckets = table.getNumBuckets();
@@ -194,12 +201,17 @@
List tableBucketPositions =
getBucketPositions(table.getBucketCols(), table.getCols());
- List> tableSortPositions =
- getSortPositions(table.getSortCols(), table.getCols());
+ ObjectPair, List> tableSortPositionsOrder =
+ getSortPositionsOrder(table.getSortCols(), table.getCols());
return bucketPositionsDest.equals(tableBucketPositions) &&
- sortPositionsDest.equals(tableSortPositions);
+ sortPositionsDest.equals(tableSortPositionsOrder.getFirst()) &&
+ sortOrderDest.equals(tableSortPositionsOrder.getSecond());
}
+ // Store the bucket path to bucket number mapping in the table scan operator.
+ // Although one mapper per file is used (BucketizedInputHiveInput), it is possible that
+ // any mapper can pick up any file (depending on the size of the files). The bucket number
+ // corresponding to the input file is stored to name the output bucket file appropriately.
private void storeBucketPathMapping(TableScanOperator tsOp, FileStatus[] srcs) {
Map bucketFileNameMapping = new HashMap();
for (int pos = 0; pos < srcs.length; pos++) {
@@ -222,12 +234,12 @@
// Store the mapping -> path, bucket number
// This is needed since for the map-only job, any mapper can process any file.
// For eg: if mapper 1 is processing the file corresponding to bucket 2, it should
- // also output the file correspodning to bucket 2 of the output.
+ // also output the file corresponding to bucket 2 of the output.
storeBucketPathMapping(tsOp, srcs);
}
// Remove the reduce sink operator
- // Use bucketized hive input format so that one mapper processes exactly one file
+ // Use BucketizedHiveInputFormat so that one mapper processes exactly one file
private void removeReduceSink(ReduceSinkOperator rsOp,
TableScanOperator tsOp,
FileSinkOperator fsOp) {
@@ -251,6 +263,97 @@
return -1;
}
+ // The output columns for the destination table should match with the join keys
+ // This is to handle queries of the form:
+ // insert overwrite table T3
+ // select T1.key, T1.key2, UDF(T1.value, T2.value)
+ // from T1 join T2 on T1.key = T2.key and T1.key2 = T2.key2
+ // where T1, T2 and T3 are bucketized/sorted on key and key2
+ // Assuming T1 is the table on which the mapper is run, the following is true:
+ // . The number of buckets for T1 and T3 should be same
+ // . The bucketing/sorting columns for T1, T2 and T3 should be same
+ // . The sort order of T1 should match with the sort order for T3.
+ // . If T1 is partitioned, only a single partition of T1 can be selected.
+ // . The select list should contain with (T1.key, T1.key2) or (T2.key, T2.key2)
+ // . After the join, only selects and filters are allowed.
+ private boolean validateSMBJoinKeys(SMBJoinDesc smbJoinDesc,
+ List sourceTableBucketCols,
+ List sourceTableSortCols,
+ List sortOrder) {
+ // The sort-merge join creates the output sorted and bucketized by the same columns.
+ // This can be relaxed in the future if there is a requirement.
+ if (!sourceTableBucketCols.equals(sourceTableSortCols)) {
+ return false;
+ }
+
+ // Get the total number of columns selected, and for each output column, store the
+ // base table it points to. For
+ // insert overwrite table T3
+ // select T1.key, T1.key2, UDF(T1.value, T2.value)
+ // from T1 join T2 on T1.key = T2.key and T1.key2 = T2.key2
+ // the following arrays are created
+ // [0, 0, 0, 1] --> [T1, T1, T1, T2] (table mapping)
+ // [0, 1, 2, 0] --> [T1.0, T1.1, T1.2, T2.0] (table columns mapping)
+ Byte[] tagOrder = smbJoinDesc.getTagOrder();
+ Map> retainList = smbJoinDesc.getRetainList();
+ int totalNumberColumns = 0;
+ for (Byte tag : tagOrder) {
+ totalNumberColumns += retainList.get(tag).size();
+ }
+
+ byte[] columnTableMappings = new byte[totalNumberColumns];
+ int[] columnNumberMappings = new int[totalNumberColumns];
+ int currentColumnPosition = 0;
+ for (Byte tag : tagOrder) {
+ for (int pos = 0; pos < retainList.get(tag).size(); pos++) {
+ columnTableMappings[currentColumnPosition] = tag;
+ columnNumberMappings[currentColumnPosition] = pos;
+ currentColumnPosition++;
+ }
+ }
+
+ // All output columns used for bucketing/sorting of the destination table should
+ // belong to the same input table
+ // insert overwrite table T3
+ // select T1.key, T2.key2, UDF(T1.value, T2.value)
+ // from T1 join T2 on T1.key = T2.key and T1.key2 = T2.key2
+ // is not optimized, whereas the insert is optimized if the select list is either changed to
+ // (T1.key, T1.key2, UDF(T1.value, T2.value)) or (T2.key, T2.key2, UDF(T1.value, T2.value))
+ // Get the input table and make sure the keys match
+ List outputColumnNames = smbJoinDesc.getOutputColumnNames();
+ byte tableTag = -1;
+ int[] columnNumbersExprList = new int[sourceTableBucketCols.size()];
+ int currentColPosition = 0;
+ for (ExprNodeColumnDesc bucketCol : sourceTableBucketCols) {
+ String colName = bucketCol.getColumn();
+ int colNumber = outputColumnNames.indexOf(colName);
+ if (colNumber < 0) {
+ return false;
+ }
+ if (tableTag < 0) {
+ tableTag = columnTableMappings[colNumber];
+ }
+ else if (tableTag != columnTableMappings[colNumber]) {
+ return false;
+ }
+ columnNumbersExprList[currentColPosition++] = columnNumberMappings[colNumber];
+ }
+
+ List allExprs = smbJoinDesc.getExprs().get(tableTag);
+ List keysSelectedTable = smbJoinDesc.getKeys().get(tableTag);
+ currentColPosition = 0;
+ for (ExprNodeDesc keySelectedTable : keysSelectedTable) {
+ if (!(keySelectedTable instanceof ExprNodeColumnDesc)) {
+ return false;
+ }
+ if (!allExprs.get(columnNumbersExprList[currentColPosition++]).isSame(keySelectedTable)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
@Override
public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
@@ -283,14 +386,21 @@
if (destTable == null) {
return null;
}
+ int numBucketsDestination = destTable.getNumBuckets();
// Get the positions for sorted and bucketed columns
// For sorted columns, also get the order (ascending/descending) - that should
// also match for this to be converted to a map-only job.
+ // Get the positions for sorted and bucketed columns
+ // For sorted columns, also get the order (ascending/descending) - that should
+ // also match for this to be converted to a map-only job.
List bucketPositions =
getBucketPositions(destTable.getBucketCols(), destTable.getCols());
- List> sortPositions =
- getSortPositions(destTable.getSortCols(), destTable.getCols());
+ ObjectPair, List> sortOrderPositions =
+ getSortPositionsOrder(destTable.getSortCols(), destTable.getCols());
+ List sortPositions = sortOrderPositions.getFirst();
+ List sortOrder = sortOrderPositions.getSecond();
+ boolean useBucketSortPositions = true;
// Only selects and filters are allowed
Operator extends OperatorDesc> op = rsOp;
@@ -298,119 +408,179 @@
// bucketed/sorted columns for the destination table
List sourceTableBucketCols = new ArrayList();
List sourceTableSortCols = new ArrayList();
+ op = op.getParentOperators().get(0);
while (true) {
- if (op.getParentOperators().size() > 1) {
- return null;
- }
-
- op = op.getParentOperators().get(0);
if (!(op instanceof TableScanOperator) &&
!(op instanceof FilterOperator) &&
- !(op instanceof SelectOperator)) {
+ !(op instanceof SelectOperator) &&
+ !(op instanceof SMBMapJoinOperator)) {
return null;
}
- // nothing to be done for filters - the output schema does not change.
- if (op instanceof TableScanOperator) {
- Table srcTable = pGraphContext.getTopToTable().get(op);
+ if (op instanceof SMBMapJoinOperator) {
+ // Bucketing and sorting keys should exactly match
+ if (!(bucketPositions.equals(sortPositions))) {
+ return null;
+ }
+ SMBMapJoinOperator smbOp = (SMBMapJoinOperator) op;
+ SMBJoinDesc smbJoinDesc = smbOp.getConf();
+ int posBigTable = smbJoinDesc.getPosBigTable();
- // Find the positions of the bucketed columns in the table corresponding
- // to the select list.
- // Consider the following scenario:
- // T1(key, value1, value2) bucketed/sorted by key into 2 buckets
- // T2(dummy, key, value1, value2) bucketed/sorted by key into 2 buckets
- // A query like: insert overwrite table T2 select 1, key, value1, value2 from T1
- // should be optimized.
+ // join keys dont match the bucketing keys
+ List keysBigTable = smbJoinDesc.getKeys().get((byte) posBigTable);
+ if (keysBigTable.size() != bucketPositions.size()) {
+ return null;
+ }
- // Start with the destination: T2, bucketed/sorted position is [1]
- // At the source T1, the column corresponding to that position is [key], which
- // maps to column [0] of T1, which is also bucketed/sorted into the same
- // number of buckets
- List newBucketPositions = new ArrayList();
- for (int pos = 0; pos < bucketPositions.size(); pos++) {
- ExprNodeColumnDesc col = sourceTableBucketCols.get(pos);
- String colName = col.getColumn();
- int bucketPos = findColumnPosition(srcTable.getCols(), colName);
- if (bucketPos < 0) {
- return null;
- }
- newBucketPositions.add(bucketPos);
+ if (!validateSMBJoinKeys(smbJoinDesc, sourceTableBucketCols,
+ sourceTableSortCols, sortOrder)) {
+ return null;
}
- // Find the positions/order of the sorted columns in the table corresponding
- // to the select list.
- List> newSortPositions =
- new ArrayList>();
- for (int pos = 0; pos < sortPositions.size(); pos++) {
- ExprNodeColumnDesc col = sourceTableSortCols.get(pos);
- String colName = col.getColumn();
- int sortPos = findColumnPosition(srcTable.getCols(), colName);
- if (sortPos < 0) {
+ sourceTableBucketCols.clear();
+ sourceTableSortCols.clear();
+ useBucketSortPositions = false;
+
+ for (ExprNodeDesc keyBigTable : keysBigTable) {
+ if (!(keyBigTable instanceof ExprNodeColumnDesc)) {
return null;
}
- newSortPositions.add(
- new ObjectPair(sortPos, sortPositions.get(pos).getSecond()));
+ sourceTableBucketCols.add((ExprNodeColumnDesc) keyBigTable);
+ sourceTableSortCols.add((ExprNodeColumnDesc) keyBigTable);
}
+ // since it is a sort-merge join, only follow the big table
+ op = op.getParentOperators().get(posBigTable);
+ } else {
+ // nothing to be done for filters - the output schema does not change.
+ if (op instanceof TableScanOperator) {
+ assert !useBucketSortPositions;
+ Table srcTable = pGraphContext.getTopToTable().get(op);
- if (srcTable.isPartitioned()) {
- PrunedPartitionList prunedParts = pGraphContext.getOpToPartList().get(op);
- List partitions = prunedParts.getNotDeniedPartns();
+ // Find the positions of the bucketed columns in the table corresponding
+ // to the select list.
+ // Consider the following scenario:
+ // T1(key, value1, value2) bucketed/sorted by key into 2 buckets
+ // T2(dummy, key, value1, value2) bucketed/sorted by key into 2 buckets
+ // A query like: insert overwrite table T2 select 1, key, value1, value2 from T1
+ // should be optimized.
- // Support for dynamic partitions can be added later
- // The following is not optimized:
- // insert overwrite table T1(ds='1', hr) select key, value, hr from T2 where ds = '1';
- // where T1 and T2 are bucketed by the same keys and partitioned by ds. hr
- if ((partitions == null) || (partitions.isEmpty()) || (partitions.size() > 1)) {
- return null;
+ // Start with the destination: T2, bucketed/sorted position is [1]
+ // At the source T1, the column corresponding to that position is [key], which
+ // maps to column [0] of T1, which is also bucketed/sorted into the same
+ // number of buckets
+ List newBucketPositions = new ArrayList();
+ for (int pos = 0; pos < bucketPositions.size(); pos++) {
+ ExprNodeColumnDesc col = sourceTableBucketCols.get(pos);
+ String colName = col.getColumn();
+ int bucketPos = findColumnPosition(srcTable.getCols(), colName);
+ if (bucketPos < 0) {
+ return null;
+ }
+ newBucketPositions.add(bucketPos);
}
- for (Partition partition : partitions) {
- if (!checkPartition(partition, newBucketPositions, newSortPositions,
- pGraphContext.getFsopToTable().get(fsOp).getNumBuckets())) {
+
+ // Find the positions/order of the sorted columns in the table corresponding
+ // to the select list.
+ List newSortPositions = new ArrayList();
+ for (int pos = 0; pos < sortPositions.size(); pos++) {
+ ExprNodeColumnDesc col = sourceTableSortCols.get(pos);
+ String colName = col.getColumn();
+ int sortPos = findColumnPosition(srcTable.getCols(), colName);
+ if (sortPos < 0) {
return null;
}
+ newSortPositions.add(sortPos);
}
- removeReduceSink(rsOp, (TableScanOperator) op, fsOp,
- partitions.get(0).getSortedPaths());
- return null;
- }
- else {
- if (!checkTable(srcTable, newBucketPositions, newSortPositions,
- pGraphContext.getFsopToTable().get(fsOp).getNumBuckets())) {
+ if (srcTable.isPartitioned()) {
+ PrunedPartitionList prunedParts = pGraphContext.getOpToPartList().get(op);
+ List partitions = prunedParts.getNotDeniedPartns();
+
+ // Support for dynamic partitions can be added later
+ // The following is not optimized:
+ // insert overwrite table T1(ds='1', hr) select key, value, hr from T2 where ds = '1';
+ // where T1 and T2 are bucketed by the same keys and partitioned by ds. hr
+ if ((partitions == null) || (partitions.isEmpty()) || (partitions.size() > 1)) {
+ return null;
+ }
+ for (Partition partition : partitions) {
+ if (!checkPartition(partition, newBucketPositions, newSortPositions, sortOrder,
+ numBucketsDestination)) {
+ return null;
+ }
+ }
+
+ removeReduceSink(rsOp, (TableScanOperator) op, fsOp,
+ partitions.get(0).getSortedPaths());
return null;
}
+ else {
+ if (!checkTable(srcTable, newBucketPositions, newSortPositions, sortOrder,
+ numBucketsDestination)) {
+ return null;
+ }
- removeReduceSink(rsOp, (TableScanOperator) op, fsOp, srcTable.getSortedPaths());
- return null;
+ removeReduceSink(rsOp, (TableScanOperator) op, fsOp, srcTable.getSortedPaths());
+ return null;
+ }
}
- }
- // None of the operators is changing the positions
- else if (op instanceof SelectOperator) {
- SelectOperator selectOp = (SelectOperator) op;
- SelectDesc selectDesc = selectOp.getConf();
+ // None of the operators is changing the positions
+ else if (op instanceof SelectOperator) {
+ SelectOperator selectOp = (SelectOperator) op;
+ SelectDesc selectDesc = selectOp.getConf();
- // There may be multiple selects - chose the one closest to the table
- sourceTableBucketCols.clear();
- sourceTableSortCols.clear();
+ // Iterate backwards, from the destination table to the top of the tree
+ // Based on the output column names, get the new columns.
+ if (!useBucketSortPositions) {
+ bucketPositions.clear();
+ sortPositions.clear();
+ List outputColumnNames = selectDesc.getOutputColumnNames();
- // Only columns can be selected for both sorted and bucketed positions
- for (int pos : bucketPositions) {
- ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
- if (!(selectColList instanceof ExprNodeColumnDesc)) {
- return null;
+ for (ExprNodeColumnDesc col : sourceTableBucketCols) {
+ String colName = col.getColumn();
+ int colPos = outputColumnNames.indexOf(colName);
+ if (colPos < 0) {
+ return null;
+ }
+ bucketPositions.add(colPos);
+ }
+
+ for (ExprNodeColumnDesc col : sourceTableSortCols) {
+ String colName = col.getColumn();
+ int colPos = outputColumnNames.indexOf(colName);
+ if (colPos < 0) {
+ return null;
+ }
+ sortPositions.add(colPos);
+ }
}
- sourceTableBucketCols.add((ExprNodeColumnDesc) selectColList);
- }
- for (ObjectPair pos : sortPositions) {
- ExprNodeDesc selectColList = selectDesc.getColList().get(pos.getFirst());
- if (!(selectColList instanceof ExprNodeColumnDesc)) {
- return null;
+ // There may be multiple selects - chose the one closest to the table
+ sourceTableBucketCols.clear();
+ sourceTableSortCols.clear();
+
+ // Only columns can be selected for both sorted and bucketed positions
+ for (int pos : bucketPositions) {
+ ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
+ if (!(selectColList instanceof ExprNodeColumnDesc)) {
+ return null;
+ }
+ sourceTableBucketCols.add((ExprNodeColumnDesc) selectColList);
}
- sourceTableSortCols.add((ExprNodeColumnDesc) selectColList);
+
+ for (int pos : sortPositions) {
+ ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
+ if (!(selectColList instanceof ExprNodeColumnDesc)) {
+ return null;
+ }
+ sourceTableSortCols.add((ExprNodeColumnDesc) selectColList);
+ }
+
+ useBucketSortPositions = false;
}
+ op = op.getParentOperators().get(0);
}
}
}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (revision 1467161)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (working copy)
@@ -98,10 +98,11 @@
// in the execution context. This is needed for the following scenario:
// insert overwrite table T1 select * from T2;
// where T1 and T2 are sorted/bucketed by the same keys into the same number of buckets
- // Although one mapper per file is used (bucketizedinputhiveinput), it is possible that
+ // Although one mapper per file is used (BucketizedInputHiveInput), it is possible that
// any mapper can pick up any file (depending on the size of the files). The bucket number
// corresponding to the input file is stored to name the output bucket file appropriately.
- Map bucketNameMapping = conf != null ? conf.getBucketFileNameMapping() : null;
+ Map bucketNameMapping =
+ (conf != null) ? conf.getBucketFileNameMapping() : null;
if ((bucketNameMapping != null) && (!bucketNameMapping.isEmpty())) {
String currentInputFile = getExecContext().getCurrentInputFile();
getExecContext().setFileId(Integer.toString(bucketNameMapping.get(