Index: conf/hive-default.xml.template
===================================================================
--- conf/hive-default.xml.template (revision 1407273)
+++ conf/hive-default.xml.template (working copy)
@@ -1557,6 +1557,25 @@
Whether to enable TCP keepalive for the Hive server. Keepalive will prevent accumulation of half-open connections.
+
+<property>
+  <name>hive.exec.infer.bucket.sort</name>
+  <value>true</value>
+  <description>
+    If this is set, when writing partitions, the metadata will include the bucketing/sorting
+    properties with which the data was written if any (this will not overwrite the metadata
+    inherited from the table if the table is bucketed/sorted)
+  </description>
+</property>
+
+<property>
+  <name>hive.exec.infer.bucket.sort.num.buckets.power.two</name>
+  <value>false</value>
+  <description>
+    If this is set, when setting the number of reducers for the map reduce task which writes the
+    final output files, it will choose a number which is a power of two, unless the user specifies
+    the number of reducers to use using mapred.reduce.tasks.
+  </description>
+</property>
+
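
As a rough illustration of the power-of-two behavior described by the second property, here is a
minimal sketch (the class name, method name, and the choice to round up rather than down are
illustrative assumptions, not taken from this patch):

public class PowerOfTwoReducersSketch {
  // Round an inferred reducer count to a power of two, e.g. 5 -> 8, 16 -> 16.
  // Per the description above, this rounding does not apply when the user has set
  // mapred.reduce.tasks explicitly.
  static int roundToPowerOfTwo(int numReducers) {
    int result = 1;
    while (result < numReducers) {
      result <<= 1;   // double until we reach or pass numReducers
    }
    return result;
  }

  public static void main(String[] args) {
    System.out.println(roundToPowerOfTwo(5));   // prints 8
    System.out.println(roundToPowerOfTwo(16));  // prints 16
  }
}
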
Index: build-common.xml
===================================================================
--- build-common.xml (revision 1407273)
+++ build-common.xml (working copy)
@@ -57,7 +57,7 @@
-
+
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1407273)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -654,6 +654,15 @@
HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES(
"hive.multi.insert.move.tasks.share.dependencies", false),
+ // If this is set, when writing partitions, the metadata will include the bucketing/sorting
+ // properties with which the data was written if any (this will not overwrite the metadata
+ // inherited from the table if the table is bucketed/sorted)
+ HIVE_INFER_BUCKET_SORT("hive.exec.infer.bucket.sort", true),
+ // If this is set, when setting the number of reducers for the map reduce task which writes the
+ // final output files, it will choose a number which is a power of two
+ HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO(
+ "hive.exec.infer.bucket.sort.num.buckets.power.two", false),
+
/* The following section contains all configurations used for list bucketing feature.*/
// Enable list bucketing DDL. Default value is false so that we disable it by default.
// This will be removed once the rest of the DML changes are committed.
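
For reference, a minimal sketch of reading the two new settings from a HiveConf instance (the
wrapper class and main method are illustrative assumptions; getBoolVar is HiveConf's standard
accessor for boolean ConfVars, and the two enum constants are the ones added above):

import org.apache.hadoop.hive.conf.HiveConf;

public class InferBucketSortConfSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Whether bucketing/sorting metadata inferred from the reducer should be recorded on
    // partitions as they are written (it does not override table-level metadata).
    boolean inferBucketSort =
        conf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT);
    // Whether the inferred number of buckets should be forced to a power of two.
    boolean powerOfTwoBuckets =
        conf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO);
    System.out.println("infer=" + inferBucketSort + ", powerOfTwo=" + powerOfTwoBuckets);
  }
}
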
Index: ql/src/test/results/clientpositive/infer_bucket_sort.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort.q.out (revision 0)
@@ -0,0 +1,1599 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 10968
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 10968
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key, value]
+Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 4970
+ totalSize 5998
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 2654
+ rawDataSize 28466
+ totalSize 31120
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 2654
+ rawDataSize 28466
+ totalSize 31120
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test distribute by, should only be clustered by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test distribute by, should only be clustered by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test sort by, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test sort by, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test sort by desc, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key DESC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test sort by desc, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key DESC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: [Order(col:key, order:0)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test cluster by, should be bucketed and sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src CLUSTER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test cluster by, should be bucketed and sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src CLUSTER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test distribute by and sort by different keys, should be distributed by one key sorted by the other
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key SORT BY value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test distribute by and sort by different keys, should be distributed by one key sorted by the other
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key SORT BY value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 6
+ rawDataSize 18
+ totalSize 24
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 618
+ rawDataSize 2964
+ totalSize 3582
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 5
+ rawDataSize 19
+ totalSize 24
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and
+-- sorted by the column the group by key ends up in
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and
+-- sorted by the column the group by key ends up in
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 309
+ rawDataSize 1482
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
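The golden output above already walks each inference case end to end. As a rough manual check outside the test harness (a sketch only; it reuses the src and test_table objects created by the test and relies on the hive.exec.infer.bucket.sort flag introduced by this patch), the same three outcomes seen in the DESCRIBE FORMATTED blocks can be reproduced from the Hive CLI:

SET hive.exec.infer.bucket.sort=true;

-- Group by key inside a subquery: the partition should come out bucketed and sorted by key.
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT key, value FROM (SELECT key, count(1) AS value FROM src GROUP BY key) a WHERE key < 10;
DESCRIBE FORMATTED test_table PARTITION (part = '1');
-- Expect: Bucket Columns: [key], Sort Columns: [Order(col:key, order:1)]

-- DISTRIBUTE BY fixes only the bucketing; no sort order is inferred.
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT key, value FROM (SELECT key, count(1) AS value FROM src GROUP BY key) a DISTRIBUTE BY key;
DESCRIBE FORMATTED test_table PARTITION (part = '1');
-- Expect: Bucket Columns: [key], Sort Columns: []

-- SORT BY fixes only the sort order; the partition is left unbucketed.
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT key, value FROM (SELECT key, count(1) AS value FROM src GROUP BY key) a SORT BY key;
DESCRIBE FORMATTED test_table PARTITION (part = '1');
-- Expect: Num Buckets: -1, Bucket Columns: [], Sort Columns: [Order(col:key, order:1)]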
Index: ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out (revision 0)
@@ -0,0 +1,69 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where not every reducer writes a file.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where not every reducer writes a file.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Tests a query with more reducers than rows, so the data should be sorted by key, but
+-- not bucketed
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM (SELECT key FROM src LIMIT 3) a GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Tests a query with more reducers than rows, so the data should be sorted by key, but
+-- not bucketed
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM (SELECT key FROM src LIMIT 3) a GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 3
+ rawDataSize 14
+ totalSize 17
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
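The expected output above is easier to read next to the queries that produce it. A minimal HiveQL sketch of the scenario this test exercises, assuming only the hive.exec.infer.bucket.sort property this patch introduces and the same src/test_table tables used above:

SET hive.exec.infer.bucket.sort=true;

CREATE TABLE IF NOT EXISTS test_table (key STRING, value STRING)
PARTITIONED BY (part STRING);

-- More reducers than distinct keys: some reducers write no file, so only
-- the sort order (by key) can be recorded; Num Buckets stays -1.
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT key, count(*) FROM (SELECT key FROM src LIMIT 3) a GROUP BY key;

-- The inferred properties surface under Num Buckets / Bucket Columns /
-- Sort Columns in the partition's storage descriptor.
DESCRIBE FORMATTED test_table PARTITION (part = '1');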
Index: ql/src/test/results/clientpositive/input_part7.q.out
===================================================================
--- ql/src/test/results/clientpositive/input_part7.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/input_part7.q.out (working copy)
@@ -248,6 +248,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [null-subquery1:a-subquery1:x, null-subquery2:a-subquery2:y]
/srcpart/ds=2008-04-08/hr=12 [null-subquery1:a-subquery1:x, null-subquery2:a-subquery2:y]
Index: ql/src/test/results/clientpositive/pcr.q.out
===================================================================
--- ql/src/test/results/clientpositive/pcr.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/pcr.q.out (working copy)
@@ -220,6 +220,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-09 [pcr_t1]
@@ -464,6 +466,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-09 [pcr_t1]
@@ -706,6 +710,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-09 [pcr_t1]
@@ -912,6 +918,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-10 [pcr_t1]
@@ -1166,6 +1174,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-09 [pcr_t1]
@@ -1431,6 +1441,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-09 [pcr_t1]
@@ -1654,6 +1666,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-09 [pcr_t1]
@@ -1839,6 +1853,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-09 [pcr_t1]
@@ -2110,6 +2126,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-09 [pcr_t1]
@@ -2369,6 +2387,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-09 [pcr_t1]
@@ -2457,6 +2477,8 @@
type: string
expr: ds
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -2546,6 +2568,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [t2, t1]
@@ -2611,6 +2635,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
@@ -2733,6 +2759,8 @@
type: string
expr: ds
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -2868,6 +2896,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [t1]
/pcr_t1/ds=2000-04-09 [t2]
@@ -2934,6 +2964,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
@@ -3277,6 +3309,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-09 [pcr_t1]
@@ -3571,6 +3605,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/pcr_t1/ds=2000-04-08 [pcr_t1]
/pcr_t1/ds=2000-04-09 [pcr_t1]
@@ -4958,6 +4994,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
@@ -5175,6 +5213,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
@@ -5399,6 +5439,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-09/hr=11 [srcpart]
Index: ql/src/test/results/clientpositive/join33.q.out
===================================================================
--- ql/src/test/results/clientpositive/join33.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/join33.q.out (working copy)
@@ -175,6 +175,8 @@
value expressions:
expr: value
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -284,6 +286,8 @@
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [z]
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out (revision 0)
@@ -0,0 +1,125 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where merging may or may not be used.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where merging may or may not be used.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Tests a reduce task followed by a merge. The output should be neither bucketed nor sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Tests a reduce task followed by a merge. The output should be neither bucketed nor sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Tests a reduce task followed by a move. The output should be bucketed and sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Tests a reduce task followed by a move. The output should be bucketed and sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 2
+ numRows 0
+ rawDataSize 0
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 2
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
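For the merge-versus-move contrast above, a sketch of the two runs; hive.merge.mapredfiles is assumed here to be the knob that enables or disables the post-reduce merge job, and test_table is the same table defined in the test:

SET hive.exec.infer.bucket.sort=true;

-- Merge enabled (assumed via hive.merge.mapredfiles): a merge job rewrites
-- the reducer output, so neither bucketing nor sorting is inferred.
SET hive.merge.mapredfiles=true;
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;
DESCRIBE FORMATTED test_table PARTITION (part = '1');

-- Merge disabled: the reducer output is moved into place as written, and
-- the partition metadata records bucketing and sorting on the join key.
SET hive.merge.mapredfiles=false;
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;
DESCRIBE FORMATTED test_table PARTITION (part = '1');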
Index: ql/src/test/results/clientpositive/groupby_sort_1.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_sort_1.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/groupby_sort_1.q.out (working copy)
@@ -2511,6 +2511,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -2593,6 +2595,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/t1 [null-subquery2:subq1-subquery2:t1]
@@ -3368,6 +3372,8 @@
value expressions:
expr: _col2
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -3454,6 +3460,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/t1 [subq2:t1]
@@ -3516,6 +3524,8 @@
type: string
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -3621,6 +3631,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/t1 [subq1:t1]
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out (revision 0)
@@ -0,0 +1,139 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where joins may be auto converted to map joins.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where joins may be auto converted to map joins.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 10968
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Tests a join which is not converted to a map join, the output should be bucketed and sorted
+
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+Execution failed with exit status: 3
+Obtaining error information
+
+Task failed!
+Task ID:
+ Stage-7
+
+Logs:
+
+#### A masked pattern was here ####
+FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.MapredLocalTask
+ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.MapRedTask
+POSTHOOK: query: -- Tests a join which is not converted to a map join, the output should be bucketed and sorted
+
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 1028
+ rawDataSize 10968
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
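The auto-conversion case above follows the same pattern; hive.auto.convert.join is assumed to be the toggle, since only a reduce-side join leaves output that the inference can describe:

SET hive.exec.infer.bucket.sort=true;

-- Converted to a map join: the job is map-only, so nothing about bucketing
-- or sorting can be inferred for the written partition.
SET hive.auto.convert.join=true;
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;

-- Left as a reduce-side join: the output is bucketed and sorted on the
-- join key, and the partition metadata reflects that.
SET hive.auto.convert.join=false;
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;

DESCRIBE FORMATTED test_table PARTITION (part = '1');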
Index: ql/src/test/results/clientpositive/filter_join_breaktask.q.out
===================================================================
--- ql/src/test/results/clientpositive/filter_join_breaktask.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/filter_join_breaktask.q.out (working copy)
@@ -83,6 +83,8 @@
type: string
expr: ds
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -157,6 +159,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/filter_join_breaktask/ds=2008-04-08 [f, m]
@@ -197,6 +201,8 @@
value expressions:
expr: value
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -296,6 +302,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/filter_join_breaktask/ds=2008-04-08 [g]
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/input_part9.q.out
===================================================================
--- ql/src/test/results/clientpositive/input_part9.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/input_part9.q.out (working copy)
@@ -169,6 +169,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [x]
/srcpart/ds=2008-04-08/hr=12 [x]
Index: ql/src/test/results/clientpositive/join35.q.out
===================================================================
--- ql/src/test/results/clientpositive/join35.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/join35.q.out (working copy)
@@ -78,6 +78,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -156,6 +158,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [null-subquery1:subq1-subquery1:x]
@@ -569,6 +573,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -647,6 +653,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [null-subquery2:subq1-subquery2:x1]
Index: ql/src/test/results/clientpositive/ppd_join_filter.q.out
===================================================================
--- ql/src/test/results/clientpositive/ppd_join_filter.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/ppd_join_filter.q.out (working copy)
@@ -66,6 +66,8 @@
value expressions:
expr: _col1
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -153,6 +155,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [b:src]
@@ -190,6 +194,8 @@
value expressions:
expr: key
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -287,6 +293,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [a]
#### A masked pattern was here ####
@@ -398,6 +406,8 @@
value expressions:
expr: _col1
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -485,6 +495,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [b:src]
@@ -522,6 +534,8 @@
value expressions:
expr: key
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -619,6 +633,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [a]
#### A masked pattern was here ####
@@ -730,6 +746,8 @@
value expressions:
expr: _col1
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -817,6 +835,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [b:src]
@@ -854,6 +874,8 @@
value expressions:
expr: key
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -951,6 +973,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [a]
#### A masked pattern was here ####
@@ -1062,6 +1086,8 @@
value expressions:
expr: _col1
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -1149,6 +1175,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [b:src]
@@ -1186,6 +1214,8 @@
value expressions:
expr: key
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -1283,6 +1313,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [a]
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/sample6.q.out
===================================================================
--- ql/src/test/results/clientpositive/sample6.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/sample6.q.out (working copy)
@@ -681,6 +681,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcbucket/srcbucket1.txt [s]
@@ -1055,6 +1057,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcbucket/srcbucket0.txt [s]
@@ -1683,6 +1687,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcbucket [s]
@@ -2154,6 +2160,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcbucket [s]
@@ -2655,6 +2663,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcbucket2/srcbucket20.txt [s]
/srcbucket2/srcbucket22.txt [s]
@@ -2913,6 +2923,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcbucket2/srcbucket21.txt [s]
@@ -3046,6 +3058,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/join_filters_overlap.q.out
===================================================================
--- ql/src/test/results/clientpositive/join_filters_overlap.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/join_filters_overlap.q.out (working copy)
@@ -90,6 +90,8 @@
type: int
expr: value
type: int
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -185,6 +187,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/a [b, c, a]
@@ -296,6 +300,8 @@
type: int
expr: value
type: int
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -391,6 +397,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/a [b, c, a]
@@ -502,6 +510,8 @@
type: int
expr: value
type: int
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -597,6 +607,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/a [b, c, a]
@@ -726,6 +738,8 @@
type: int
expr: value
type: int
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -829,6 +843,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/a [d, b, c, a]
@@ -964,6 +980,8 @@
type: int
expr: value
type: int
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -1066,6 +1084,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/a [d, b, c, a]
Index: ql/src/test/results/clientpositive/reduce_deduplicate.q.out
===================================================================
--- ql/src/test/results/clientpositive/reduce_deduplicate.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/reduce_deduplicate.q.out (working copy)
@@ -48,6 +48,8 @@
type: string
expr: _col1
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -128,6 +130,8 @@
TotalFiles: 2
GatherStats: true
MultiFileSpray: true
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [src]
Index: ql/src/test/results/clientpositive/index_auto_mult_tables.q.out
===================================================================
--- ql/src/test/results/clientpositive/index_auto_mult_tables.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/index_auto_mult_tables.q.out (working copy)
@@ -294,7 +294,7 @@
Group By Operator
aggregations:
expr: collect_set(_col1)
- bucketGroup: false
+ bucketGroup: true
keys:
expr: _col0
type: string
Index: ql/src/test/results/clientpositive/udtf_explode.q.out
===================================================================
--- ql/src/test/results/clientpositive/udtf_explode.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/udtf_explode.q.out (working copy)
@@ -237,6 +237,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -291,6 +293,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
@@ -511,6 +515,8 @@
value expressions:
expr: _col2
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -569,6 +575,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/merge3.q.out
===================================================================
--- ql/src/test/results/clientpositive/merge3.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/merge3.q.out (working copy)
@@ -4847,6 +4847,8 @@
type: string
expr: _col2
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/smb_mapjoin_13.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (working copy)
@@ -249,6 +249,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
@@ -504,6 +506,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out (working copy)
@@ -297,6 +297,8 @@
value expressions:
expr: _col2
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -374,6 +376,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/t1 [t1]
@@ -1327,6 +1331,8 @@
value expressions:
expr: _col3
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -1406,6 +1412,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/t1 [t1]
@@ -1678,6 +1686,8 @@
value expressions:
expr: _col2
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -1755,6 +1765,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/t1 [t1]
@@ -2064,6 +2076,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -2139,6 +2153,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/t1 [subq1:t1]
@@ -2788,6 +2804,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -2863,6 +2881,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/t1 [null-subquery2:subq1-subquery2:t1]
@@ -2882,6 +2902,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -2934,6 +2956,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
@@ -3708,6 +3732,8 @@
value expressions:
expr: _col2
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -3785,6 +3811,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/t1 [subq2:t1]
@@ -3808,6 +3836,8 @@
value expressions:
expr: _col2
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -3864,6 +3894,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
@@ -3926,6 +3958,8 @@
type: string
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -4031,6 +4065,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/t1 [subq1:t1]
#### A masked pattern was here ####
@@ -4201,6 +4237,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -4276,6 +4314,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/t2 [t2]
Index: ql/src/test/results/clientpositive/sample8.q.out
===================================================================
--- ql/src/test/results/clientpositive/sample8.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/sample8.q.out (working copy)
@@ -330,6 +330,8 @@
type: string
expr: _col3
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -369,6 +371,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/transform_ppr2.q.out
===================================================================
--- ql/src/test/results/clientpositive/transform_ppr2.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/transform_ppr2.q.out (working copy)
@@ -73,6 +73,8 @@
type: string
expr: _col2
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -197,6 +199,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [tmap:src]
/srcpart/ds=2008-04-08/hr=12 [tmap:src]
@@ -205,7 +209,6 @@
Fetch Operator
limit: -1
-
PREHOOK: query: FROM (
FROM srcpart src
SELECT TRANSFORM(src.ds, src.key, src.value)
Index: ql/src/test/results/clientpositive/union_ppr.q.out
===================================================================
--- ql/src/test/results/clientpositive/union_ppr.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/union_ppr.q.out (working copy)
@@ -250,6 +250,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [null-subquery1:a-subquery1:x, null-subquery2:a-subquery2:y]
/srcpart/ds=2008-04-08/hr=12 [null-subquery1:a-subquery1:x, null-subquery2:a-subquery2:y]
Index: ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out
===================================================================
--- ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out (working copy)
@@ -86,7 +86,7 @@
Group By Operator
aggregations:
expr: collect_set(_col1)
- bucketGroup: false
+ bucketGroup: true
keys:
expr: _col0
type: string
Index: ql/src/test/results/clientpositive/router_join_ppr.q.out
===================================================================
--- ql/src/test/results/clientpositive/router_join_ppr.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/router_join_ppr.q.out (working copy)
@@ -75,6 +75,8 @@
type: string
expr: ds
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -356,6 +358,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [a]
/srcpart/ds=2008-04-08/hr=11 [b]
@@ -483,6 +487,8 @@
type: string
expr: value
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -665,6 +671,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [b]
/srcpart/ds=2008-04-08/hr=11 [a]
@@ -788,6 +796,8 @@
type: string
expr: ds
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -970,6 +980,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [a]
/srcpart/ds=2008-04-08/hr=11 [b]
@@ -1093,6 +1105,8 @@
type: string
expr: value
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -1369,6 +1383,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [b]
/srcpart/ds=2008-04-08/hr=11 [a]
Index: ql/src/test/results/clientpositive/input42.q.out
===================================================================
--- ql/src/test/results/clientpositive/input42.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/input42.q.out (working copy)
@@ -164,6 +164,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [a]
/srcpart/ds=2008-04-08/hr=12 [a]
@@ -1354,6 +1356,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [a]
/srcpart/ds=2008-04-08/hr=12 [a]
@@ -1922,6 +1926,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [a]
/srcpart/ds=2008-04-08/hr=12 [a]
Index: ql/src/test/results/clientpositive/union24.q.out
===================================================================
--- ql/src/test/results/clientpositive/union24.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/union24.q.out (working copy)
@@ -97,6 +97,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -175,6 +177,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src5 [null-subquery2:s-subquery2:src5]
@@ -481,6 +485,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src2 [null-subquery1-subquery1-subquery1:s-subquery1-subquery1-subquery1:src2]
/src3 [null-subquery1-subquery1-subquery2:s-subquery1-subquery1-subquery2:src3]
@@ -622,6 +628,8 @@
value expressions:
expr: count
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -741,6 +749,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src4 [null-subquery2:s-subquery2:a]
/src5 [null-subquery2:s-subquery2:b]
@@ -969,6 +979,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src2 [null-subquery1-subquery1:s-subquery1-subquery1:src2]
/src3 [null-subquery1-subquery2:s-subquery1-subquery2:src3]
@@ -1243,6 +1255,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -1295,6 +1309,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
@@ -1522,6 +1538,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src2 [null-subquery1-subquery1:s-subquery1-subquery1:src2]
/src3 [null-subquery1-subquery2:s-subquery1-subquery2:src3]
Index: ql/src/test/results/clientpositive/sample10.q.out
===================================================================
--- ql/src/test/results/clientpositive/sample10.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/sample10.q.out (working copy)
@@ -93,6 +93,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -325,6 +327,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpartbucket/ds=2008-04-08/hr=11/000000_0 [srcpartbucket]
/srcpartbucket/ds=2008-04-08/hr=12/000000_0 [srcpartbucket]
@@ -385,6 +389,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/louter_join_ppr.q.out
===================================================================
--- ql/src/test/results/clientpositive/louter_join_ppr.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/louter_join_ppr.q.out (working copy)
@@ -73,6 +73,8 @@
type: string
expr: value
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -255,6 +257,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [a]
/srcpart/ds=2008-04-08/hr=11 [b]
@@ -378,6 +382,8 @@
type: string
expr: value
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -659,6 +665,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [b]
/srcpart/ds=2008-04-08/hr=11 [a]
@@ -788,6 +796,8 @@
type: string
expr: ds
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -1064,6 +1074,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [a]
/srcpart/ds=2008-04-08/hr=11 [b]
@@ -1193,6 +1205,8 @@
type: string
expr: value
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -1375,6 +1389,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [b]
/srcpart/ds=2008-04-08/hr=11 [a]
Index: ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out
===================================================================
--- ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out (revision 0)
+++ ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out (revision 0)
@@ -0,0 +1,1601 @@
+PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, it also verifies that the
+-- number of reducers chosen will be a power of two
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, it also verifies that the
+-- number of reducers chosen will be a power of two
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table
+PREHOOK: query: -- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 3
+ numRows 0
+ rawDataSize 0
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 11996
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key, value]
+Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 3
+ numRows 0
+ rawDataSize 0
+ totalSize 5998
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 31120
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 16
+ numRows 0
+ rawDataSize 0
+ totalSize 31120
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 16
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test distribute by, should only be clustered by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test distribute by, should only be clustered by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test sort by, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test sort by, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 3
+ numRows 0
+ rawDataSize 0
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test sort by desc, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key DESC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test sort by desc, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key DESC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 3
+ numRows 0
+ rawDataSize 0
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: [Order(col:key, order:0)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test cluster by, should be bucketed and sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src CLUSTER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test cluster by, should be bucketed and sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src CLUSTER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test distribute by and sort by different keys, should be distributed by one key sorted by the other
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key SORT BY value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test distribute by and sort by different keys, should be distributed by one key sorted by the other
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key SORT BY value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 24
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 3582
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 24
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 1
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and
+-- sorted by the column the group by key ends up in
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and
+-- sorted by the column the group by key ends up in
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [value]
+Sort Columns: [Order(col:value, order:1)]
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 0
+ rawDataSize 0
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 4
+Bucket Columns: [key]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table@part=1
+POSTHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table@part=1
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE []
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT []
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_table
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 3
+ numRows 0
+ rawDataSize 0
+ totalSize 1791
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: [Order(col:key, order:1)]
+Storage Desc Params:
+ serialization.format 1
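The DESCRIBE FORMATTED blocks above capture what the new test asserts: a terminal GROUP BY leaves the written partition bucketed and sorted on the group-by key, a trailing DISTRIBUTE BY leaves only Bucket Columns, and a trailing SORT BY leaves only Sort Columns with Num Buckets: -1. A minimal HiveQL sketch of the same checks, assuming a session where the inference flag added by this patch (hive.exec.infer.bucket.sort) is honored and test_table is the plain partitioned table created at the top of the test:

-- minimal sketch, assuming hive.exec.infer.bucket.sort is enabled for this session
SET hive.exec.infer.bucket.sort=true;

-- terminal GROUP BY: partition metadata should record bucketing and sorting on key
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT key, count(*) FROM src GROUP BY key;
DESCRIBE FORMATTED test_table PARTITION (part = '1');

-- trailing DISTRIBUTE BY: bucketed on key only, no sort columns
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT key, value FROM (SELECT key, count(1) AS value FROM src GROUP BY key) a DISTRIBUTE BY key;
DESCRIBE FORMATTED test_table PARTITION (part = '1');

-- trailing SORT BY: sorted on key only, Num Buckets stays -1
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT key, value FROM (SELECT key, count(1) AS value FROM src GROUP BY key) a SORT BY key;
DESCRIBE FORMATTED test_table PARTITION (part = '1');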
Index: ql/src/test/results/clientpositive/bucket_groupby.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucket_groupby.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/bucket_groupby.q.out (working copy)
@@ -60,7 +60,7 @@
Group By Operator
aggregations:
expr: count(1)
- bucketGroup: false
+ bucketGroup: true
keys:
expr: key
type: string
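The bucket_groupby.q.out hunk above flips bucketGroup to true, which is consistent with the scanned partition now carrying bucketing metadata that matches the group-by key. The remaining .q.out hunks below (udf_explode, metadataonly1, ctas, outer_join_ppr, transform_ppr1, regexp_extract) only pick up two new sections that EXPLAIN EXTENDED now emits for map-reduce stages, Path -> Bucketed Columns and Path -> Sorted Columns, with the paths masked as usual. A hedged sketch of how those sections can be surfaced; the query is illustrative and is not the one used by these tests:

-- illustrative only, not the query from udf_explode.q or the other tests touched below
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
SELECT key, count(1) FROM src GROUP BY key;
-- with the inference feature in place, the plan's map-reduce stage should include
-- "Path -> Bucketed Columns:" and "Path -> Sorted Columns:" entries per output path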
Index: ql/src/test/results/clientpositive/udf_explode.q.out
===================================================================
--- ql/src/test/results/clientpositive/udf_explode.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/udf_explode.q.out (working copy)
@@ -237,6 +237,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -291,6 +293,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
@@ -571,6 +575,8 @@
value expressions:
expr: _col2
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -629,6 +635,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/metadataonly1.q.out
===================================================================
--- ql/src/test/results/clientpositive/metadataonly1.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/metadataonly1.q.out (working copy)
@@ -1145,6 +1145,8 @@
value expressions:
expr: _col1
type: bigint
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -1294,6 +1296,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/test2/ds=1/hr=1 [test2]
/test2/ds=1/hr=2 [test2]
Index: ql/src/test/results/clientpositive/ctas.q.out
===================================================================
--- ql/src/test/results/clientpositive/ctas.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/ctas.q.out (working copy)
@@ -823,6 +823,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [src]
@@ -887,6 +889,8 @@
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
#### A masked pattern was here ####
Index: ql/src/test/results/clientpositive/outer_join_ppr.q.out
===================================================================
--- ql/src/test/results/clientpositive/outer_join_ppr.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/outer_join_ppr.q.out (working copy)
@@ -65,6 +65,8 @@
type: string
expr: ds
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -346,6 +348,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [a]
/srcpart/ds=2008-04-08/hr=11 [b]
@@ -465,6 +469,8 @@
type: string
expr: ds
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
@@ -741,6 +747,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [a]
/srcpart/ds=2008-04-08/hr=11 [b]
Index: ql/src/test/results/clientpositive/transform_ppr1.q.out
===================================================================
--- ql/src/test/results/clientpositive/transform_ppr1.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/transform_ppr1.q.out (working copy)
@@ -71,6 +71,8 @@
type: string
expr: _col2
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -289,6 +291,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [tmap:src]
/srcpart/ds=2008-04-08/hr=12 [tmap:src]
Index: ql/src/test/results/clientpositive/regexp_extract.q.out
===================================================================
--- ql/src/test/results/clientpositive/regexp_extract.q.out (revision 1407273)
+++ ql/src/test/results/clientpositive/regexp_extract.q.out (working copy)
@@ -72,6 +72,8 @@
type: string
expr: _col1
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -144,6 +146,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [tmap:src]
@@ -330,6 +334,8 @@
type: string
expr: _col1
type: string
+ Path -> Bucketed Columns:
+#### A masked pattern was here ####
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -402,6 +408,8 @@
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
+ Path -> Sorted Columns:
+#### A masked pattern was here ####
Truncated Path -> Alias:
/src [tmap:src]
Index: ql/src/test/results/compiler/plan/join2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join2.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/join2.q.xml (working copy)
@@ -974,6 +974,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/input2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input2.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input2.q.xml (working copy)
@@ -2681,6 +2681,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/join3.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join3.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/join3.q.xml (working copy)
@@ -1547,6 +1547,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/input3.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input3.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input3.q.xml (working copy)
@@ -3347,6 +3347,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/join4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join4.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/join4.q.xml (working copy)
@@ -1515,6 +1515,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input4.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input4.q.xml (working copy)
@@ -1009,6 +1009,9 @@
+
+ true
+
true
@@ -1451,21 +1454,21 @@
- OP_4
+ EX_4
+
+ true
+
Index: ql/src/test/results/compiler/plan/input5.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input5.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input5.q.xml (working copy)
@@ -1039,6 +1039,9 @@
+
+ true
+
true
@@ -1489,21 +1492,21 @@
- OP_4
+ EX_4
+
+ true
+
Index: ql/src/test/results/compiler/plan/input_testxpath2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input_testxpath2.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input_testxpath2.q.xml (working copy)
@@ -875,6 +875,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input6.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input6.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input6.q.xml (working copy)
@@ -1063,6 +1063,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/join7.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join7.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/join7.q.xml (working copy)
@@ -2260,6 +2260,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input7.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input7.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input7.q.xml (working copy)
@@ -979,6 +979,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/input8.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input8.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input8.q.xml (working copy)
@@ -675,6 +675,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/join8.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join8.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/join8.q.xml (working copy)
@@ -1597,6 +1597,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input_testsequencefile.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input_testsequencefile.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input_testsequencefile.q.xml (working copy)
@@ -987,6 +987,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/union.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/union.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/union.q.xml (working copy)
@@ -1674,6 +1674,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input9.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input9.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input9.q.xml (working copy)
@@ -1056,6 +1056,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/udf1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf1.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/udf1.q.xml (working copy)
@@ -1926,6 +1926,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/udf4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf4.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/udf4.q.xml (working copy)
@@ -1828,6 +1828,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input_testxpath.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input_testxpath.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input_testxpath.q.xml (working copy)
@@ -787,6 +787,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/udf6.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf6.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/udf6.q.xml (working copy)
@@ -606,6 +606,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input_part1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input_part1.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input_part1.q.xml (working copy)
@@ -786,6 +786,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/groupby1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby1.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/groupby1.q.xml (working copy)
@@ -997,6 +997,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/udf_case.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf_case.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/udf_case.q.xml (working copy)
@@ -700,6 +700,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/groupby2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby2.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/groupby2.q.xml (working copy)
@@ -1121,6 +1121,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/subq.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/subq.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/subq.q.xml (working copy)
@@ -1105,6 +1105,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/groupby3.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby3.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/groupby3.q.xml (working copy)
@@ -1320,6 +1320,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/groupby4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby4.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/groupby4.q.xml (working copy)
@@ -770,6 +770,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/groupby5.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby5.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/groupby5.q.xml (working copy)
@@ -863,6 +863,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/groupby6.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby6.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/groupby6.q.xml (working copy)
@@ -770,6 +770,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/case_sensitivity.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/case_sensitivity.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/case_sensitivity.q.xml (working copy)
@@ -1262,6 +1262,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/udf_when.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/udf_when.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/udf_when.q.xml (working copy)
@@ -780,6 +780,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/input20.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input20.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input20.q.xml (working copy)
@@ -878,6 +878,9 @@
+
+ true
+
@@ -1514,21 +1517,21 @@
- OP_4
+ EX_4
+
+ true
+
Index: ql/src/test/results/compiler/plan/sample2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample2.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/sample2.q.xml (working copy)
@@ -1189,6 +1189,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/sample3.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample3.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/sample3.q.xml (working copy)
@@ -1199,6 +1199,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/sample4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample4.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/sample4.q.xml (working copy)
@@ -1189,6 +1189,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/sample5.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample5.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/sample5.q.xml (working copy)
@@ -1186,6 +1186,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/sample6.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample6.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/sample6.q.xml (working copy)
@@ -1189,6 +1189,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/sample7.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/sample7.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/sample7.q.xml (working copy)
@@ -1271,6 +1271,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/cast1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/cast1.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/cast1.q.xml (working copy)
@@ -1100,6 +1100,9 @@
+
+ true
+
Index: ql/src/test/results/compiler/plan/join1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/join1.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/join1.q.xml (working copy)
@@ -1102,6 +1102,9 @@
+
+ true
+
true
Index: ql/src/test/results/compiler/plan/input1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/input1.q.xml (revision 1407273)
+++ ql/src/test/results/compiler/plan/input1.q.xml (working copy)
@@ -1077,6 +1077,9 @@
+
+ true
+
true
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q (revision 0)
@@ -0,0 +1,119 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.exec.infer.bucket.sort.num.buckets.power.two=true;
+set hive.exec.reducers.bytes.per.reducer=2500;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata, it also verifies that the
+-- number of reducers chosen will be a power of two
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING);
+
+-- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by where a key isn't selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key, value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join with two keys and only one selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key);
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value);
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test distribute by, should only be clustered by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test sort by, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key ASC;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test sort by desc, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key DESC;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test cluster by, should be bucketed and sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src CLUSTER BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test distribute by and sort by different keys, should be distributed by one key sorted by the other
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key SORT BY value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with where outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with lateral view outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with select on outside reordering the columns, should be bucketed and
+-- sorted by the column the group by key ends up in
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery followed by sort by, should only be sorted by the sort key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
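As the comments in the test above note, the power-of-two behaviour only applies when Hive itself estimates the reducer count; an explicit mapred.reduce.tasks always wins. A minimal standalone sketch of one such rounding rule follows; the class name, method name, and the choice to round upward are assumptions for illustration, not code from this patch.

    // Illustrative only: one way to snap an estimated reducer count to a power of two.
    public final class PowerOfTwoExample {
      static int nextPowerOfTwo(int n) {
        int p = 1;
        while (p < n) {
          p <<= 1;          // double until we reach or pass n
        }
        return p;
      }

      public static void main(String[] args) {
        // If hive.exec.reducers.bytes.per.reducer would have produced, say, 5 reducers,
        // an upward power-of-two policy would run 8 of them instead.
        System.out.println(nextPowerOfTwo(5));   // 8
        System.out.println(nextPowerOfTwo(16));  // 16
      }
    }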
Index: ql/src/test/queries/clientpositive/infer_bucket_sort.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort.q (revision 0)
@@ -0,0 +1,117 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.exec.infer.bucket.sort.num.buckets.power.two=true;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING);
+
+-- Test group by, should be bucketed and sorted by group by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by where a key isn't selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM src GROUP BY key, value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join with two keys, should be bucketed and sorted by join keys
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join with two keys and only one selected, should not be bucketed or sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join on three tables on same key, should be bucketed and sorted by join key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key);
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test join on three tables on different keys, should be bucketed and sorted by latter key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value);
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test distribute by, should only be clustered by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test sort by, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key ASC;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test sort by desc, should be sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src SORT BY key DESC;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test cluster by, should be bucketed and sorted by key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src CLUSTER BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test distribute by and sort by different keys, should be distributed by one key sorted by the other
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM src DISTRIBUTE BY key SORT BY value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with where outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with lateral view outside, should still be bucketed and sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with another group by outside, should be bucketed and sorted by the
+-- key of the outer group by
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery with select on outside reordering the columns, should be bucketed and
+-- sorted by the column the group by key ends up in
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+-- Test group by in subquery followed by sort by, should only be sorted by the sort key
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_num_buckets.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_num_buckets.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_num_buckets.q (revision 0)
@@ -0,0 +1,17 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.exec.infer.bucket.sort.num.buckets.power.two=true;
+set mapred.reduce.tasks=5;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where not every reducer writes a file.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING);
+
+-- Tests a query with more reducers than rows, so the data should be sorted by key, but
+-- not bucketed
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT key, count(*) FROM (SELECT key FROM src LIMIT 3) a GROUP BY key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
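The case exercised above is the one where fewer files than reducers are produced, so a bucket count read off the reducer count would be wrong while the sort order is still valid. A self-contained sketch of that decision follows; the names are hypothetical and a return value of 0 stands in for "not bucketed".

    import java.util.Arrays;
    import java.util.List;

    // Illustration only: how a consumer of this metadata might decide whether the
    // reducer count can be trusted as a bucket count.
    public final class BucketCountCheckExample {
      static int inferredNumBuckets(int numReducers, List<String> filesWritten) {
        // Every reducer wrote a file: the file count doubles as the bucket count.
        // Otherwise the data may still be sorted within each file, but is not bucketed.
        return filesWritten.size() == numReducers ? numReducers : 0;
      }

      public static void main(String[] args) {
        System.out.println(inferredNumBuckets(5, Arrays.asList("000000_0", "000002_0"))); // 0
        System.out.println(inferredNumBuckets(2, Arrays.asList("000000_0", "000001_0"))); // 2
      }
    }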
Index: ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q
===================================================================
--- ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q (revision 1407273)
+++ ql/src/test/queries/clientpositive/reduce_deduplicate_exclude_gby.q (working copy)
@@ -1,5 +1,7 @@
create table t1( key_int1 int, key_int2 int, key_string1 string, key_string2 string);
+set hive.optimize.reducededuplication=false;
+
set hive.map.aggr=false;
select Q1.key_int1, sum(Q1.key_int1) from (select * from t1 cluster by key_int1) Q1 group by Q1.key_int1;
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_merge.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_merge.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_merge.q (revision 0)
@@ -0,0 +1,25 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.exec.infer.bucket.sort.num.buckets.power.two=true;
+set hive.merge.mapredfiles=true;
+set mapred.reduce.tasks=2;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where merging may or may not be used.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING);
+
+-- Tests a reduce task followed by a merge. The output should be neither bucketed nor sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+set hive.merge.smallfiles.avgsize=2;
+set hive.exec.compress.output=false;
+
+-- Tests a reduce task followed by a move. The output should be bucketed and sorted.
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
Index: ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q
===================================================================
--- ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q (revision 0)
+++ ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q (revision 0)
@@ -0,0 +1,25 @@
+set hive.exec.infer.bucket.sort=true;
+set hive.exec.infer.bucket.sort.num.buckets.power.two=true;
+set hive.auto.convert.join=true;
+
+-- This tests inferring how data is bucketed/sorted from the operators in the reducer
+-- and populating that information in partitions' metadata. In particular, those cases
+-- where joins may be auto converted to map joins.
+
+CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING);
+
+-- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
+
+set hive.mapjoin.check.memory.rows=1;
+set hive.mapjoin.localtask.max.memory.usage = 0.0001;
+
+-- Tests a join which is not converted to a map join, the output should be bucketed and sorted
+
+INSERT OVERWRITE TABLE test_table PARTITION (part = '1')
+SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key;
+
+DESCRIBE FORMATTED test_table PARTITION (part = '1');
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 1407273)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy)
@@ -110,6 +110,9 @@
parseCtx.getQB().getParseInfo().isInsertToTable();
HiveConf hconf = parseCtx.getConf();
+ // Mark this task as a final map reduce task (ignoring the optional merge task)
+ ((MapredWork)currTask.getWork()).setFinalMapRed(true);
+
// If this file sink desc has been processed due to a linked file sink desc,
// use that task
Map<FileSinkDesc, Task<? extends Serializable>> fileSinkDescs = ctx.getLinkedFileDescTasks();
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/bucketingsorting/BucketingSortingCtx.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/bucketingsorting/BucketingSortingCtx.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/bucketingsorting/BucketingSortingCtx.java (revision 0)
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.bucketingsorting;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * This class contains the bucketing sorting context that is passed
+ * while walking the operator tree in inferring bucket/sort columns. The context
+ * contains the mappings from operators and files to the columns their output is
+ * bucketed/sorted on.
+ */
+public class BucketingSortingCtx implements NodeProcessorCtx {
+
+ Map<Operator<? extends OperatorDesc>, List<BucketCol>> bucketedColsByOp;
+ Map<String, List<BucketCol>> bucketedColsByFile;
+
+ Map<Operator<? extends OperatorDesc>, List<SortCol>> sortedColsByOp;
+ Map<String, List<SortCol>> sortedColsByFile;
+
+ public BucketingSortingCtx() {
+ this.bucketedColsByOp = new HashMap<Operator<? extends OperatorDesc>, List<BucketCol>>();
+ this.bucketedColsByFile = new HashMap<String, List<BucketCol>>();
+ this.sortedColsByOp = new HashMap<Operator<? extends OperatorDesc>, List<SortCol>>();
+ this.sortedColsByFile = new HashMap<String, List<SortCol>>();
+ }
+
+
+ public List<BucketCol> getBucketedCols(Operator<? extends OperatorDesc> op) {
+ return bucketedColsByOp.get(op);
+ }
+
+
+ public void setBucketedCols(Operator<? extends OperatorDesc> op, List<BucketCol> bucketCols) {
+ this.bucketedColsByOp.put(op, bucketCols);
+ }
+
+ public Map<String, List<BucketCol>> getBucketedColsByFile() {
+ return bucketedColsByFile;
+ }
+
+
+ public void setBucketedColsByFile(Map<String, List<BucketCol>> bucketedColsByFile) {
+ this.bucketedColsByFile = bucketedColsByFile;
+ }
+
+
+ public List<SortCol> getSortedCols(Operator<? extends OperatorDesc> op) {
+ return sortedColsByOp.get(op);
+ }
+
+
+ public void setSortedCols(Operator<? extends OperatorDesc> op, List<SortCol> sortedCols) {
+ this.sortedColsByOp.put(op, sortedCols);
+ }
+
+ public Map<String, List<SortCol>> getSortedColsByFile() {
+ return sortedColsByFile;
+ }
+
+
+ public void setSortedColsByFile(Map<String, List<SortCol>> sortedColsByFile) {
+ this.sortedColsByFile = sortedColsByFile;
+ }
+
+ public static interface BucketSortCol {
+ public List<String> getNames();
+
+ public List<Integer> getIndexes();
+
+ public void addAlias(String name, Integer index);
+ }
+
+ public static final class BucketCol implements BucketSortCol, Serializable {
+ private static final long serialVersionUID = 1L;
+ // Equivalent aliases for the column
+ private final List<String> names = new ArrayList<String>();
+ // Indexes of those equivalent columns
+ private final List<Integer> indexes = new ArrayList<Integer>();
+
+ public BucketCol(String name, int index) {
+ addAlias(name, index);
+ }
+
+ public BucketCol() {
+
+ }
+
+ @Override
+ public List<String> getNames() {
+ return names;
+ }
+
+ @Override
+ public List<Integer> getIndexes() {
+ return indexes;
+ }
+
+ @Override
+ public void addAlias(String name, Integer index) {
+ names.add(name);
+ indexes.add(index);
+ }
+
+ @Override
+ public String toString() {
+ return "name: " + names.get(0) + " index: " + indexes.get(0);
+ }
+ }
+
+ public static final class SortCol implements BucketSortCol, Serializable {
+ private static final long serialVersionUID = 1L;
+ // Equivalent aliases for the column
+ private final List<String> names = new ArrayList<String>();
+ // Indexes of those equivalent columns
+ private final List<Integer> indexes = new ArrayList<Integer>();
+ // Sort order (+|-)
+ private final char sortOrder;
+
+ public SortCol(String name, int index, char sortOrder) {
+ this(sortOrder);
+ addAlias(name, index);
+ }
+
+ public SortCol(char sortOrder) {
+ this.sortOrder = sortOrder;
+ }
+
+
+ @Override
+ public List<String> getNames() {
+ return names;
+ }
+
+ @Override
+ public List<Integer> getIndexes() {
+ return indexes;
+ }
+
+ @Override
+ public void addAlias(String name, Integer index) {
+ names.add(name);
+ indexes.add(index);
+ }
+
+ public char getSortOrder() {
+ return sortOrder;
+ }
+
+ @Override
+ public String toString() {
+ return "name: " + names.get(0) + " index: " + indexes.get(0) + " order: " + sortOrder;
+ }
+ }
+}
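Both value classes above record every alias a logical column picks up on its way to the final FileSink, since a bucketing or sorting column may be renamed or repositioned by intervening SELECTs. A self-contained sketch of that alias-tracking idea follows; it deliberately does not use the Hive classes, and the internal column names are made up.

    import java.util.ArrayList;
    import java.util.List;

    // Standalone illustration of the alias-tracking idea behind BucketCol/SortCol:
    // one logical column can appear under several internal names and positions,
    // so every equivalent (name, index) pair is recorded.
    public final class AliasTrackingExample {
      static final class TrackedCol {
        final List<String> names = new ArrayList<String>();
        final List<Integer> indexes = new ArrayList<Integer>();
        void addAlias(String name, int index) { names.add(name); indexes.add(index); }
      }

      public static void main(String[] args) {
        TrackedCol key = new TrackedCol();
        key.addAlias("_col0", 0);   // name/position coming out of the group by
        key.addAlias("key", 1);     // same column after an outer SELECT value, key
        System.out.println(key.names + " at positions " + key.indexes);
      }
    }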
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/bucketingsorting/BucketingSortingOpProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/bucketingsorting/BucketingSortingOpProcFactory.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/bucketingsorting/BucketingSortingOpProcFactory.java (revision 0)
@@ -0,0 +1,680 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.bucketingsorting;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.ForwardOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Utils;
+import org.apache.hadoop.hive.ql.optimizer.bucketingsorting.BucketingSortingCtx.BucketCol;
+import org.apache.hadoop.hive.ql.optimizer.bucketingsorting.BucketingSortingCtx.BucketSortCol;
+import org.apache.hadoop.hive.ql.optimizer.bucketingsorting.BucketingSortingCtx.SortCol;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * Operator factory for the rule processors for inferring bucketing/sorting columns.
+ */
+public class BucketingSortingOpProcFactory {
+
+ public static class DefaultInferrer implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ return null;
+ }
+
+ }
+
+ /**
+ * Infers bucket/sort columns for operators which simply forward rows from the parent
+ * E.g. Forward operators and SELECT *
+ * @param op
+ * @param bctx
+ * @param parent
+ * @throws SemanticException
+ */
+ private static void processForward(Operator<? extends OperatorDesc> op, BucketingSortingCtx bctx,
+ Operator<? extends OperatorDesc> parent) throws SemanticException {
+
+ List<BucketCol> bucketCols = bctx.getBucketedCols(parent);
+ List<SortCol> sortCols = bctx.getSortedCols(parent);
+ ArrayList<ColumnInfo> col_infos = op.getSchema().getSignature();
+
+ if (bucketCols == null && sortCols == null) {
+ bctx.setBucketedCols(op, null);
+ bctx.setSortedCols(op, null);
+ return;
+ }
+
+ List<BucketCol> newBucketCols;
+ List<SortCol> newSortCols;
+
+ if (bucketCols == null) {
+ newBucketCols = null;
+ } else {
+ newBucketCols = getNewBucketCols(bucketCols, col_infos);
+ }
+
+ if (sortCols == null) {
+ newSortCols = null;
+ } else {
+ newSortCols = getNewSortCols(sortCols, col_infos);
+ }
+
+ bctx.setBucketedCols(op, newBucketCols);
+ bctx.setSortedCols(op, newSortCols);
+ }
+
+ /**
+ * Returns the parent operator in the walk path to the current operator.
+ *
+ * @param stack The stack encoding the path.
+ *
+ * @return Operator The parent operator in the current path.
+ */
+ @SuppressWarnings("unchecked")
+ protected static Operator<? extends OperatorDesc> getParent(Stack<Node> stack) {
+ return (Operator<? extends OperatorDesc>)Utils.getNthAncestor(stack, 1);
+ }
+
+ /**
+ * Processor for Join Operator.
+ */
+ public static class JoinInferrer extends DefaultInferrer implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
+ JoinOperator jop = (JoinOperator)nd;
+ ArrayList<ColumnInfo> col_infos = jop.getSchema().getSignature();
+ Byte[] order = jop.getConf().getTagOrder();
+
+ BucketCol[] newBucketCols = null;
+ SortCol[] newSortCols = null;
+
+ bctx.setBucketedCols(jop, null);
+ bctx.setSortedCols(jop, null);
+
+ for (int i = 0; i < jop.getParentOperators().size(); i++) {
+ // If the parent is not a ReduceSinkOperator this is not a common join, and we don't
+ // know how to handle it, abort
+ if(!(jop.getParentOperators().get(i) instanceof ReduceSinkOperator)) {
+ return null;
+ }
+
+ ReduceSinkOperator parent = (ReduceSinkOperator)jop.getParentOperators().get(i);
+
+ String sortOrder = parent.getConf().getOrder();
+ List<BucketCol> bucketCols = new ArrayList<BucketCol>();
+ List<SortCol> sortCols = new ArrayList<SortCol>();
+ // Go through the Reduce keys and find the matching column(s) in the reduce values
+ for (int keyIndex = 0; keyIndex < parent.getConf().getKeyCols().size(); keyIndex++) {
+ for (int valueIndex = 0; valueIndex < parent.getConf().getValueCols().size();
+ valueIndex++) {
+
+ if (new ExprNodeDescEqualityWrapper(parent.getConf().getValueCols().get(valueIndex)).
+ equals(new ExprNodeDescEqualityWrapper(parent.getConf().getKeyCols().get(
+ keyIndex)))) {
+
+ String colName = parent.getSchema().getSignature().get(valueIndex).getInternalName();
+ bucketCols.add(new BucketCol(colName, keyIndex));
+ sortCols.add(new SortCol(colName, keyIndex, sortOrder.charAt(keyIndex)));
+ break;
+ }
+ }
+ }
+
+ if (bucketCols.isEmpty()) {
+ assert(sortCols.isEmpty());
+ continue;
+ }
+
+ if (newBucketCols == null) {
+ assert(newSortCols == null);
+ // The number of join keys is equal to the number of keys in every reducer, although
+ // not every key may map to a value in the reducer
+ newBucketCols = new BucketCol[parent.getConf().getKeyCols().size()];
+ newSortCols = new SortCol[parent.getConf().getKeyCols().size()];
+ } else {
+ assert(newSortCols != null);
+ }
+
+ byte tag = (byte)parent.getConf().getTag();
+ List<ExprNodeDesc> exprs = jop.getConf().getExprs().get(tag);
+
+ int colInfosOffset = 0;
+ int orderValue = order[tag];
+ // Columns are output from the join from the different reduce sinks in the order of their
+ // offsets
+ for (byte orderIndex = 0; orderIndex < order.length; orderIndex++) {
+ if (order[orderIndex] < orderValue) {
+ colInfosOffset += jop.getConf().getExprs().get(orderIndex).size();
+ }
+ }
+
+ findBucketingSortingColumns(exprs, col_infos, bucketCols, sortCols, newBucketCols,
+ newSortCols, colInfosOffset);
+
+ }
+
+ // If the list of output bucket columns has been populated and every column has at least
+ // one representative in the output they can be inferred
+ if (newBucketCols != null) {
+ List<BucketCol> newBucketColList = Arrays.asList(newBucketCols);
+ bctx.setBucketedCols(jop, !newBucketColList.contains(null) ?
+ newBucketColList : null);
+ }
+
+ // If the list of output sort columns has been populated and every column has at least
+ // one representative in the output they can be inferred
+ if (newSortCols != null) {
+ List<SortCol> newSortColList = Arrays.asList(newSortCols);
+ bctx.setSortedCols(jop, !newSortColList.contains(null) ?
+ newSortColList : null);
+ }
+
+ return null;
+ }
+
+ }
+
+ private static void findBucketingSortingColumns(List<ExprNodeDesc> exprs,
+ List<ColumnInfo> col_infos, List<BucketCol> bucketCols, List<SortCol> sortCols,
+ BucketCol[] newBucketCols, SortCol[] newSortCols) {
+ findBucketingSortingColumns(exprs, col_infos, bucketCols, sortCols, newBucketCols,
+ newSortCols, 0);
+ }
+
+ /**
+ * For each expression, check if it represents a column known to be bucketed/sorted.
+ *
+ * @param exprs - list of expression
+ * @param col_infos - list of column infos
+ * @param bucketCols - list of bucketed columns from the input
+ * @param sortCols - list of sorted columns from the input
+ * @param newBucketCols - an array of bucket columns which should be the same length as
+ * bucketCols, updated such that the bucketed column(s) at index i in bucketCols became
+ * the bucketed column(s) at index i of newBucketCols in the output
+ * @param newSortCols - an array of sort columns which should be the same length as
+ * sortCols, updated such that the sorted column(s) at index i in sortCols became
+ * the sorted column(s) at index i of sortCols in the output
+ * @param colInfosOffset - the expressions are known to be represented by column infos
+ * beginning at this index
+ */
+ private static void findBucketingSortingColumns(List<ExprNodeDesc> exprs,
+ List<ColumnInfo> col_infos, List<BucketCol> bucketCols, List<SortCol> sortCols,
+ BucketCol[] newBucketCols, SortCol[] newSortCols, int colInfosOffset) {
+ for(int cnt = 0; cnt < exprs.size(); cnt++) {
+ ExprNodeDesc expr = exprs.get(cnt);
+
+ // Only columns can be sorted/bucketed, in particular applying a function to a column
+ // voids any assumptions
+ if (!(expr instanceof ExprNodeColumnDesc)) {
+ continue;
+ }
+
+ ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc)expr;
+
+ int colInfosIndex = cnt + colInfosOffset;
+
+ if (newBucketCols != null) {
+ int bucketIndex = indexOfColName(bucketCols, columnExpr.getColumn());
+ if (bucketIndex != -1) {
+ if (newBucketCols[bucketIndex] == null) {
+ newBucketCols[bucketIndex] = new BucketCol();
+ }
+ newBucketCols[bucketIndex].addAlias(
+ col_infos.get(colInfosIndex).getInternalName(), colInfosIndex);
+ }
+ }
+
+ if (newSortCols != null) {
+ int sortIndex = indexOfColName(sortCols, columnExpr.getColumn());
+ if (sortIndex != -1) {
+ if (newSortCols[sortIndex] == null) {
+ newSortCols[sortIndex] = new SortCol(sortCols.get(sortIndex).getSortOrder());
+ }
+ newSortCols[sortIndex].addAlias(
+ col_infos.get(colInfosIndex).getInternalName(), colInfosIndex);
+ }
+ }
+ }
+ }
+
+ /**
+ * Processor for Select operator.
+ */
+ public static class SelectInferrer extends DefaultInferrer implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
+ SelectOperator sop = (SelectOperator)nd;
+
+ Operator<? extends OperatorDesc> parent = getParent(stack);
+
+ // if this is a selStarNoCompute then this select operator
+ // is treated like a plain forwarding operator, so it is handled by
+ // processForward like the other pass-through operators.
+ if (sop.getConf().isSelStarNoCompute()) {
+ processForward(sop, bctx, parent);
+ return null;
+ }
+
+ List<BucketCol> bucketCols = bctx.getBucketedCols(parent);
+ List<SortCol> sortCols = bctx.getSortedCols(parent);
+ ArrayList<ColumnInfo> col_infos = sop.getSchema().getSignature();
+
+ if (bucketCols == null && sortCols == null) {
+ return null;
+ }
+
+ BucketCol[] newBucketCols = null;
+ SortCol[] newSortCols = null;
+ if (bucketCols != null) {
+ newBucketCols = new BucketCol[bucketCols.size()];
+ }
+ if (sortCols != null) {
+ newSortCols = new SortCol[sortCols.size()];
+ }
+
+ findBucketingSortingColumns(sop.getConf().getColList(), col_infos, bucketCols, sortCols,
+ newBucketCols, newSortCols);
+
+ if (newBucketCols != null) {
+ List<BucketCol> newBucketColList = Arrays.asList(newBucketCols);
+ bctx.setBucketedCols(sop, !newBucketColList.contains(null) ?
+ newBucketColList : null);
+ }
+
+ if (newSortCols != null) {
+ List<SortCol> newSortColList = Arrays.asList(newSortCols);
+ bctx.setSortedCols(sop, !newSortColList.contains(null) ?
+ newSortColList : null);
+ }
+
+ return null;
+ }
+
+ }
+
+ /**
+ * Find the BucketSortCol which has colName as one of its aliases. Returns the index of that
+ * BucketSortCol, or -1 if none exist
+ * @param bucketSortCols
+ * @param colName
+ * @return
+ */
+ private static int indexOfColName(List<? extends BucketSortCol> bucketSortCols, String colName) {
+ for (int index = 0; index < bucketSortCols.size(); index++) {
+ BucketSortCol bucketSortCol = bucketSortCols.get(index);
+ if (bucketSortCol.getNames().indexOf(colName) != -1) {
+ return index;
+ }
+ }
+
+ return -1;
+ }
+
+ /**
+ * This is used to construct new lists of bucketed columns where the order of the columns
+ * hasn't changed, only possibly the name
+ * @param bucketCols - input bucketed columns
+ * @param col_infos - List of column infos
+ * @return output bucketed columns
+ */
+ private static List<BucketCol> getNewBucketCols(List<BucketCol> bucketCols,
+ List<ColumnInfo> col_infos) {
+
+ List<BucketCol> newBucketCols = new ArrayList<BucketCol>(bucketCols.size());
+ for (int i = 0; i < bucketCols.size(); i++) {
+ BucketCol bucketCol = new BucketCol();
+ for (Integer index : bucketCols.get(i).getIndexes()) {
+ // The only time this condition should be false is in the case of dynamic partitioning
+ if (index < col_infos.size()) {
+ bucketCol.addAlias(col_infos.get(index).getInternalName(), index);
+ } else {
+ return null;
+ }
+ }
+ newBucketCols.add(bucketCol);
+ }
+ return newBucketCols;
+ }
+
+ /**
+ * This is used to construct new lists of sorted columns where the order of the columns
+ * hasn't changed, only possibly the name
+ * @param bucketCols - input sorted columns
+ * @param col_infos - List of column infos
+ * @return output sorted columns
+ */
+ private static List<SortCol> getNewSortCols(List<SortCol> sortCols, List<ColumnInfo> col_infos) {
+ List<SortCol> newSortCols = new ArrayList<SortCol>(sortCols.size());
+ for (int i = 0; i < sortCols.size(); i++) {
+ SortCol sortCol = new SortCol(sortCols.get(i).getSortOrder());
+ for (Integer index : sortCols.get(i).getIndexes()) {
+ // The only time this condition should be false is in the case of dynamic partitioning
+ if (index < col_infos.size()) {
+ sortCol.addAlias(col_infos.get(index).getInternalName(), index);
+ } else {
+ return null;
+ }
+ }
+ newSortCols.add(sortCol);
+ }
+ return newSortCols;
+ }
+
+ /**
+ * Processor for FileSink operator.
+ */
+ public static class FileSinkInferrer extends DefaultInferrer implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
+ FileSinkOperator fop = (FileSinkOperator)nd;
+
+ Operator<? extends OperatorDesc> parent = getParent(stack);
+ List<BucketCol> bucketCols = bctx.getBucketedCols(parent);
+ List<ColumnInfo> col_infos = fop.getSchema().getSignature();
+
+ // Set the inferred bucket columns for the file this FileSink produces
+ if (bucketCols != null) {
+ List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, col_infos);
+ bctx.getBucketedColsByFile().put(fop.getConf().getDirName(), newBucketCols);
+ }
+
+ List<SortCol> sortCols = bctx.getSortedCols(parent);
+
+ // Set the inferred sort columns for the file this FileSink produces
+ if (sortCols != null) {
+ List<SortCol> newSortCols = getNewSortCols(sortCols, col_infos);
+ bctx.getSortedColsByFile().put(fop.getConf().getDirName(), newSortCols);
+ }
+
+ return null;
+ }
+
+ }
+
+ /**
+ * Processor for Extract operator.
+ */
+ public static class ExtractInferrer extends DefaultInferrer implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
+ ExtractOperator exop = (ExtractOperator)nd;
+
+ // If this is not preceded by exactly one ReduceSinkOperator, this is an unknown situation,
+ // abort
+ if (exop.getParentOperators().size() != 1 ||
+ !(exop.getParentOperators().get(0) instanceof ReduceSinkOperator)) {
+ return null;
+ }
+
+ ReduceSinkOperator rop = (ReduceSinkOperator)exop.getParentOperators().get(0);
+
+ // Go through the set of partition columns, and find their representatives in the values
+ // These represent the bucketed columns
+ List<BucketCol> bucketCols = new ArrayList<BucketCol>();
+ for (int i = 0; i < rop.getConf().getPartitionCols().size(); i++) {
+ boolean valueColFound = false;
+ for (int j = 0; j < rop.getConf().getValueCols().size(); j++) {
+ if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals(
+ new ExprNodeDescEqualityWrapper(rop.getConf().getPartitionCols().get(i)))) {
+
+ bucketCols.add(new BucketCol(
+ rop.getSchema().getSignature().get(j).getInternalName(), j));
+ valueColFound = true;
+ break;
+ }
+ }
+
+ // If the partition columns can't all be found in the values then the data is not bucketed
+ if (!valueColFound) {
+ bucketCols.clear();
+ break;
+ }
+ }
+
+ // Go through the set of key columns, and find their representatives in the values
+ // These represent the sorted columns
+ String sortOrder = rop.getConf().getOrder();
+ List<SortCol> sortCols = new ArrayList<SortCol>();
+ for (int i = 0; i < rop.getConf().getKeyCols().size(); i++) {
+ boolean valueColFound = false;
+ for (int j = 0; j < rop.getConf().getValueCols().size(); j++) {
+ if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals(
+ new ExprNodeDescEqualityWrapper(rop.getConf().getKeyCols().get(i)))) {
+
+ sortCols.add(new SortCol(
+ rop.getSchema().getSignature().get(j).getInternalName(), j, sortOrder.charAt(i)));
+ valueColFound = true;
+ break;
+ }
+ }
+
+ // If the sorted columns can't all be found in the values then the data is only sorted on
+ // the columns seen up until now
+ if (!valueColFound) {
+ break;
+ }
+ }
+
+ if (!bucketCols.isEmpty()) {
+ List<ColumnInfo> col_infos = exop.getSchema().getSignature();
+ List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, col_infos);
+ bctx.setBucketedCols(exop, newBucketCols);
+ }
+
+ if (!sortCols.isEmpty()) {
+ ArrayList<ColumnInfo> col_infos = exop.getSchema().getSignature();
+ List<SortCol> newSortCols = getNewSortCols(sortCols, col_infos);
+ bctx.setSortedCols(exop, newSortCols);
+ }
+
+ return null;
+ }
+
+ }
+
+ /**
+ * Processor for GroupBy operator.
+ */
+ public static class GroupByInferrer extends DefaultInferrer implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
+ GroupByOperator gop = (GroupByOperator)nd;
+ ArrayList<ColumnInfo> col_infos = gop.getSchema().getSignature();
+
+ ReduceSinkOperator rop;
+
+
+ if (gop.getParentOperators().size() != 1) {
+ return null;
+ }
+
+ if (gop.getParentOperators().get(0) instanceof ReduceSinkOperator) {
+ // This should cover most cases
+ rop = (ReduceSinkOperator)gop.getParentOperators().get(0);
+ } else if (gop.getParentOperators().get(0) instanceof ForwardOperator) {
+ // Cover the multi-group by single reducer optimization where a forward operator forwards
+ // rows from the reducer to multiple group by operators
+ ForwardOperator forwardOp = (ForwardOperator)gop.getParentOperators().get(0);
+ if (forwardOp.getParentOperators().size() != 1) {
+ return null;
+ }
+ rop = (ReduceSinkOperator)forwardOp.getParentOperators().get(0);
+ } else {
+ // Unknown situation or map aggregation, abort
+ return null;
+ }
+
+ String sortOrder = rop.getConf().getOrder();
+ List<BucketCol> bucketCols = new ArrayList<BucketCol>();
+ List<SortCol> sortCols = new ArrayList<SortCol>();
+ // Group by operators select the key cols, so no need to find them in the values
+ for (int i = 0; i < rop.getConf().getKeyCols().size(); i++) {
+ if (i < rop.getSchema().getSignature().size()) {
+ String colName = rop.getSchema().getSignature().get(i).getInternalName();
+ bucketCols.add(new BucketCol(colName, i));
+ sortCols.add(new SortCol(colName, i, sortOrder.charAt(i)));
+ } else {
+ return null;
+ }
+ }
+ bctx.setBucketedCols(rop, bucketCols);
+ bctx.setSortedCols(rop, sortCols);
+
+ if (bucketCols.isEmpty()) {
+ assert sortCols.isEmpty();
+ return null;
+ }
+
+ BucketCol[] newBucketCols = new BucketCol[bucketCols.size()];
+ SortCol[] newSortCols = new SortCol[sortCols.size()];
+
+ findBucketingSortingColumns(gop.getConf().getKeys(), col_infos, bucketCols, sortCols,
+ newBucketCols, newSortCols);
+
+ List<BucketCol> newBucketColList = Arrays.asList(newBucketCols);
+ bctx.setBucketedCols(
+ gop, !newBucketColList.contains(null) ? newBucketColList : null);
+
+ List<SortCol> newSortColList = Arrays.asList(newSortCols);
+ bctx.setSortedCols(gop, !newSortColList.contains(null) ? newSortColList : null);
+
+ return null;
+ }
+
+ }
+
+ /**
+ * ReduceSink processor.
+ */
+ public static class ReduceSinkInferrer extends DefaultInferrer implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ return null;
+ }
+ }
+
+ /**
+ * Forwarding processor, shared by the Filter, Limit, Lateral View and Forward operators.
+ */
+ public static class ForwardingInferrer extends DefaultInferrer implements NodeProcessor {
+ @SuppressWarnings("unchecked")
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ // If this is the first operation in the reducer, return null
+ if (stack.size() == 1) {
+ return null;
+ }
+
+ processForward((Operator<? extends OperatorDesc>)nd, (BucketingSortingCtx)procCtx,
+ getParent(stack));
+
+ return null;
+ }
+ }
+
+ public static NodeProcessor getDefaultProc() {
+ return new DefaultInferrer();
+ }
+
+ public static NodeProcessor getJoinProc() {
+ return new JoinInferrer();
+ }
+
+ public static NodeProcessor getSelProc() {
+ return new SelectInferrer();
+ }
+
+ public static NodeProcessor getGroupByProc() {
+ return new GroupByInferrer();
+ }
+
+ public static NodeProcessor getReduceSinkProc() {
+ return new ReduceSinkInferrer();
+ }
+
+ public static NodeProcessor getFileSinkProc() {
+ return new FileSinkInferrer();
+ }
+
+ public static NodeProcessor getExtractProc() {
+ return new ExtractInferrer();
+ }
+
+ public static NodeProcessor getFilterProc() {
+ return new ForwardingInferrer();
+ }
+
+ public static NodeProcessor getLimitProc() {
+ return new ForwardingInferrer();
+ }
+
+ public static NodeProcessor getLateralViewForwardProc() {
+ return new ForwardingInferrer();
+ }
+
+ public static NodeProcessor getLateralViewJoinProc() {
+ return new ForwardingInferrer();
+ }
+
+ public static NodeProcessor getForwardProc() {
+ return new ForwardingInferrer();
+ }
+}
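The pivotal check in findBucketingSortingColumns above is that only a bare ExprNodeColumnDesc keeps a bucket/sort property; wrapping the column in any function or cast drops the inference. A standalone sketch of that rule follows, using plain strings instead of the Hive expression tree, so the names and the string comparison are stand-ins only.

    import java.util.Arrays;
    import java.util.List;

    // Standalone illustration: only a bare column reference preserves bucketing.
    public final class ColumnPassThroughExample {
      static boolean preservesBucketing(String selectExpr, String bucketedCol) {
        // A real implementation inspects the expression tree; equality is a stand-in.
        return selectExpr.equals(bucketedCol);
      }

      public static void main(String[] args) {
        List<String> selectList = Arrays.asList("key", "upper(key)", "value");
        for (String e : selectList) {
          System.out.println(e + " -> bucketing preserved: " + preservesBucketing(e, "key"));
        }
        // Prints true only for the bare "key" reference.
      }
    }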
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (revision 1407273)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (working copy)
@@ -31,6 +31,8 @@
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.optimizer.bucketingsorting.BucketingSortingCtx.BucketCol;
+import org.apache.hadoop.hive.ql.optimizer.bucketingsorting.BucketingSortingCtx.SortCol;
import org.apache.hadoop.hive.ql.parse.OpParseContext;
import org.apache.hadoop.hive.ql.parse.QBJoinTree;
import org.apache.hadoop.hive.ql.parse.SplitSample;
@@ -94,6 +96,15 @@
private transient boolean useBucketizedHiveInputFormat;
+ // if this is true, this means that this is the map reduce task which writes the final data,
+ // ignoring the optional merge task
+ private boolean finalMapRed = false;
+
+ private final Map<String, List<BucketCol>> bucketedColsByFile =
+ new HashMap<String, List<BucketCol>>();
+ private final Map<String, List<SortCol>> sortedColsByFile =
+ new HashMap<String, List<SortCol>>();
+
public MapredWork() {
aliasToPartnInfo = new LinkedHashMap();
}
@@ -298,6 +309,16 @@
this.numReduceTasks = numReduceTasks;
}
+ @Explain(displayName = "Path -> Bucketed Columns", normalExplain = false)
+ public Map<String, List<BucketCol>> getBucketedColsByFile() {
+ return bucketedColsByFile;
+ }
+
+ @Explain(displayName = "Path -> Sorted Columns", normalExplain = false)
+ public Map<String, List<SortCol>> getSortedColsByFile() {
+ return sortedColsByFile;
+ }
+
@SuppressWarnings("nls")
public void addMapWork(String path, String alias, Operator> work,
PartitionDesc pd) {
@@ -544,4 +565,12 @@
public void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat) {
this.useBucketizedHiveInputFormat = useBucketizedHiveInputFormat;
}
+
+ public boolean isFinalMapRed() {
+ return finalMapRed;
+ }
+
+ public void setFinalMapRed(boolean finalMapRed) {
+ this.finalMapRed = finalMapRed;
+ }
}
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1407273)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -57,12 +57,18 @@
import org.apache.hadoop.hive.ql.exec.ColumnStatsTask;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.ExecDriver;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.ForwardOperator;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -89,6 +95,7 @@
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
@@ -112,6 +119,8 @@
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
+import org.apache.hadoop.hive.ql.optimizer.bucketingsorting.BucketingSortingCtx;
+import org.apache.hadoop.hive.ql.optimizer.bucketingsorting.BucketingSortingOpProcFactory;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
@@ -7759,6 +7768,10 @@
physicalContext, conf);
physicalOptimizer.optimize();
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT)) {
+ inferBucketingSorting(Utilities.getMRTasks(rootTasks));
+ }
+
// For each operator, generate the counters if needed
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEJOBPROGRESS)) {
for (Task<? extends Serializable> rootTask : rootTasks) {
@@ -7820,6 +7833,62 @@
}
/**
+ * For each map reduce task, if it has a reducer, infer whether or not the final output of the
+ * reducer is bucketed and/or sorted
+ *
+ * @param mapRedTasks
+ * @throws SemanticException
+ */
+ private void inferBucketingSorting(List<ExecDriver> mapRedTasks) throws SemanticException {
+ for (ExecDriver mapRedTask : mapRedTasks) {
+ Operator<?> reducer = mapRedTask.getWork().getReducer();
+ if (reducer == null) {
+ continue;
+ }
+
+ BucketingSortingCtx bCtx = new BucketingSortingCtx();
+
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ opRules.put(new RuleRegExp("R1", SelectOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getSelProc());
+ opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getGroupByProc());
+ opRules.put(new RuleRegExp("R3", JoinOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getJoinProc());
+ opRules.put(new RuleRegExp("R4", ReduceSinkOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getReduceSinkProc());
+ opRules.put(new RuleRegExp("R5", FileSinkOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getFileSinkProc());
+ opRules.put(new RuleRegExp("R6", ExtractOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getExtractProc());
+ opRules.put(new RuleRegExp("R7", FilterOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getFilterProc());
+ opRules.put(new RuleRegExp("R8", LimitOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getLimitProc());
+ opRules.put(new RuleRegExp("R9", LateralViewForwardOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getLateralViewForwardProc());
+ opRules.put(new RuleRegExp("R10", LateralViewJoinOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getLateralViewJoinProc());
+ opRules.put(new RuleRegExp("R11", ForwardOperator.getOperatorName() + "%"),
+ BucketingSortingOpProcFactory.getForwardProc());
+
+ // The dispatcher fires the processor corresponding to the closest matching rule and passes
+ // the context along
+ Dispatcher disp = new DefaultRuleDispatcher(BucketingSortingOpProcFactory.getDefaultProc(),
+ opRules, bCtx);
+ GraphWalker ogw = new PreOrderWalker(disp);
+
+ // Create a list of topop nodes
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.add(reducer);
+ ogw.startWalking(topNodes, null);
+
+ mapRedTask.getWork().getBucketedColsByFile().putAll(bCtx.getBucketedColsByFile());
+ mapRedTask.getWork().getSortedColsByFile().putAll(bCtx.getSortedColsByFile());
+ }
+ }
+
+ /**
* Find all leaf tasks of the list of root tasks.
*/
private void getLeafTasks(List> rootTasks,
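For orientation, the walk above leaves its results on the MapredWork as two maps keyed by output directory. A minimal sketch of how a downstream consumer reads them, assuming only the getters this patch adds and a placeholder directory variable:

    // Sketch only: "finalOutputDir" is a stand-in for the FileSinkOperator's output directory,
    // which is the same key MoveTask later looks up via tbd.getSourceDir().
    MapredWork work = mapRedTask.getWork();
    List<BucketCol> bucketCols = work.getBucketedColsByFile().get(finalOutputDir);
    List<SortCol> sortCols = work.getSortedColsByFile().get(finalOutputDir);
    boolean inferred = (bucketCols != null || sortCols != null);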
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (revision 1407273)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (working copy)
@@ -403,7 +403,7 @@
}
/**
- * Updates the existing table metadata with the new metadata.
+ * Updates the existing partition metadata with the new metadata.
*
* @param tblName
* name of the existing table
@@ -416,13 +416,30 @@
public void alterPartition(String tblName, Partition newPart)
throws InvalidOperationException, HiveException {
Table t = newTable(tblName);
+ alterPartition(t.getDbName(), t.getTableName(), newPart);
+ }
+
+ /**
+ * Updates the existing partition metadata with the new metadata.
+ *
+ * @param dbName
+ * name of the existing table's database
+ * @param tblName
+ * name of the existing table
+ * @param newPart
+ * new partition
+ * @throws InvalidOperationException
+ * if the changes in metadata are not acceptable
+ * @throws HiveException
+ */
+ public void alterPartition(String dbName, String tblName, Partition newPart)
+ throws InvalidOperationException, HiveException {
try {
// Remove the DDL time so that it gets refreshed
if (newPart.getParameters() != null) {
newPart.getParameters().remove(hive_metastoreConstants.DDL_TIME);
}
- getMSC().alter_partition(t.getDbName(), t.getTableName(),
- newPart.getTPartition());
+ getMSC().alter_partition(dbName, tblName, newPart.getTPartition());
} catch (MetaException e) {
throw new HiveException("Unable to alter partition.", e);
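A hedged usage sketch of the new three-argument overload (the column name and bucket count below are examples, not from the patch); the call sequence mirrors what updatePartitionBucketSortColumns does further down:

    // Illustration only: mutate a partition's bucketing metadata in memory, then push it to the
    // metastore through the new overload.
    Hive db = Hive.get(conf);
    Partition partn = db.getPartition(table, partSpec, false);
    partn.getBucketCols().clear();
    partn.getBucketCols().add("key");                   // example bucket column
    partn.getTPartition().getSd().setNumBuckets(2);     // example bucket count
    db.alterPartition(table.getDbName(), table.getTableName(), partn);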
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java (revision 1407273)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ConditionalTask.java (working copy)
@@ -92,6 +92,12 @@
//recursively remove this task from its children's parent task
tsk.removeFromChildrenTasks();
} else {
+ if (getParentTasks() != null) {
+ // Link the resolved task to this conditional task's parents so that later code can walk
+ // back up the task tree
+ for (Task<? extends Serializable> task : getParentTasks()) {
+ task.addDependentTask(tsk);
+ }
+ }
// resolved task
if (!driverContext.getRunnable().contains(tsk)) {
driverContext.addToRunnable(tsk);
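This extra edge matters for the inference feature: without it, a task resolved out of a ConditionalTask has no parent links, so the ancestor walk MoveTask performs below would stop before reaching the map reduce task that wrote the files. Roughly (task names illustrative):

    // before: MapRedTask -> ConditionalTask, and the resolved MoveTask has no parents
    // after:  task.addDependentTask(tsk) also registers MapRedTask as a parent of the
    //         resolved MoveTask, so MoveTask.getParentTasks() can walk back to it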
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java (revision 1407273)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java (working copy)
@@ -49,4 +49,16 @@
public OperatorType getType() {
return OperatorType.EXTRACT;
}
+
+ /**
+ * @return the name of the operator
+ */
+ @Override
+ public String getName() {
+ return getOperatorName();
+ }
+
+ static public String getOperatorName() {
+ return "EX";
+ }
}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (revision 1407273)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (working copy)
@@ -447,6 +447,30 @@
int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
reducers = Math.max(1, reducers);
reducers = Math.min(maxReducers, reducers);
+
+ // If this map reduce job writes final data to a table and bucketing is being inferred,
+ // and the user has configured Hive to do this, make sure the number of reducers is a
+ // power of two
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO) &&
+ work.isFinalMapRed() && !work.getBucketedColsByFile().isEmpty()) {
+
+ int reducersLog = (int)(Math.log(reducers) / Math.log(2)) + 1;
+ int reducersPowerTwo = (int)Math.pow(2, reducersLog);
+
+ // If the original number of reducers was a power of two, use that
+ if (reducersPowerTwo / 2 == reducers) {
+ return reducers;
+ } else if (reducersPowerTwo > maxReducers) {
+ // If the next power of two greater than the original number of reducers is greater
+ // than the max number of reducers, use the preceding power of two, which is strictly
+ // less than the original number of reducers and hence the max
+ reducers = reducersPowerTwo / 2;
+ } else {
+ // Otherwise use the smallest power of two greater than the original number of reducers
+ reducers = reducersPowerTwo;
+ }
+ }
+
return reducers;
}
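To make the rounding concrete, here is a standalone sketch (not part of the patch) that reproduces the arithmetic above for a few sample reducer estimates with a cap of 6:

    // Illustrative only: with reducers = 5 and maxReducers = 6, reducersLog = 3 and
    // reducersPowerTwo = 8; 8 exceeds the cap, so the code falls back to 8 / 2 = 4.
    public class PowerOfTwoReducers {
      static int round(int reducers, int maxReducers) {
        int reducersLog = (int) (Math.log(reducers) / Math.log(2)) + 1;
        int reducersPowerTwo = (int) Math.pow(2, reducersLog);
        if (reducersPowerTwo / 2 == reducers) {
          return reducers;                 // already a power of two
        } else if (reducersPowerTwo > maxReducers) {
          return reducersPowerTwo / 2;     // next power of two is over the cap; step down
        } else {
          return reducersPowerTwo;         // round up to the next power of two
        }
      }

      public static void main(String[] args) {
        System.out.println(round(5, 6));   // 4
        System.out.println(round(4, 6));   // 4 (unchanged)
        System.out.println(round(3, 6));   // 4 (rounded up)
      }
    }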
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java (revision 1407273)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ForwardOperator.java (working copy)
@@ -40,4 +40,16 @@
public OperatorType getType() {
return OperatorType.FORWARD;
}
+
+ /**
+ * @return the name of the operator
+ */
+ @Override
+ public String getName() {
+ return getOperatorName();
+ }
+
+ static public String getOperatorName() {
+ return "FOR";
+ }
}
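For context, these getName()/getOperatorName() additions (and the matching ones on ExtractOperator) exist so the rules registered in inferBucketingSorting() can match these operators by name prefix; illustratively:

    // The rule patterns above expand to operator-name prefixes, e.g.:
    //   new RuleRegExp("R6",  ExtractOperator.getOperatorName() + "%")  -> "EX%"
    //   new RuleRegExp("R11", ForwardOperator.getOperatorName() + "%")  -> "FOR%"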
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (revision 1407273)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (working copy)
@@ -35,18 +35,25 @@
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
+import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.bucketingsorting.BucketingSortingCtx.BucketCol;
+import org.apache.hadoop.hive.ql.optimizer.bucketingsorting.BucketingSortingCtx.SortCol;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
import org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.session.SessionState;
@@ -238,6 +245,41 @@
}
} else {
LOG.info("Partition is: " + tbd.getPartitionSpec().toString());
+
+ // Check if the bucketing and/or sorting columns were inferred
+ List<BucketCol> bucketCols = null;
+ List<SortCol> sortCols = null;
+ int numBuckets = -1;
+ Task task = this;
+ String path = tbd.getSourceDir();
+ // Find the first ancestor of this MoveTask which is some form of map reduce task
+ // (Either standard, local, or a merge)
+ while (task.getParentTasks() != null && task.getParentTasks().size() == 1) {
+ task = (Task)task.getParentTasks().get(0);
+ // If it was a merge task or a local map reduce task, nothing can be inferred
+ if (task instanceof BlockMergeTask || task instanceof MapredLocalTask) {
+ break;
+ }
+
+ // If it's a standard map reduce task, check what, if anything, it inferred about
+ // the directory this move task is moving
+ if (task instanceof MapRedTask) {
+ MapredWork work = (MapredWork)task.getWork();
+ bucketCols = work.getBucketedColsByFile().get(path);
+ sortCols = work.getSortedColsByFile().get(path);
+ numBuckets = work.getNumReduceTasks();
+ break;
+ }
+
+ // If it's a move task, get the path the files were moved from; this is the path any
+ // preceding map reduce task inferred information about, and moving the files does not
+ // invalidate those inferences
+ if (task instanceof MoveTask) {
+ if (((MoveTask)task).getWork().getLoadFileWork() != null) {
+ path = ((MoveTask)task).getWork().getLoadFileWork().getSourceDir();
+ }
+ }
+ }
// deal with dynamic partitions
DynamicPartitionCtx dpCtx = tbd.getDPCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // dynamic partitions
@@ -275,6 +317,10 @@
for (LinkedHashMap<String, String> partSpec: dp) {
Partition partn = db.getPartition(table, partSpec, false);
+ if (bucketCols != null || sortCols != null) {
+ updatePartitionBucketSortColumns(table, partn, bucketCols, numBuckets, sortCols);
+ }
+
WriteEntity enty = new WriteEntity(partn, true);
if (work.getOutputs() != null) {
work.getOutputs().add(enty);
@@ -304,6 +350,11 @@
db.loadPartition(new Path(tbd.getSourceDir()), tbd.getTable().getTableName(),
tbd.getPartitionSpec(), tbd.getReplace(), tbd.getHoldDDLTime(), tbd.getInheritTableSpecs());
Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
+
+ if (bucketCols != null || sortCols != null) {
+ updatePartitionBucketSortColumns(table, partn, bucketCols, numBuckets, sortCols);
+ }
+
dc = new DataContainer(table.getTTable(), partn.getTPartition());
// add this partition to post-execution hook
if (work.getOutputs() != null) {
@@ -325,6 +376,77 @@
}
}
+ /**
+ * Alters the bucketing and/or sorting columns of the partition provided they meet some
+ * validation criteria, e.g. the number of buckets matches the number of files, and the
+ * columns are not partition columns
+ * @param table
+ * @param partn
+ * @param bucketCols
+ * @param numBuckets
+ * @param sortCols
+ * @throws IOException
+ * @throws InvalidOperationException
+ * @throws HiveException
+ */
+ private void updatePartitionBucketSortColumns(Table table, Partition partn,
+ List<BucketCol> bucketCols, int numBuckets, List<SortCol> sortCols)
+ throws IOException, InvalidOperationException, HiveException {
+
+ boolean updateBucketCols = false;
+ if (bucketCols != null) {
+ FileSystem fileSys = partn.getPartitionPath().getFileSystem(conf);
+ FileStatus[] fileStatus = Utilities.getFileStatusRecurse(
+ partn.getPartitionPath(), 1, fileSys);
+ // Verify the number of buckets equals the number of files
+ if (fileStatus.length == numBuckets) {
+ List<String> newBucketCols = new ArrayList<String>();
+ updateBucketCols = true;
+ for (BucketCol bucketCol : bucketCols) {
+ if (bucketCol.getIndexes().get(0) < partn.getCols().size()) {
+ newBucketCols.add(partn.getCols().get(
+ bucketCol.getIndexes().get(0)).getName());
+ } else {
+ // An inferred index pointing at a partition column is not valid for bucketing
+ updateBucketCols = false;
+ break;
+ }
+ }
+ if (updateBucketCols) {
+ partn.getBucketCols().clear();
+ partn.getBucketCols().addAll(newBucketCols);
+ partn.getTPartition().getSd().setNumBuckets(numBuckets);
+ }
+ }
+ }
+
+ boolean updateSortCols = false;
+ if (sortCols != null) {
+ List<Order> newSortCols = new ArrayList<Order>();
+ updateSortCols = true;
+ for (SortCol sortCol : sortCols) {
+ if (sortCol.getIndexes().get(0) < partn.getCols().size()) {
+ newSortCols.add(new Order(
+ partn.getCols().get(sortCol.getIndexes().get(0)).getName(),
+ sortCol.getSortOrder() == '+' ? BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC :
+ BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC));
+ } else {
+ // An inferred index pointing at a partition column is not valid for sorting
+ updateSortCols = false;
+ break;
+ }
+ }
+ if (updateSortCols) {
+ partn.getSortCols().clear();
+ partn.getSortCols().addAll(newSortCols);
+ }
+ }
+
+ if (updateBucketCols || updateSortCols) {
+ db.alterPartition(table.getDbName(), table.getTableName(), partn);
+ }
+ }
+
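A worked example of the validation just above (all values hypothetical): a partition of test_table with data columns (key, value), written by 4 reducers, where the walk inferred bucketing on column index 0 and ascending sorting on index 0.

    // Illustrative trace of updatePartitionBucketSortColumns under those assumptions:
    //   fileStatus.length == 4 == numBuckets   -> bucket columns may be recorded
    //   index 0 < partn.getCols().size()       -> resolves to data column "key"
    //   bucket cols become ["key"], numBuckets is set to 4
    //   sort order '+'                         -> HIVE_COLUMN_ORDER_ASC
    //   sort cols become [Order("key", ASC)]
    //   db.alterPartition(...) then persists the updated partition metadata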
/*
* Does the move task involve moving to a local file system
*/