diff --git a/hcatalog/core/.gitignore b/hcatalog/core/.gitignore index 0a7a9c5..3b2ad03 100644 --- a/hcatalog/core/.gitignore +++ b/hcatalog/core/.gitignore @@ -1 +1,2 @@ mapred +/bin/ diff --git a/hcatalog/webhcat/svr/.gitignore b/hcatalog/webhcat/svr/.gitignore index 916e17c..3285bd9 100644 --- a/hcatalog/webhcat/svr/.gitignore +++ b/hcatalog/webhcat/svr/.gitignore @@ -1 +1,2 @@ dependency-reduced-pom.xml +/bin/ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java index 0755943..3c5cac2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java @@ -21,7 +21,15 @@ import org.apache.calcite.plan.RelOptCostFactory; import org.apache.calcite.plan.RelOptUtil; -// TODO: This should inherit from VolcanoCost and should just override isLE method. +/*** + * NOTE:
+ * 1. HiveCost normalizes CPU and IO into time.
+ * 2. CPU and IO costs are added together to find the query latency.
+ * 3. If query latency is equal then row count is compared. + */ + +// TODO: This should inherit from VolcanoCost and should just override isLE +// method. public class HiveCost implements RelOptCost { // ~ Static fields/initializers --------------------------------------------- @@ -114,8 +122,10 @@ public boolean equals(RelOptCost other) { } public boolean isEqWithEpsilon(RelOptCost other) { - return (this == other) || (Math.abs((this.cpu + this.io) - - (other.getCpu() + other.getIo())) < RelOptUtil.EPSILON); + return (this == other) + || ((Math.abs(this.io - other.getIo()) < RelOptUtil.EPSILON) + && (Math.abs(this.cpu - other.getCpu()) < RelOptUtil.EPSILON) && (Math + .abs(this.rowCount - other.getRows()) < RelOptUtil.EPSILON)); } public RelOptCost minus(RelOptCost other) { diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index 4a5d02d..66e0e9f 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -808,32 +808,32 @@ STAGE PLANS: alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -843,10 +843,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col2 + outputColumnNames: _col1 Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: int) + expressions: _col1 (type: int) outputColumnNames: _col0 Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out index 9e1c1e3..96f8b6a 100644 --- a/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out +++ b/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out @@ -555,16 +555,16 @@ STAGE 
PLANS: Map Operator Tree: TableScan alias: encryptedtable - Statistics: Num rows: 12 Data size: 2777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2767 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2767 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 2777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2767 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: string), _col1 (type: string) auto parallelism: false @@ -595,7 +595,7 @@ STAGE PLANS: serialization.ddl struct encryptedtable { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1392 + totalSize 1385 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -643,7 +643,7 @@ STAGE PLANS: serialization.ddl struct encryptedtable { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 1385 + totalSize 1382 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -675,14 +675,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 2777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2767 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A PARTIAL masked pattern was here #### data/warehouse/unencryptedtable/ds=today/.hive-staging NumFilesPerFileSink: 1 Static Partition Specification: 
ds=today/ - Statistics: Num rows: 12 Data size: 2777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2767 Basic stats: COMPLETE Column stats: NONE #### A PARTIAL masked pattern was here #### data/warehouse/unencryptedtable/ds=today/.hive-staging table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git a/ql/src/test/results/clientpositive/join32.q.out b/ql/src/test/results/clientpositive/join32.q.out index d9e8dd3..a05a356 100644 --- a/ql/src/test/results/clientpositive/join32.q.out +++ b/ql/src/test/results/clientpositive/join32.q.out @@ -109,71 +109,25 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_0:y Fetch Operator limit: -1 - Partition Description: - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { 
string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_0:y TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -187,7 +141,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -196,31 +150,31 @@ STAGE PLANS: HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 
key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -229,11 +183,11 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + outputColumnNames: _col1, _col2, _col5 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -402,7 +356,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:$hdt$_1:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] Stage: Stage-0 Move Operator @@ -451,8 +405,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION 
[(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/join32_lessSize.q.out b/ql/src/test/results/clientpositive/join32_lessSize.q.out index 9e3d06d..136c306 100644 --- a/ql/src/test/results/clientpositive/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/join32_lessSize.q.out @@ -130,7 +130,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -139,31 +139,31 @@ STAGE PLANS: HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Stage: Stage-6 Map Reduce Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -175,8 +175,8 
@@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col3 - columns.types string,string,string + columns _col0,_col3 + columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -190,7 +190,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -200,14 +200,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -220,23 +220,26 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 #### A masked pattern 
was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -244,106 +247,59 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:$hdt$_1:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] Stage: Stage-7 Map Reduce Local Work Alias -> Map Local 
Tables: - $hdt$_0:z + $hdt$_0:y Fetch Operator limit: -1 - Partition Description: - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_0:y TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: 
NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -362,11 +318,11 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + outputColumnNames: _col1, _col2, _col5 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -406,8 +362,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col3 - columns.types string,string,string + columns _col0,_col3 + columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -415,19 +371,16 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col3 - columns.types string,string,string + columns _col0,_col3 + columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -435,13 +388,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -451,21 +402,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: #### A masked pattern was here #### @@ -516,8 +470,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] 
-POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 @@ -703,28 +657,27 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-11 Stage-10 depends on stages: Stage-8 Stage-7 depends on stages: Stage-10 - Stage-0 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-7 + Stage-6 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_2:x - Fetch Operator - limit: -1 - $hdt$_1:$hdt$_3:x + $hdt$_1:$hdt$_2:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -733,9 +686,12 @@ STAGE PLANS: HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 0 - $hdt$_1:$hdt$_3:x + 1 _col0 (type: string) + Position of Big Table: 1 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -748,62 +704,32 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 
99 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - alias: w - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col4 - Position of Big Table: 0 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col4 - columns.types string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + 
NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -811,7 +737,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -821,14 +747,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -841,21 +767,106 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.src - name: default.src + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [$hdt$_1:$hdt$_2:$hdt$_3:x] + + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:$hdt$_1:w + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:$hdt$_1:w + TableScan + alias: w + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + Position of Big Table: 1 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col4 + Position of Big Table: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col4 + columns.types string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file 
name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +#### A masked pattern was here #### + Partition + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -865,14 +876,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -885,22 +896,22 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src [$hdt$_1:$hdt$_1:w] +#### A masked pattern was here #### - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:w @@ -926,7 +937,7 @@ STAGE PLANS: 1 _col1 (type: string) Position of Big Table: 1 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -939,17 +950,17 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col1, _col3, _col6 Position of Big Table: 1 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -982,7 +993,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10002 + base file name: -mr-10001 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: diff --git a/ql/src/test/results/clientpositive/join33.q.out b/ql/src/test/results/clientpositive/join33.q.out index 
d9e8dd3..a05a356 100644 --- a/ql/src/test/results/clientpositive/join33.q.out +++ b/ql/src/test/results/clientpositive/join33.q.out @@ -109,71 +109,25 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_0:y Fetch Operator limit: -1 - Partition Description: - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_0:y TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -187,7 +141,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -196,31 +150,31 @@ STAGE PLANS: HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 Position of Big Table: 0 Statistics: 
Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -229,11 +183,11 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + outputColumnNames: _col1, _col2, _col5 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -402,7 +356,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:$hdt$_1:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] Stage: Stage-0 Move Operator @@ -451,8 +405,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/join_alt_syntax.q.out index 007e4c6..cc908c1 100644 --- a/ql/src/test/results/clientpositive/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/join_alt_syntax.q.out @@ -359,13 +359,13 @@ where p2.p_name = p3.p_name 
and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -379,36 +379,34 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + key expressions: _col0 (type: int), _col1 (type: 
string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -420,60 +418,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: 
NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -488,14 +432,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -504,6 +455,53 @@ STAGE 
PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -521,56 +519,54 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), 
p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -582,60 +578,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - TableScan - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), 
_col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -650,14 +592,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 7 Data 
size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -666,6 +615,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out index ff92d9f..98008ad 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out @@ -141,13 +141,13 @@ from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 on and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -161,36 +161,36 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE 
Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: 
string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -202,44 +202,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - TableScan - Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: 
string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col9 (type: int), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -256,29 +218,20 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + 1 _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -287,6 +240,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + 
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col9 (type: int) + sort order: + + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col9 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 
(type: double), _col35 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out index f608cfd..a1dd24e 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out @@ -145,13 +145,13 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: 
TableScan @@ -165,36 +165,36 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 
Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -206,44 +206,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - TableScan - Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: 
string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col9 (type: int), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -260,29 +222,20 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 
Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + 1 _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -291,6 +244,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col9 (type: int) + sort order: + + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col9 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, 
_col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/spark/join32.q.out b/ql/src/test/results/clientpositive/spark/join32.q.out index 0f16678..a9d50b4 100644 --- a/ql/src/test/results/clientpositive/spark/join32.q.out +++ b/ql/src/test/results/clientpositive/spark/join32.q.out @@ 
-113,16 +113,16 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -136,12 +136,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -149,13 +146,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -165,23 +160,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - 
partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Map 3 Map Operator Tree: TableScan @@ -190,7 +188,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -199,7 +197,7 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -260,24 +258,24 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 
outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -288,13 +286,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + outputColumnNames: _col1, _col2, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -330,9 +328,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -340,11 +341,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -354,26 +357,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name 
default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-0 Move Operator @@ -422,8 +422,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index 54f47f9..dac9610 100644 --- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -121,16 +121,16 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) 
and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -144,12 +144,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -157,13 +154,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -173,23 +168,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Map 3 Map Operator Tree: TableScan @@ -198,7 +196,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -207,7 +205,7 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -268,24 +266,24 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -296,13 +294,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + 
outputColumnNames: _col1, _col2, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -338,9 +336,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -348,11 +349,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -362,26 +365,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} 
serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-0 Move Operator @@ -430,8 +430,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 @@ -613,34 +613,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: w - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + 
Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 1 Local Work: Map Reduce Local Work @@ -649,7 +650,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -659,14 +660,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -679,39 +680,44 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - 
name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [w] - Map 3 + /src1 [x] + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: w + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col1 (type: string) - Position of Big Table: 0 + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -719,7 +725,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -729,14 +735,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A 
masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -749,20 +755,20 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] + /src [w] Map 4 Map Operator Tree: TableScan @@ -777,11 +783,22 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - Position of Big Table: 0 + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -858,68 +875,57 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col1 (type: string) - outputColumnNames: _col1 + outputColumnNames: _col1, _col4 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - 
outputColumnNames: _col1, _col4 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3, _col6 input vertices: - 1 Map 4 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1, _col3, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 + Select Operator + expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + 
columns.types string:string:string #### A masked pattern was here #### - name default.dest_j1 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 + name default.dest_j1 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index 0f16678..a9d50b4 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -113,16 +113,16 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -136,12 +136,9 @@ STAGE PLANS: Path -> Partition: 
#### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -149,13 +146,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -165,23 +160,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Map 3 Map Operator Tree: TableScan @@ -190,7 +188,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: 
boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -199,7 +197,7 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -260,24 +258,24 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -288,13 +286,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + outputColumnNames: _col1, _col2, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -330,9 +328,12 
@@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -340,11 +341,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -354,26 +357,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-0 Move Operator @@ -422,8 +422,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: 
default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out index f0d96c4..dabdcb8 100644 --- a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -390,9 +390,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -401,71 +401,69 @@ STAGE PLANS: alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: (p_partkey is not null and p_name is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE 
Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -473,27 +471,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort 
order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -503,22 +485,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) + value expressions: _col0 (type: string), 
_col2 (type: string), _col4 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -544,9 +542,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -555,36 +553,35 @@ STAGE PLANS: alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: 
_col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan @@ -598,28 +595,27 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column 
stats: NONE + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -627,27 +623,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: 
string), _col3 (type: string) + expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -657,22 +637,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) + value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git 
a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out index ff1626f..26e05ac 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out @@ -150,9 +150,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -161,72 +161,72 @@ STAGE PLANS: alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 5 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and 
p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: (p_partkey is not null and p_name is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), 
p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -234,27 +234,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reducer 3 - Reduce Operator Tree: - Join 
Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col9 (type: int), _col10 (type: string) + 1 _col9 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: 
int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -264,22 +248,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + 1 _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) + key expressions: _col9 (type: int) + sort order: + + Map-reduce partition columns: _col9 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: 
string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out index 417ba4f..c821fe4 100644 
--- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out @@ -154,9 +154,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -165,72 +165,72 @@ STAGE PLANS: alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), 
_col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 5 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), 
p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: (p_partkey is not null and p_name is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 7 Data size: 847 Basic 
stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -238,27 +238,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col9 (type: int), _col10 (type: string) + 1 _col9 (type: int) outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -268,22 +252,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + 1 _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) + key expressions: _col9 (type: int) + sort order: + + Map-reduce partition columns: _col9 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value 
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out index f7026a8..f84524b 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out @@ -53,11 +53,11 @@ POSTHOOK: Input: default@srcpart POSTHOOK: 
Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@ss POSTHOOK: Lineage: ss.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss.k2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss.v2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: INSERT OVERWRITE TABLE sr SELECT x.key,x.value,y.key,y.value,z.key,z.value FROM src1 x @@ -81,11 +81,11 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@sr POSTHOOK: Lineage: sr.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: sr.k2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: sr.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sr.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sr.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: sr.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: sr.v2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, 
comment:default), ] -POSTHOOK: Lineage: sr.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sr.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sr.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: INSERT OVERWRITE TABLE cs SELECT x.key,x.value,y.key,y.value,z.key,z.value FROM src1 x @@ -195,7 +195,7 @@ Stage-0 Merge Join Operator [MERGEJOIN_29] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"1":"_col3 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col0","_col4","_col5"] + | outputColumnNames:["_col1","_col2","_col5"] | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE |<-Map 1 [SIMPLE_EDGE] | Reduce Output Operator [RS_14] @@ -203,14 +203,15 @@ Stage-0 | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_2] - | outputColumnNames:["_col0"] + | value expressions:_col1 (type: string) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_25] - | predicate:value is not null (type: boolean) + | predicate:key is not null (type: boolean) | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_0] - | alias:z + | alias:y | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Reducer 4 [SIMPLE_EDGE] Reduce Output Operator [RS_16] @@ -218,11 +219,11 @@ Stage-0 Map-reduce partition columns:_col3 (type: string) sort order:+ Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions:_col1 (type: string), _col2 (type: string) + value expressions:_col0 (type: string) Merge Join Operator [MERGEJOIN_28] | condition 
map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3"] + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col3"] | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE |<-Map 3 [SIMPLE_EDGE] | Reduce Output Operator [RS_8] @@ -230,28 +231,27 @@ Stage-0 | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) | Select Operator [SEL_4] - | outputColumnNames:["_col0","_col1"] + | outputColumnNames:["_col0"] | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_26] - | predicate:key is not null (type: boolean) + | predicate:value is not null (type: boolean) | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_3] - | alias:y + | TableScan [TS_2] + | alias:z | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Map 5 [SIMPLE_EDGE] Reduce Output Operator [RS_10] - key expressions:_col0 (type: string) - Map-reduce partition columns:_col0 (type: string) + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) sort order:+ Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - value expressions:_col1 (type: string) + value expressions:_col0 (type: string) Select Operator [SEL_6] outputColumnNames:["_col0","_col1"] Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Filter Operator [FIL_27] - predicate:(key is not null and value is not null) (type: boolean) + predicate:(value is not null and key is not null) (type: boolean) Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE TableScan [TS_5] alias:x @@ -315,21 +315,21 @@ POSTHOOK: type: 
QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 9 <- Reducer 16 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 + Reducer 5 File Output Operator [FS_71] compressed:false Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE @@ -339,236 +339,236 @@ Stage-0 Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_69] | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 5 [SIMPLE_EDGE] + | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] Reduce Output Operator [RS_68] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_66] | 
aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 4 [SIMPLE_EDGE] + | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] Reduce Output Operator [RS_65] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_64] aggregations:["count(_col3)","count(_col4)","count(_col5)"] keys:_col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_62] outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator [MERGEJOIN_111] + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_113] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col8 (type: string), _col10 (type: string)","0":"_col8 (type: string), _col10 (type: string)"} - | outputColumnNames:["_col2","_col3","_col8","_col9","_col20","_col21"] - | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 12 
[SIMPLE_EDGE] - | Reduce Output Operator [RS_60] - | key expressions:_col8 (type: string), _col10 (type: string) - | Map-reduce partition columns:_col8 (type: string), _col10 (type: string) + | keys:{"1":"_col15 (type: string), _col17 (type: string)","0":"_col1 (type: string), _col3 (type: string)"} + | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"] + | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_58] + | key expressions:_col1 (type: string), _col3 (type: string) + | Map-reduce partition columns:_col1 (type: string), _col3 (type: string) | sort order:++ - | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col6 (type: string), _col7 (type: string) - | Select Operator [SEL_46] - | outputColumnNames:["_col10","_col6","_col7","_col8"] - | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - | Merge Join Operator [MERGEJOIN_109] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col5 (type: string)","0":"_col1 (type: string)"} - | | outputColumnNames:["_col6","_col7","_col8","_col10"] - | | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - | |<-Map 11 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_42] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_19] - | | outputColumnNames:["_col1"] - | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_101] - | | predicate:((key = 'src1key') and value is not null) (type: boolean) - | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_17] - | | alias:src1 - | | Statistics:Num rows: 25 Data size: 191 
Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 14 [SIMPLE_EDGE] - | Reduce Output Operator [RS_44] - | key expressions:_col5 (type: string) - | Map-reduce partition columns:_col5 (type: string) - | sort order:+ - | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col4 (type: string), _col6 (type: string), _col8 (type: string) - | Merge Join Operator [MERGEJOIN_108] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col4","_col5","_col6","_col8"] - | | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - | |<-Map 13 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_36] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_22] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_102] - | | predicate:((value = 'd1value') and key is not null) (type: boolean) - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_20] - | | alias:d1 - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 16 [SIMPLE_EDGE] - | Reduce Output Operator [RS_38] - | key expressions:_col2 (type: string) - | Map-reduce partition columns:_col2 (type: string) - | sort order:+ - | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col3 (type: string), _col4 (type: string), _col6 (type: string) - | Merge Join Operator [MERGEJOIN_107] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col3 (type: string)","0":"_col1 (type: string)"} - | | 
outputColumnNames:["_col2","_col3","_col4","_col6"] - | | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - | |<-Map 15 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_30] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_25] - | | outputColumnNames:["_col1"] - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_103] - | | predicate:((key = 'srcpartkey') and value is not null) (type: boolean) - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_23] - | | alias:srcpart - | | Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - | |<-Map 17 [SIMPLE_EDGE] - | Reduce Output Operator [RS_32] - | key expressions:_col3 (type: string) - | Map-reduce partition columns:_col3 (type: string) - | sort order:+ - | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) - | Select Operator [SEL_28] - | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] - | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_104] - | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) - | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_26] - | alias:ss - | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_58] - key expressions:_col8 (type: string), _col10 (type: string) - Map-reduce partition columns:_col8 (type: string), 
_col10 (type: string) + | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col2 (type: string) + | Merge Join Operator [MERGEJOIN_107] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3"] + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_53] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_99] + | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_0] + | | alias:cs + | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE + | |<-Map 6 [SIMPLE_EDGE] + | Reduce Output Operator [RS_55] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_4] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_100] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_2] + | alias:d1 + | Statistics:Num rows: 500 Data size: 
5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_60] + key expressions:_col15 (type: string), _col17 (type: string) + Map-reduce partition columns:_col15 (type: string), _col17 (type: string) sort order:++ - Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - value expressions:_col2 (type: string), _col3 (type: string), _col9 (type: string) - Merge Join Operator [MERGEJOIN_110] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col3 (type: string), _col5 (type: string)","0":"_col1 (type: string), _col3 (type: string)"} - | outputColumnNames:["_col2","_col3","_col8","_col9","_col10"] - | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 2 [SIMPLE_EDGE] - | Reduce Output Operator [RS_53] - | key expressions:_col1 (type: string), _col3 (type: string) - | Map-reduce partition columns:_col1 (type: string), _col3 (type: string) - | sort order:++ - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col2 (type: string) - | Merge Join Operator [MERGEJOIN_105] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | |<-Map 1 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_48] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1","_col2","_col3"] - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_97] - | | predicate:((k1 
is not null and v2 is not null) and v3 is not null) (type: boolean) - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_0] - | | alias:cs - | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE - | |<-Map 7 [SIMPLE_EDGE] - | Reduce Output Operator [RS_50] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_4] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_98] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_2] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 9 [SIMPLE_EDGE] - Reduce Output Operator [RS_55] - key expressions:_col3 (type: string), _col5 (type: string) - Map-reduce partition columns:_col3 (type: string), _col5 (type: string) - sort order:++ - Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions:_col2 (type: string), _col4 (type: string) - Merge Join Operator [MERGEJOIN_106] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} - | outputColumnNames:["_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - |<-Map 10 [SIMPLE_EDGE] - | Reduce Output Operator [RS_14] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_10] - | outputColumnNames:["_col0"] - | 
Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_100] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_8] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map 8 [SIMPLE_EDGE] - Reduce Output Operator [RS_12] - key expressions:_col0 (type: string) - Map-reduce partition columns:_col0 (type: string) - sort order:+ - Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - Select Operator [SEL_7] - outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_99] - predicate:((((((v1 = 'srv1') and k1 is not null) and v2 is not null) and v3 is not null) and k2 is not null) and k3 is not null) (type: boolean) - Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_5] - alias:sr - Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + value expressions:_col6 (type: string), _col7 (type: string), _col14 (type: string) + Select Operator [SEL_51] + outputColumnNames:["_col14","_col15","_col17","_col6","_col7"] + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_112] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col2 (type: string), _col4 (type: string)","0":"_col8 (type: string), _col10 (type: string)"} + | outputColumnNames:["_col6","_col7","_col14","_col15","_col17"] + | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + 
|<-Reducer 16 [SIMPLE_EDGE] + | Reduce Output Operator [RS_49] + | key expressions:_col2 (type: string), _col4 (type: string) + | Map-reduce partition columns:_col2 (type: string), _col4 (type: string) + | sort order:++ + | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col3 (type: string), _col5 (type: string) + | Merge Join Operator [MERGEJOIN_111] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} + | | outputColumnNames:["_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | |<-Map 15 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_36] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + | | Select Operator [SEL_31] + | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_105] + | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_29] + | | alias:sr + | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + | |<-Map 17 [SIMPLE_EDGE] + | Reduce Output Operator [RS_38] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_34] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic 
stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_106] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_32] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_47] + key expressions:_col8 (type: string), _col10 (type: string) + Map-reduce partition columns:_col8 (type: string), _col10 (type: string) + sort order:++ + Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + value expressions:_col6 (type: string), _col7 (type: string) + Merge Join Operator [MERGEJOIN_110] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col5 (type: string)","0":"_col1 (type: string)"} + | outputColumnNames:["_col6","_col7","_col8","_col10"] + | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + |<-Map 7 [SIMPLE_EDGE] + | Reduce Output Operator [RS_42] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_7] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_101] + | predicate:((key = 'src1key') and value is not null) (type: boolean) + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_5] + | alias:src1 + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 11 [SIMPLE_EDGE] + Reduce Output Operator [RS_44] + key expressions:_col5 (type: string) + Map-reduce partition columns:_col5 (type: string) + sort order:+ + Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + value expressions:_col4 
(type: string), _col6 (type: string), _col8 (type: string) + Merge Join Operator [MERGEJOIN_109] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col2 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col4","_col5","_col6","_col8"] + | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + |<-Map 10 [SIMPLE_EDGE] + | Reduce Output Operator [RS_24] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_102] + | predicate:((value = 'd1value') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_8] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 13 [SIMPLE_EDGE] + Reduce Output Operator [RS_26] + key expressions:_col2 (type: string) + Map-reduce partition columns:_col2 (type: string) + sort order:+ + Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions:_col3 (type: string), _col4 (type: string), _col6 (type: string) + Merge Join Operator [MERGEJOIN_108] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"1":"_col3 (type: string)","0":"_col1 (type: string)"} + | outputColumnNames:["_col2","_col3","_col4","_col6"] + | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + |<-Map 12 [SIMPLE_EDGE] + | Reduce Output Operator [RS_18] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_13] + | 
outputColumnNames:["_col1"] + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_103] + | predicate:((key = 'srcpartkey') and value is not null) (type: boolean) + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_11] + | alias:srcpart + | Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + |<-Map 14 [SIMPLE_EDGE] + Reduce Output Operator [RS_20] + key expressions:_col3 (type: string) + Map-reduce partition columns:_col3 (type: string) + sort order:+ + Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) + Select Operator [SEL_16] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_104] + predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) + Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_14] + alias:ss + Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -1298,7 +1298,7 @@ Stage-0 Map Join Operator [MAPJOIN_29] | condition map:[{"":"Inner Join 0 to 1"}] | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col3 (type: string)"} - | outputColumnNames:["_col0","_col4","_col5"] + | outputColumnNames:["_col1","_col2","_col5"] | Statistics:Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE |<-Map 1 [BROADCAST_EDGE] | Reduce Output Operator [RS_14] @@ -1306,44 +1306,45 @@ Stage-0 | Map-reduce partition columns:_col0 (type: string) | sort order:+ | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE - | Select Operator [SEL_2] - | outputColumnNames:["_col0"] + | value expressions:_col1 (type: string) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_25] - | predicate:value is not null (type: boolean) + | predicate:key is not null (type: boolean) | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_0] - | alias:z + | alias:y | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Map Join Operator [MAPJOIN_28] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 2":"_col0 (type: string)","Map 3":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3"] + | keys:{"Map 2":"_col0 (type: string)","Map 3":"_col1 (type: string)"} + | outputColumnNames:["_col0","_col3"] | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE |<-Map 3 [BROADCAST_EDGE] | Reduce Output Operator [RS_10] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) + | value expressions:_col0 (type: string) | Select Operator [SEL_6] | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE | Filter Operator [FIL_27] - | predicate:(key is not null and value is not null) (type: boolean) + | predicate:(value is not null and key is not null) (type: boolean) | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE | TableScan [TS_5] | alias:x | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE |<-Select Operator [SEL_4] - outputColumnNames:["_col0","_col1"] + 
outputColumnNames:["_col0"] Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator [FIL_26] - predicate:key is not null (type: boolean) + predicate:value is not null (type: boolean) Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_3] - alias:y + TableScan [TS_2] + alias:z Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: EXPLAIN select @@ -1404,17 +1405,17 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) -Map 4 <- Map 3 (BROADCAST_EDGE) -Map 7 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE) +Map 10 <- Map 9 (BROADCAST_EDGE) +Map 5 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 9 + Reducer 7 File Output Operator [FS_71] compressed:false Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE @@ -1424,190 +1425,190 @@ Stage-0 Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_69] | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 8 [SIMPLE_EDGE] + | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 6 [SIMPLE_EDGE] Reduce Output Operator [RS_68] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 402 Data size: 
4276 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_66] | aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 365 Data size: 3882 Basic stats: COMPLETE Column stats: NONE - |<-Map 7 [SIMPLE_EDGE] + | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + |<-Map 5 [SIMPLE_EDGE] Reduce Output Operator [RS_65] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) Group By Operator [GBY_64] aggregations:["count(_col3)","count(_col4)","count(_col5)"] keys:_col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE Select Operator [SEL_62] outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - Map Join Operator [MAPJOIN_111] + Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_113] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 2":"_col8 (type: string), _col10 (type: string)","Map 7":"_col8 (type: string), _col10 (type: string)"} - | 
outputColumnNames:["_col2","_col3","_col8","_col9","_col20","_col21"] - | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + | keys:{"Map 2":"_col1 (type: string), _col3 (type: string)","Map 5":"_col15 (type: string), _col17 (type: string)"} + | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"] + | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE |<-Map 2 [BROADCAST_EDGE] | Reduce Output Operator [RS_58] - | key expressions:_col8 (type: string), _col10 (type: string) - | Map-reduce partition columns:_col8 (type: string), _col10 (type: string) + | key expressions:_col1 (type: string), _col3 (type: string) + | Map-reduce partition columns:_col1 (type: string), _col3 (type: string) | sort order:++ - | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col2 (type: string), _col3 (type: string), _col9 (type: string) - | Map Join Operator [MAPJOIN_110] + | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col2 (type: string) + | Map Join Operator [MAPJOIN_107] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 2":"_col1 (type: string), _col3 (type: string)","Map 4":"_col3 (type: string), _col5 (type: string)"} - | | outputColumnNames:["_col2","_col3","_col8","_col9","_col10"] - | | Statistics:Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - | |<-Map 4 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_55] - | | key expressions:_col3 (type: string), _col5 (type: string) - | | Map-reduce partition columns:_col3 (type: string), _col5 (type: string) - | | sort order:++ - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col2 (type: string), _col4 (type: string) - | | Map Join Operator [MAPJOIN_106] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"Map 3":"_col0 (type: 
string)","Map 4":"_col0 (type: string)"} - | | | outputColumnNames:["_col2","_col3","_col4","_col5"] - | | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 3 [BROADCAST_EDGE] - | | | Reduce Output Operator [RS_12] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - | | | Select Operator [SEL_7] - | | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] - | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_99] - | | | predicate:((((((v1 = 'srv1') and k1 is not null) and v2 is not null) and v3 is not null) and k2 is not null) and k3 is not null) (type: boolean) - | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_5] - | | | alias:sr - | | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - | | |<-Select Operator [SEL_10] - | | outputColumnNames:["_col0"] - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_100] - | | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_8] - | | alias:d1 - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Map Join Operator [MAPJOIN_105] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | |<-Map 1 
[BROADCAST_EDGE] - | | Reduce Output Operator [RS_48] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1","_col2","_col3"] - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_97] - | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_0] - | | alias:cs - | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_4] - | outputColumnNames:["_col0"] + | | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3"] + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | |<-Map 1 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_53] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_99] + | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_0] + | | alias:cs + | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE + | 
|<-Select Operator [SEL_4] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_100] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_98] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_2] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_46] - outputColumnNames:["_col10","_col6","_col7","_col8"] - Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - Map Join Operator [MAPJOIN_109] + | TableScan [TS_2] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_51] + outputColumnNames:["_col14","_col15","_col17","_col6","_col7"] + Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_112] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 5":"_col1 (type: string)","Map 7":"_col5 (type: string)"} - | outputColumnNames:["_col6","_col7","_col8","_col10"] - | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - |<-Map 5 [BROADCAST_EDGE] - | Reduce Output Operator [RS_42] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) - | sort order:+ - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_19] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_101] - | predicate:((key = 'src1key') and value is not null) (type: boolean) - | Statistics:Num 
rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_17] - | alias:src1 - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Map Join Operator [MAPJOIN_108] + | keys:{"Map 10":"_col2 (type: string), _col4 (type: string)","Map 5":"_col8 (type: string), _col10 (type: string)"} + | outputColumnNames:["_col6","_col7","_col14","_col15","_col17"] + | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE + |<-Map 10 [BROADCAST_EDGE] + | Reduce Output Operator [RS_49] + | key expressions:_col2 (type: string), _col4 (type: string) + | Map-reduce partition columns:_col2 (type: string), _col4 (type: string) + | sort order:++ + | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col3 (type: string), _col5 (type: string) + | Map Join Operator [MAPJOIN_111] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 10":"_col0 (type: string)","Map 9":"_col0 (type: string)"} + | | outputColumnNames:["_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + | |<-Map 9 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_36] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + | | Select Operator [SEL_31] + | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_105] + | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + 
| | TableScan [TS_29] + | | alias:sr + | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_34] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_106] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_32] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_110] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 7":"_col2 (type: string)","Map 6":"_col0 (type: string)"} - | outputColumnNames:["_col4","_col5","_col6","_col8"] - | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - |<-Map 6 [BROADCAST_EDGE] - | Reduce Output Operator [RS_36] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | keys:{"Map 3":"_col1 (type: string)","Map 5":"_col5 (type: string)"} + | outputColumnNames:["_col6","_col7","_col8","_col10"] + | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE + |<-Map 3 [BROADCAST_EDGE] + | Reduce Output Operator [RS_42] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) | sort order:+ - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_22] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_102] - | predicate:((value = 'd1value') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_20] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map 
Join Operator [MAPJOIN_107] + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_7] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_101] + | predicate:((key = 'src1key') and value is not null) (type: boolean) + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_5] + | alias:src1 + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_109] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 10":"_col3 (type: string)","Map 7":"_col1 (type: string)"} - | outputColumnNames:["_col2","_col3","_col4","_col6"] - | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - |<-Map 10 [BROADCAST_EDGE] - | Reduce Output Operator [RS_32] - | key expressions:_col3 (type: string) - | Map-reduce partition columns:_col3 (type: string) + | keys:{"Map 5":"_col2 (type: string)","Map 4":"_col0 (type: string)"} + | outputColumnNames:["_col4","_col5","_col6","_col8"] + | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + |<-Map 4 [BROADCAST_EDGE] + | Reduce Output Operator [RS_24] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) - | Select Operator [SEL_28] - | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] - | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_104] - | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) - | Statistics:Num rows: 2 Data size: 69 Basic stats: 
COMPLETE Column stats: NONE - | TableScan [TS_26] - | alias:ss - | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_25] - outputColumnNames:["_col1"] - Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_103] - predicate:((key = 'srcpartkey') and value is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_102] + | predicate:((value = 'd1value') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_8] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_108] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 5":"_col1 (type: string)","Map 8":"_col3 (type: string)"} + | outputColumnNames:["_col2","_col3","_col4","_col6"] + | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + |<-Map 8 [BROADCAST_EDGE] + | Reduce Output Operator [RS_20] + | key expressions:_col3 (type: string) + | Map-reduce partition columns:_col3 (type: string) + | sort order:+ + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) + | Select Operator [SEL_16] + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_104] + | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) + | Statistics:Num rows: 2 Data size: 69 Basic 
stats: COMPLETE Column stats: NONE + | TableScan [TS_14] + | alias:ss + | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_13] + outputColumnNames:["_col1"] Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_23] - alias:srcpart - Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_103] + predicate:((key = 'srcpartkey') and value is not null) (type: boolean) + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_11] + alias:srcpart + Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -2785,112 +2786,32 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS), Union 5 (SIMPLE_EDGE) Map 12 <- Union 9 (CONTAINS) Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS) Map 13 <- Union 9 (CONTAINS) +Map 21 <- Map 20 (BROADCAST_EDGE) Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) +Map 19 <- Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 16 <- Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 18 <- Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) +Map 17 <- Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS), Union 5 (SIMPLE_EDGE) Map 4 <- Map 7 (BROADCAST_EDGE), Union 5 (CONTAINS) -Map 19 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) Map 6 <- Map 7 (BROADCAST_EDGE), Union 5 (CONTAINS) -Map 16 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) Map 8 <- Union 9 (CONTAINS) -Map 18 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 17 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 3 (CONTAINS) Stage-0 Fetch 
Operator limit:-1 Stage-1 Union 3 - |<-Reducer 2 [CONTAINS] - | File Output Operator [FS_76] - | compressed:false - | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_21] - | outputColumnNames:["_col0","_col1"] - | Merge Join Operator [MERGEJOIN_120] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col3"] - | |<-Map 1 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_17] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_104] - | | predicate:key is not null (type: boolean) - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_0] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Union 5 [SIMPLE_EDGE] - | |<-Map 4 [CONTAINS] - | | Reduce Output Operator [RS_19] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Map Join Operator [MAPJOIN_119] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"Map 4":"_col0 (type: string)","Map 7":"_col1 (type: string)"} - | | | outputColumnNames:["_col1"] - | | |<-Map 7 [BROADCAST_EDGE] - | | | Reduce Output Operator [RS_13] - | | | key expressions:_col1 (type: string) - | | | Map-reduce partition columns:_col1 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 7 Data 
size: 53 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col0 (type: string) - | | | Select Operator [SEL_9] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_107] - | | | predicate:(value is not null and key is not null) (type: boolean) - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_8] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | | Reduce Output Operator [RS_125] - | | | key expressions:_col1 (type: string) - | | | Map-reduce partition columns:_col1 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col0 (type: string) - | | | Please refer to the previous Select Operator [SEL_9] - | | |<-Select Operator [SEL_3] - | | outputColumnNames:["_col0"] - | | Filter Operator [FIL_105] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_2] - | | alias:x - | |<-Map 6 [CONTAINS] - | Reduce Output Operator [RS_19] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) - | sort order:+ - | Map Join Operator [MAPJOIN_119] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 7":"_col1 (type: string)","Map 6":"_col0 (type: string)"} - | | outputColumnNames:["_col1"] - | |<- Please refer to the previous Map 7 [BROADCAST_EDGE] - | |<-Select Operator [SEL_5] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_106] - | predicate:value is not null (type: boolean) - | TableScan [TS_4] - | alias:y |<-Reducer 11 [CONTAINS] - | File Output Operator [FS_76] + | File Output Operator [FS_77] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} | Select Operator [SEL_45] | outputColumnNames:["_col0","_col1"] - | Merge Join Operator [MERGEJOIN_122] + | Merge Join Operator [MERGEJOIN_120] | | condition map:[{"":"Inner Join 0 to 1"}] | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} | | outputColumnNames:["_col1","_col4"] @@ -2904,7 +2825,7 @@ Stage-0 | | Select Operator [SEL_34] | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_112] + | | Filter Operator [FIL_110] | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | | TableScan [TS_33] @@ -2916,7 +2837,7 @@ Stage-0 | Map-reduce partition columns:_col1 (type: string) | sort order:+ | Statistics:Num rows: 564 Data size: 5952 Basic stats: COMPLETE Column stats: NONE - | Merge Join Operator [MERGEJOIN_121] + | Merge Join Operator [MERGEJOIN_119] | | condition map:[{"":"Inner Join 0 to 1"}] | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} | | outputColumnNames:["_col1"] @@ -2931,7 +2852,7 @@ Stage-0 | | Select Operator [SEL_32] | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_111] + | | Filter Operator [FIL_109] | | predicate:(value is not null and key is not null) (type: boolean) | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE | | TableScan [TS_31] @@ -2945,7 +2866,7 @@ Stage-0 | | sort order:+ | | Select Operator [SEL_25] | | outputColumnNames:["_col0"] - | | Filter Operator [FIL_109] + | | Filter Operator [FIL_107] | | predicate:value is not null (type: boolean) | | TableScan [TS_24] | | alias:y @@ -2956,7 +2877,7 @@ Stage-0 | | sort order:+ | | Select Operator [SEL_29] | | outputColumnNames:["_col0"] - | | Filter Operator [FIL_110] + | | 
Filter Operator [FIL_108] | | predicate:value is not null (type: boolean) | | TableScan [TS_28] | | alias:y @@ -2967,170 +2888,216 @@ Stage-0 | sort order:+ | Select Operator [SEL_23] | outputColumnNames:["_col0"] - | Filter Operator [FIL_108] + | Filter Operator [FIL_106] | predicate:value is not null (type: boolean) | TableScan [TS_22] | alias:x |<-Map 19 [CONTAINS] - | File Output Operator [FS_76] + | File Output Operator [FS_77] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_74] + | Select Operator [SEL_75] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_124] + | Map Join Operator [MAPJOIN_122] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col0 (type: string)","Map 19":"_col1 (type: string)"} + | | keys:{"Map 21":"_col1 (type: string)","Map 19":"_col0 (type: string)"} | | outputColumnNames:["_col1","_col4"] | |<-Map 21 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_72] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | Reduce Output Operator [RS_73] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Select Operator [SEL_63] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_118] - | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col3 (type: string) + | | Map Join Operator [MAPJOIN_121] + | | | condition map:[{"":"Inner Join 0 to 1"}] + 
| | | keys:{"Map 21":"_col0 (type: string)","Map 20":"_col0 (type: string)"} + | | | outputColumnNames:["_col0","_col1","_col3"] + | | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 20 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_65] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: string) + | | | Select Operator [SEL_61] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_115] + | | | predicate:(key is not null and value is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_60] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_63] + | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_62] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | Reduce Output Operator [RS_131] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | Filter Operator [FIL_116] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_62] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | Reduce Output Operator [RS_126] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | value 
expressions:_col1 (type: string) - | | Please refer to the previous Select Operator [SEL_63] - | | Reduce Output Operator [RS_132] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col3 (type: string) + | | Please refer to the previous Map Join Operator [MAPJOIN_121] + | | Reduce Output Operator [RS_127] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Please refer to the previous Select Operator [SEL_63] - | | Reduce Output Operator [RS_133] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col3 (type: string) + | | Please refer to the previous Map Join Operator [MAPJOIN_121] + | | Reduce Output Operator [RS_128] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Please refer to the previous Select Operator [SEL_63] - | |<-Map Join Operator [MAPJOIN_123] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 20":"_col1 (type: string)","Map 19":"_col0 (type: string)"} - | | outputColumnNames:["_col1"] - | |<-Map 20 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_67] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: 
string) - | | Select Operator [SEL_61] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_117] - | | predicate:(value is not null and key is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_60] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | Reduce Output Operator [RS_127] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string) - | | Please refer to the previous Select Operator [SEL_61] - | | Reduce Output Operator [RS_128] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string) - | | Please refer to the previous Select Operator [SEL_61] - | | Reduce Output Operator [RS_129] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string) - | | Please refer to the previous Select Operator [SEL_61] - | |<-Select Operator [SEL_58] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_116] - | predicate:value is not null (type: boolean) - | TableScan [TS_57] - | alias:y + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col3 (type: string) + | | Please refer to the previous Map Join Operator [MAPJOIN_121] + | |<-Select Operator [SEL_58] + | outputColumnNames:["_col0"] + | Filter Operator 
[FIL_114] + | predicate:value is not null (type: boolean) + | TableScan [TS_57] + | alias:y |<-Map 16 [CONTAINS] - | File Output Operator [FS_76] + | File Output Operator [FS_77] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_74] + | Select Operator [SEL_75] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_124] + | Map Join Operator [MAPJOIN_122] | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col0 (type: string)","Map 16":"_col1 (type: string)"} + | | keys:{"Map 21":"_col1 (type: string)","Map 16":"_col0 (type: string)"} | | outputColumnNames:["_col1","_col4"] | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] - | |<-Map Join Operator [MAPJOIN_123] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 20":"_col1 (type: string)","Map 16":"_col0 (type: string)"} - | | outputColumnNames:["_col1"] - | |<- Please refer to the previous Map 20 [BROADCAST_EDGE] - | |<-Select Operator [SEL_49] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_113] - | predicate:value is not null (type: boolean) - | TableScan [TS_48] - | alias:x + | |<-Select Operator [SEL_49] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_111] + | predicate:value is not null (type: boolean) + | TableScan [TS_48] + | alias:x |<-Map 18 [CONTAINS] - | File Output Operator [FS_76] + | File Output Operator [FS_77] | compressed:false | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - | Select Operator [SEL_74] + | Select Operator [SEL_75] | outputColumnNames:["_col0","_col1"] - | Map Join Operator [MAPJOIN_124] + | Map Join Operator [MAPJOIN_122] | | condition map:[{"":"Inner 
Join 0 to 1"}] - | | keys:{"Map 21":"_col0 (type: string)","Map 18":"_col1 (type: string)"} + | | keys:{"Map 21":"_col1 (type: string)","Map 18":"_col0 (type: string)"} | | outputColumnNames:["_col1","_col4"] | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] - | |<-Map Join Operator [MAPJOIN_123] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 20":"_col1 (type: string)","Map 18":"_col0 (type: string)"} - | | outputColumnNames:["_col1"] - | |<- Please refer to the previous Map 20 [BROADCAST_EDGE] - | |<-Select Operator [SEL_55] - | outputColumnNames:["_col0"] - | Filter Operator [FIL_115] - | predicate:value is not null (type: boolean) - | TableScan [TS_54] - | alias:y + | |<-Select Operator [SEL_55] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_113] + | predicate:value is not null (type: boolean) + | TableScan [TS_54] + | alias:y |<-Map 17 [CONTAINS] - File Output Operator [FS_76] + | File Output Operator [FS_77] + | compressed:false + | table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + | Select Operator [SEL_75] + | outputColumnNames:["_col0","_col1"] + | Map Join Operator [MAPJOIN_122] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 21":"_col1 (type: string)","Map 17":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col4"] + | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] + | |<-Select Operator [SEL_51] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_112] + | predicate:value is not null (type: boolean) + | TableScan [TS_50] + | alias:y + |<-Reducer 2 [CONTAINS] + File Output Operator [FS_77] compressed:false table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Select 
Operator [SEL_74] + Select Operator [SEL_21] outputColumnNames:["_col0","_col1"] - Map Join Operator [MAPJOIN_124] + Merge Join Operator [MERGEJOIN_118] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 21":"_col0 (type: string)","Map 17":"_col1 (type: string)"} - | outputColumnNames:["_col1","_col4"] - |<- Please refer to the previous Map 21 [BROADCAST_EDGE] - |<-Map Join Operator [MAPJOIN_123] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 20":"_col1 (type: string)","Map 17":"_col0 (type: string)"} - | outputColumnNames:["_col1"] - |<- Please refer to the previous Map 20 [BROADCAST_EDGE] - |<-Select Operator [SEL_51] - outputColumnNames:["_col0"] - Filter Operator [FIL_114] - predicate:value is not null (type: boolean) - TableScan [TS_50] - alias:y + | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col3"] + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_17] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_102] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Union 5 [SIMPLE_EDGE] + |<-Map 4 [CONTAINS] + | Reduce Output Operator [RS_19] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Map Join Operator [MAPJOIN_117] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 4":"_col0 (type: string)","Map 7":"_col1 (type: string)"} + | | 
outputColumnNames:["_col1"] + | |<-Map 7 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_13] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_9] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_105] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_8] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | Reduce Output Operator [RS_123] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Please refer to the previous Select Operator [SEL_9] + | |<-Select Operator [SEL_3] + | outputColumnNames:["_col0"] + | Filter Operator [FIL_103] + | predicate:value is not null (type: boolean) + | TableScan [TS_2] + | alias:x + |<-Map 6 [CONTAINS] + Reduce Output Operator [RS_19] + key expressions:_col1 (type: string) + Map-reduce partition columns:_col1 (type: string) + sort order:+ + Map Join Operator [MAPJOIN_117] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Map 7":"_col1 (type: string)","Map 6":"_col0 (type: string)"} + | outputColumnNames:["_col1"] + |<- Please refer to the previous Map 7 [BROADCAST_EDGE] + |<-Select Operator [SEL_5] + outputColumnNames:["_col0"] + Filter Operator [FIL_104] + predicate:value is not null (type: boolean) + TableScan [TS_4] + alias:y PREHOOK: query: explain SELECT x.key, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -3167,6 +3134,7 
@@ Map 23 <- Union 24 (CONTAINS) Map 32 <- Union 28 (CONTAINS) Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) Map 31 <- Union 26 (CONTAINS) +Map 34 <- Map 33 (BROADCAST_EDGE) Map 20 <- Union 15 (CONTAINS) Map 10 <- Union 8 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) @@ -3176,7 +3144,7 @@ Reducer 9 <- Map 11 (BROADCAST_EDGE), Union 8 (SIMPLE_EDGE) Reducer 17 <- Map 21 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 29 <- Map 33 (BROADCAST_EDGE), Map 34 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 29 <- Map 34 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 16 <- Union 15 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 3 (CONTAINS) Map 7 <- Union 8 (CONTAINS) @@ -3186,21 +3154,21 @@ Stage-0 limit:-1 Stage-1 Reducer 6 - File Output Operator [FS_120] + File Output Operator [FS_121] compressed:false - Statistics:Num rows: 270 Data size: 2865 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 258 Data size: 2737 Basic stats: COMPLETE Column stats: NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Group By Operator [GBY_118] + Group By Operator [GBY_119] | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 270 Data size: 2865 Basic stats: COMPLETE Column stats: NONE + | Statistics:Num rows: 258 Data size: 2737 Basic stats: COMPLETE Column stats: NONE |<-Union 5 [SIMPLE_EDGE] |<-Reducer 4 [CONTAINS] - | Reduce Output Operator [RS_117] + | Reduce Output Operator [RS_118] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 
(type: string) | sort order:++ - | Group By Operator [GBY_116] + | Group By Operator [GBY_117] | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1"] | Group By Operator [GBY_67] @@ -3217,7 +3185,7 @@ Stage-0 | | outputColumnNames:["_col0","_col1"] | | Select Operator [SEL_61] | | outputColumnNames:["_col0","_col1"] - | | Merge Join Operator [MERGEJOIN_166] + | | Merge Join Operator [MERGEJOIN_164] | | | condition map:[{"":"Inner Join 0 to 1"}] | | | keys:{"1":"_col0 (type: string)","0":"_col1 (type: string)"} | | | outputColumnNames:["_col1","_col4"] @@ -3231,7 +3199,7 @@ Stage-0 | | | Select Operator [SEL_50] | | | outputColumnNames:["_col0","_col1"] | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_156] + | | | Filter Operator [FIL_154] | | | predicate:key is not null (type: boolean) | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | | | TableScan [TS_49] @@ -3243,7 +3211,7 @@ Stage-0 | | Map-reduce partition columns:_col1 (type: string) | | sort order:+ | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - | | Merge Join Operator [MERGEJOIN_165] + | | Merge Join Operator [MERGEJOIN_163] | | | condition map:[{"":"Inner Join 0 to 1"}] | | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} | | | outputColumnNames:["_col1"] @@ -3258,7 +3226,7 @@ Stage-0 | | | Select Operator [SEL_48] | | | outputColumnNames:["_col0","_col1"] | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_155] + | | | Filter Operator [FIL_153] | | | predicate:(value is not null and key is not null) (type: boolean) | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE | | | TableScan [TS_47] @@ -3300,7 +3268,7 @@ Stage-0 | | | | outputColumnNames:["_col0","_col1"] | | | | Select Operator [SEL_28] | | | | 
outputColumnNames:["_col0","_col1"] - | | | | Filter Operator [FIL_152] + | | | | Filter Operator [FIL_150] | | | | predicate:value is not null (type: boolean) | | | | TableScan [TS_27] | | | | alias:x @@ -3314,7 +3282,7 @@ Stage-0 | | | outputColumnNames:["_col0","_col1"] | | | Select Operator [SEL_30] | | | outputColumnNames:["_col0","_col1"] - | | | Filter Operator [FIL_153] + | | | Filter Operator [FIL_151] | | | predicate:value is not null (type: boolean) | | | TableScan [TS_29] | | | alias:y @@ -3328,7 +3296,7 @@ Stage-0 | | outputColumnNames:["_col0","_col1"] | | Select Operator [SEL_39] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_154] + | | Filter Operator [FIL_152] | | predicate:value is not null (type: boolean) | | TableScan [TS_38] | | alias:y @@ -3342,7 +3310,7 @@ Stage-0 | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_26] | outputColumnNames:["_col0","_col1"] - | Merge Join Operator [MERGEJOIN_164] + | Merge Join Operator [MERGEJOIN_162] | | condition map:[{"":"Inner Join 0 to 1"}] | | keys:{"1":"_col1 (type: string)","0":"_col0 (type: string)"} | | outputColumnNames:["_col1","_col3"] @@ -3356,7 +3324,7 @@ Stage-0 | | Select Operator [SEL_1] | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_148] + | | Filter Operator [FIL_146] | | predicate:key is not null (type: boolean) | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE | | TableScan [TS_0] @@ -3368,7 +3336,7 @@ Stage-0 | Map-reduce partition columns:_col1 (type: string) | sort order:+ | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_163] + | Map Join Operator [MAPJOIN_161] | | condition map:[{"":"Inner Join 0 to 1"}] | | keys:{"Map 11":"_col1 (type: string)","Reducer 9":"_col0 (type: string)"} | | outputColumnNames:["_col1"] @@ -3383,7 +3351,7 @@ Stage-0 | | 
Select Operator [SEL_14] | | outputColumnNames:["_col0","_col1"] | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_151] + | | Filter Operator [FIL_149] | | predicate:(value is not null and key is not null) (type: boolean) | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE | | TableScan [TS_13] @@ -3407,7 +3375,7 @@ Stage-0 | | outputColumnNames:["_col0","_col1"] | | Select Operator [SEL_5] | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_150] + | | Filter Operator [FIL_148] | | predicate:value is not null (type: boolean) | | TableScan [TS_4] | | alias:y @@ -3421,146 +3389,147 @@ Stage-0 | outputColumnNames:["_col0","_col1"] | Select Operator [SEL_3] | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_149] + | Filter Operator [FIL_147] | predicate:value is not null (type: boolean) | TableScan [TS_2] | alias:x |<-Reducer 29 [CONTAINS] - Reduce Output Operator [RS_117] + Reduce Output Operator [RS_118] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) sort order:++ - Group By Operator [GBY_116] + Group By Operator [GBY_117] keys:_col0 (type: string), _col1 (type: string) outputColumnNames:["_col0","_col1"] - Select Operator [SEL_112] + Select Operator [SEL_113] outputColumnNames:["_col0","_col1"] - Map Join Operator [MAPJOIN_168] + Map Join Operator [MAPJOIN_166] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 34":"_col0 (type: string)","Reducer 29":"_col1 (type: string)"} + | keys:{"Map 34":"_col1 (type: string)","Reducer 29":"_col0 (type: string)"} | outputColumnNames:["_col1","_col4"] |<-Map 34 [BROADCAST_EDGE] - | Reduce Output Operator [RS_110] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) + | Reduce Output Operator [RS_111] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 
(type: string) | sort order:+ - | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Select Operator [SEL_101] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_162] - | predicate:key is not null (type: boolean) + | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col3 (type: string) + | Map Join Operator [MAPJOIN_165] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Map 34":"_col0 (type: string)","Map 33":"_col0 (type: string)"} + | | outputColumnNames:["_col0","_col1","_col3"] + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | |<-Map 33 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_103] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_99] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_159] + | | predicate:(key is not null and value is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_98] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_101] + | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_100] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Map Join Operator [MAPJOIN_167] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 33":"_col1 
(type: string)","Reducer 29":"_col0 (type: string)"} - | outputColumnNames:["_col1"] - |<-Map 33 [BROADCAST_EDGE] - | Reduce Output Operator [RS_105] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) - | sort order:+ - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col0 (type: string) - | Select Operator [SEL_99] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_161] - | predicate:(value is not null and key is not null) (type: boolean) - | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_98] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_97] - outputColumnNames:["_col0"] - Group By Operator [GBY_96] - | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | outputColumnNames:["_col0","_col1"] - |<-Union 28 [SIMPLE_EDGE] - |<-Map 32 [CONTAINS] - | Reduce Output Operator [RS_95] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_94] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_90] - | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_160] - | predicate:value is not null (type: boolean) - | TableScan [TS_89] - | alias:y - |<-Reducer 27 [CONTAINS] - Reduce Output Operator [RS_95] - key expressions:_col0 (type: string), _col1 (type: string) - Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - sort order:++ - Group By Operator [GBY_94] - keys:_col0 (type: string), _col1 (type: string) - outputColumnNames:["_col0","_col1"] - Group By Operator [GBY_87] - | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | 
outputColumnNames:["_col0","_col1"] - |<-Union 26 [SIMPLE_EDGE] - |<-Reducer 25 [CONTAINS] - | Reduce Output Operator [RS_86] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_85] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Group By Operator [GBY_78] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | |<-Union 24 [SIMPLE_EDGE] - | |<-Map 30 [CONTAINS] - | | Reduce Output Operator [RS_77] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Group By Operator [GBY_76] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Select Operator [SEL_72] - | | outputColumnNames:["_col0","_col1"] - | | Filter Operator [FIL_158] - | | predicate:value is not null (type: boolean) - | | TableScan [TS_71] - | | alias:y - | |<-Map 23 [CONTAINS] - | Reduce Output Operator [RS_77] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Group By Operator [GBY_76] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Select Operator [SEL_70] - | outputColumnNames:["_col0","_col1"] - | Filter Operator [FIL_157] - | predicate:value is not null (type: boolean) - | TableScan [TS_69] - | alias:x - |<-Map 31 [CONTAINS] - Reduce Output Operator [RS_86] - key expressions:_col0 (type: string), _col1 (type: string) - Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - sort order:++ - Group By Operator [GBY_85] - keys:_col0 (type: string), _col1 (type: string) + | Filter Operator [FIL_160] + | predicate:key is not null (type: 
boolean) + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_100] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_97] + outputColumnNames:["_col0"] + Group By Operator [GBY_96] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + |<-Union 28 [SIMPLE_EDGE] + |<-Map 32 [CONTAINS] + | Reduce Output Operator [RS_95] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_94] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_90] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_158] + | predicate:value is not null (type: boolean) + | TableScan [TS_89] + | alias:y + |<-Reducer 27 [CONTAINS] + Reduce Output Operator [RS_95] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_94] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Group By Operator [GBY_87] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + |<-Union 26 [SIMPLE_EDGE] + |<-Reducer 25 [CONTAINS] + | Reduce Output Operator [RS_86] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_85] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Group By Operator [GBY_78] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | |<-Union 24 [SIMPLE_EDGE] + | |<-Map 30 [CONTAINS] + | | Reduce Output Operator 
[RS_77] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Group By Operator [GBY_76] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Select Operator [SEL_72] + | | outputColumnNames:["_col0","_col1"] + | | Filter Operator [FIL_156] + | | predicate:value is not null (type: boolean) + | | TableScan [TS_71] + | | alias:y + | |<-Map 23 [CONTAINS] + | Reduce Output Operator [RS_77] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Group By Operator [GBY_76] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Select Operator [SEL_70] + | outputColumnNames:["_col0","_col1"] + | Filter Operator [FIL_155] + | predicate:value is not null (type: boolean) + | TableScan [TS_69] + | alias:x + |<-Map 31 [CONTAINS] + Reduce Output Operator [RS_86] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Group By Operator [GBY_85] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Select Operator [SEL_81] outputColumnNames:["_col0","_col1"] - Select Operator [SEL_81] - outputColumnNames:["_col0","_col1"] - Filter Operator [FIL_159] - predicate:value is not null (type: boolean) - TableScan [TS_80] - alias:y + Filter Operator [FIL_157] + predicate:value is not null (type: boolean) + TableScan [TS_80] + alias:y PREHOOK: query: CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default