diff --git a/hcatalog/core/.gitignore b/hcatalog/core/.gitignore index 0a7a9c5..3b2ad03 100644 --- a/hcatalog/core/.gitignore +++ b/hcatalog/core/.gitignore @@ -1 +1,2 @@ mapred +/bin/ diff --git a/hcatalog/webhcat/svr/.gitignore b/hcatalog/webhcat/svr/.gitignore index 916e17c..3285bd9 100644 --- a/hcatalog/webhcat/svr/.gitignore +++ b/hcatalog/webhcat/svr/.gitignore @@ -1 +1,2 @@ dependency-reduced-pom.xml +/bin/ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java index 0755943..3c5cac2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCost.java @@ -21,7 +21,15 @@ import org.apache.calcite.plan.RelOptCostFactory; import org.apache.calcite.plan.RelOptUtil; -// TODO: This should inherit from VolcanoCost and should just override isLE method. +/*** + * NOTE:
+ * 1. HiveCost normalizes CPU and IO into time.
+ * 2. CPU and IO costs are added together to find the query latency.
+ * 3. If query latency is equal then row count is compared. + */ + +// TODO: This should inherit from VolcanoCost and should just override isLE +// method. public class HiveCost implements RelOptCost { // ~ Static fields/initializers --------------------------------------------- @@ -114,8 +122,10 @@ public boolean equals(RelOptCost other) { } public boolean isEqWithEpsilon(RelOptCost other) { - return (this == other) || (Math.abs((this.cpu + this.io) - - (other.getCpu() + other.getIo())) < RelOptUtil.EPSILON); + return (this == other) + || ((Math.abs(this.io - other.getIo()) < RelOptUtil.EPSILON) + && (Math.abs(this.cpu - other.getCpu()) < RelOptUtil.EPSILON) && (Math + .abs(this.rowCount - other.getRows()) < RelOptUtil.EPSILON)); } public RelOptCost minus(RelOptCost other) { diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index 4a5d02d..66e0e9f 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -808,32 +808,32 @@ STAGE PLANS: alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -843,10 +843,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col2 + outputColumnNames: _col1 Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: int) + expressions: _col1 (type: int) outputColumnNames: _col0 Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/join32.q.out b/ql/src/test/results/clientpositive/join32.q.out index d9e8dd3..a05a356 100644 --- a/ql/src/test/results/clientpositive/join32.q.out +++ b/ql/src/test/results/clientpositive/join32.q.out @@ -109,71 +109,25 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_0:y Fetch Operator limit: -1 - Partition Description: - 
Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_0:y TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value 
(type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -187,7 +141,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -196,31 +150,31 @@ STAGE PLANS: HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -229,11 +183,11 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + outputColumnNames: _col1, _col2, _col5 Position of Big Table: 1 Statistics: Num rows: 302 
Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -402,7 +356,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:$hdt$_1:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] Stage: Stage-0 Move Operator @@ -451,8 +405,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/join32_lessSize.q.out b/ql/src/test/results/clientpositive/join32_lessSize.q.out index 9e3d06d..136c306 100644 --- a/ql/src/test/results/clientpositive/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/join32_lessSize.q.out @@ -130,7 +130,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), 
value (type: string) @@ -139,31 +139,31 @@ STAGE PLANS: HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Stage: Stage-6 Map Reduce Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -175,8 +175,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col3 - columns.types string,string,string + columns _col0,_col3 + columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -190,7 +190,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -200,14 +200,14 @@ STAGE PLANS: columns.comments 'default','default' 
columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -220,23 +220,26 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -244,106 +247,59 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:$hdt$_1:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_0:y Fetch Operator limit: -1 - Partition Description: - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 
5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_0:y TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -362,11 +318,11 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + outputColumnNames: _col1, _col2, _col5 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: 
Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -406,8 +362,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col3 - columns.types string,string,string + columns _col0,_col3 + columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -415,19 +371,16 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col1,_col2,_col3 - columns.types string,string,string + columns _col0,_col3 + columns.types string,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -435,13 +388,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -451,21 +402,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: #### A masked pattern was here #### @@ -516,8 +470,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 @@ -703,28 +657,27 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-11 Stage-10 depends on stages: Stage-8 Stage-7 depends on stages: Stage-10 - Stage-0 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-7 + Stage-6 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: - 
$hdt$_1:$hdt$_2:x - Fetch Operator - limit: -1 - $hdt$_1:$hdt$_3:x + $hdt$_1:$hdt$_2:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_2:x + $hdt$_1:$hdt$_2:$hdt$_2:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -733,9 +686,12 @@ STAGE PLANS: HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 0 - $hdt$_1:$hdt$_3:x + 1 _col0 (type: string) + Position of Big Table: 1 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -748,62 +704,32 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - alias: w - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 - Position of Big 
Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col4 - Position of Big Table: 0 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Position of Big Table: 1 + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col1,_col4 - columns.types string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -811,7 +737,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -821,14 +747,14 @@ STAGE PLANS: 
columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -841,21 +767,106 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [$hdt$_1:$hdt$_2:$hdt$_3:x] + + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:$hdt$_1:w + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:$hdt$_1:w + TableScan + alias: w + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 
_col1 (type: string) + Position of Big Table: 1 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col4 + Position of Big Table: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col1,_col4 + columns.types string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col3 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +#### A masked pattern was here #### + Partition + base file name: src 
input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -865,14 +876,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -885,22 +896,22 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src [$hdt$_1:$hdt$_1:w] +#### A masked pattern was here #### - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:w @@ -926,7 +937,7 @@ STAGE PLANS: 1 _col1 (type: string) Position of Big Table: 1 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -939,17 +950,17 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col1, _col3, _col6 Position of Big Table: 1 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic 
stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -982,7 +993,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10002 + base file name: -mr-10001 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: diff --git a/ql/src/test/results/clientpositive/join33.q.out b/ql/src/test/results/clientpositive/join33.q.out index d9e8dd3..a05a356 100644 --- a/ql/src/test/results/clientpositive/join33.q.out +++ b/ql/src/test/results/clientpositive/join33.q.out @@ -109,71 +109,25 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:z + $hdt$_0:y Fetch Operator limit: -1 - Partition Description: - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl 
struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:z + $hdt$_0:y TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -187,7 +141,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE 
Select Operator expressions: key (type: string), value (type: string) @@ -196,31 +150,31 @@ STAGE PLANS: HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -229,11 +183,11 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + outputColumnNames: _col1, _col2, _col5 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -402,7 +356,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:$hdt$_1:y] + /srcpart/ds=2008-04-08/hr=11 
[$hdt$_1:$hdt$_1:z] Stage: Stage-0 Move Operator @@ -451,8 +405,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/join_alt_syntax.q.out index 007e4c6..cc908c1 100644 --- a/ql/src/test/results/clientpositive/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/join_alt_syntax.q.out @@ -359,13 +359,13 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -379,36 +379,34 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition 
columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -420,60 +418,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition 
columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce 
partition columns: _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -488,14 +432,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -504,6 +455,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -521,56 +519,54 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) + predicate: (p_name is not null and 
p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col1 
(type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -582,60 +578,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - TableScan - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 
26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -650,14 +592,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -666,6 +615,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter 
Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out index ff92d9f..98008ad 100644 --- 
a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out @@ -141,13 +141,13 @@ from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 on and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -161,36 +161,36 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select 
Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -202,44 +202,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int), 
_col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - TableScan - Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col9 (type: int), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 
(type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -256,29 +218,20 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: 
Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + 1 _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, 
_col23, _col24, _col25, _col26 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -287,6 +240,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col9 (type: int) + sort order: + + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 
(type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col9 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out index f608cfd..a1dd24e 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out @@ -145,13 +145,13 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -165,36 +165,36 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: 
string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 
(type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -206,44 +206,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - TableScan - Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col9 (type: int), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, 
_col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter 
Operator @@ -260,29 +222,20 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: 
double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + 1 _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -291,6 +244,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan 
+ Reduce Output Operator + key expressions: _col9 (type: int) + sort order: + + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col9 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 
(type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/spark/join32.q.out b/ql/src/test/results/clientpositive/spark/join32.q.out index 0f16678..a9d50b4 100644 --- a/ql/src/test/results/clientpositive/spark/join32.q.out +++ b/ql/src/test/results/clientpositive/spark/join32.q.out @@ -113,16 +113,16 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -136,12 +136,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file 
name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -149,13 +146,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -165,23 +160,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Map 3 Map Operator Tree: TableScan @@ -190,7 +188,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) 
(type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -199,7 +197,7 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -260,24 +258,24 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -288,13 +286,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + outputColumnNames: _col1, _col2, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -330,9 +328,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was 
here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -340,11 +341,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -354,26 +357,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-0 Move Operator @@ -422,8 +422,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE 
[(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index 54f47f9..dac9610 100644 --- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -121,16 +121,16 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -144,12 +144,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ 
-157,13 +154,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -173,23 +168,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Map 3 Map Operator Tree: TableScan @@ -198,7 +196,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -207,7 +205,7 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 
_col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -268,24 +266,24 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -296,13 +294,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + outputColumnNames: _col1, _col2, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -338,9 +336,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: 
COLUMN_STATS_ACCURATE true bucket_count -1 @@ -348,11 +349,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -362,26 +365,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-0 Move Operator @@ -430,8 +430,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, 
type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 @@ -613,34 +613,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: w - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 1 Local Work: Map Reduce Local Work @@ -649,7 +650,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -659,14 +660,14 
@@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -679,39 +680,44 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [w] - Map 3 + /src1 [x] + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: w + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: 
string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col1 (type: string) - Position of Big Table: 0 + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -719,7 +725,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -729,14 +735,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -749,20 +755,20 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] + /src [w] Map 4 Map Operator 
Tree: TableScan @@ -777,11 +783,22 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - Position of Big Table: 0 + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -858,68 +875,57 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col1 (type: string) - outputColumnNames: _col1 + outputColumnNames: _col1, _col4 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col4 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3, _col6 input vertices: - 1 Map 4 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1, _col3, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE - File Output Operator - 
compressed: false - GlobalTableId: 1 + Select Operator + expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 332 Data size: 3534 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string #### A masked pattern was here #### - name default.dest_j1 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 + name default.dest_j1 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + 
TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index 0f16678..a9d50b4 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -113,16 +113,16 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -136,12 +136,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -149,13 +146,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -165,23 +160,26 
@@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Map 3 Map Operator Tree: TableScan @@ -190,7 +188,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) + predicate: (value is not null and key is not null) (type: boolean) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -199,7 +197,7 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -260,24 +258,24 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE 
Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 input vertices: 1 Map 3 Position of Big Table: 0 @@ -288,13 +286,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col3 (type: string) - outputColumnNames: _col0, _col4, _col5 + outputColumnNames: _col1, _col2, _col5 input vertices: 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -330,9 +328,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -340,11 +341,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
totalSize 5812 @@ -354,26 +357,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-0 Move Operator @@ -422,8 +422,8 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out index f0d96c4..dabdcb8 100644 --- 
a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -390,9 +390,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -401,71 +401,69 @@ STAGE PLANS: alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic 
stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: (p_partkey is not null and p_name is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: 
COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -473,27 +471,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) 
outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -503,22 +485,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) + value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -544,9 +542,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 
(PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -555,36 +553,35 @@ STAGE PLANS: alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce 
Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan @@ -598,28 +595,27 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + key expressions: _col1 (type: string), _col0 (type: int) + sort order: ++ + Map-reduce 
partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -627,27 +623,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + expressions: _col4 (type: string), _col6 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -657,22 +637,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 
14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) + value expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out index ff1626f..26e05ac 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out @@ -150,9 +150,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### 
Vertices: Map 1 @@ -161,72 +161,72 @@ STAGE PLANS: alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: 
string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 5 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - 
value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: (p_partkey is not null and p_name is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 
(type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -234,27 +234,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col9 (type: int), _col10 (type: string) + 1 _col9 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 
(type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -264,22 +248,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 4 Reduce Operator 
Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + 1 _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) + key expressions: _col9 (type: int) + sort order: + + Map-reduce partition columns: _col9 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 
0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out index 417ba4f..c821fe4 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out @@ -154,9 +154,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -165,72 +165,72 @@ STAGE PLANS: alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_partkey 
is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output 
Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 5 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: 
string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 7 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: (p_partkey is not null and p_name is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -238,27 +238,11 @@ STAGE PLANS: 
Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col9 (type: int), _col10 (type: string) + 1 _col9 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: 
string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -268,22 +252,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 
_col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + 1 _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: int), _col10 (type: string) - sort order: ++ - Map-reduce partition columns: _col9 (type: int), _col10 (type: string) + key expressions: _col9 (type: int) + sort order: + + Map-reduce partition columns: _col9 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 
_col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator