Index: ql/src/test/results/clientpositive/ppd_join_filter.q.out =================================================================== --- ql/src/test/results/clientpositive/ppd_join_filter.q.out (revision 0) +++ ql/src/test/results/clientpositive/ppd_join_filter.q.out (revision 0) @@ -0,0 +1,1228 @@ +PREHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) k) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 1) k1) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 2) k2) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 3) k3)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (. (TOK_TABLE_OR_COL b) k1) 5)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + b:src + TableScan + alias: src + GatherStats: false + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: min(key) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: string + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
name: default.src + name: default.src + Reduce Operator Tree: + Group By Operator + aggregations: + expr: min(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: (_col1 + 1) + type: double + expr: (_col1 + 2) + type: double + expr: (_col1 + 3) + type: double + outputColumnNames: _col0, _col2, _col3, _col4 + Filter Operator + isSamplingPred: false + predicate: + expr: (_col2 < 5.0) + type: boolean + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col3 + type: double + expr: _col4 + type: double + a + TableScan + alias: a + GatherStats: false + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + Needs Tagging: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col3} {VALUE._col4} + 
handleSkewJoin: false + outputColumnNames: _col0, _col7, _col8 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col7 + type: double + expr: _col8 + type: double + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 2.0 3.0 +0 2.0 3.0 +0 2.0 3.0 +2 4.0 5.0 +PREHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) k) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 1) k1) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 2) k2) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 3) k3)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (. (TOK_TABLE_OR_COL b) k1) 5)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k2)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) k3))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + b:src + TableScan + alias: src + GatherStats: false + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: min(key) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: string + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Reduce Operator Tree: + Group By Operator + aggregations: + expr: min(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: (_col1 + 1) + type: double + expr: (_col1 + 2) + type: double + expr: (_col1 + 3) + type: double + outputColumnNames: _col0, _col2, _col3, _col4 + Filter Operator + isSamplingPred: false + predicate: + expr: (_col2 < 5.0) + type: boolean + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col3 + type: double + expr: _col4 + type: double + a + TableScan + alias: a + GatherStats: false + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + Needs Tagging: true + Path -> Alias: +#### A masked 
pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col3} {VALUE._col4} + handleSkewJoin: false + outputColumnNames: _col0, _col7, _col8 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col7 + type: double + expr: _col8 + type: double + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 2.0 3.0 +0 2.0 3.0 +0 2.0 3.0 +2 4.0 5.0 +PREHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 
+PREHOOK: type: QUERY +POSTHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) k) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 1) k1) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 2) k2) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 3) k3)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (. (TOK_TABLE_OR_COL b) k1) 5)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + b:src + TableScan + alias: src + GatherStats: false + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: min(key) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: string + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Reduce Operator Tree: + Group By Operator + aggregations: + expr: min(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: (_col1 + 1) + type: double + expr: (_col1 + 2) + type: double + expr: (_col1 + 3) + type: double + outputColumnNames: _col0, _col2, _col3, _col4 + Filter Operator + isSamplingPred: false + predicate: + expr: 
(_col2 < 5.0) + type: boolean + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col3 + type: double + expr: _col4 + type: double + a + TableScan + alias: a + GatherStats: false + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + Needs Tagging: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col3} {VALUE._col4} + handleSkewJoin: false + outputColumnNames: _col0, _col7, _col8 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col7 + type: double + expr: _col8 + type: double + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 2.0 3.0 +0 2.0 3.0 +0 2.0 3.0 +2 4.0 5.0 +PREHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) k) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 1) k1) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 2) k2) (TOK_SELEXPR (+ (TOK_FUNCTION min (TOK_TABLE_OR_COL key)) 3) k3)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (< (. (TOK_TABLE_OR_COL b) k1) 5)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k2)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) k3))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + b:src + TableScan + alias: src + GatherStats: false + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: min(key) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: string + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Reduce Operator Tree: + Group By Operator + aggregations: + expr: min(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: (_col1 + 1) + type: double + expr: (_col1 + 2) + type: double + expr: (_col1 + 3) + type: double + outputColumnNames: _col0, _col2, _col3, _col4 + Filter Operator + isSamplingPred: false + predicate: + expr: (_col2 < 5.0) + type: boolean + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col3 + type: double + expr: _col4 + type: double + a + TableScan + alias: a + GatherStats: false + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + Needs Tagging: true + Path -> Alias: +#### A masked 
pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col2,_col3,_col4 + columns.types string,double,double,double + escape.delim \ +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col3} {VALUE._col4} + handleSkewJoin: false + outputColumnNames: _col0, _col7, _col8 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col7 + type: double + expr: _col8 + type: double + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 2.0 3.0 +0 2.0 3.0 +0 2.0 3.0 +2 4.0 5.0 Index: ql/src/test/queries/clientpositive/ppd_join_filter.q =================================================================== --- ql/src/test/queries/clientpositive/ppd_join_filter.q (revision 0) +++ 
ql/src/test/queries/clientpositive/ppd_join_filter.q (revision 0) @@ -0,0 +1,115 @@ +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=false; + +explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5; + +select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5; + +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; + +explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5; + +select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5; + +set hive.optimize.ppd=false; +set hive.ppd.remove.duplicatefilters=false; + +explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5; + +select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5; + +set hive.optimize.ppd=false; +set hive.ppd.remove.duplicatefilters=true; + +explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5; + +select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5; \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (revision 1370664) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (working copy) @@ -87,8 +87,11 @@ // get list of columns used in the filter List<String> cl = condn.getCols(); // merge it with the downstream col list + List<String> filterOpPrunedColLists = Utilities.mergeUniqElems(cppCtx.genColLists(op), cl); + List<String> filterOpPrunedColListsOrderPreserved = preserveColumnOrder(op, + filterOpPrunedColLists); cppCtx.getPrunedColLists().put(op, - Utilities.mergeUniqElems(cppCtx.genColLists(op), cl)); + filterOpPrunedColListsOrderPreserved); pruneOperator(cppCtx, op, cppCtx.getPrunedColLists().get(op)); @@ -591,6 +594,32 @@ } } + /** + * The pruning needs to preserve the order of columns in the input schema. + * @param op the operator whose input schema supplies the column order + * @param cols the pruned list of column names to reorder + * @return cols reordered to match the input schema, or unchanged when the operator has no schema + * @throws SemanticException + */ + private static List<String> preserveColumnOrder(Operator<? extends OperatorDesc> op, + List<String> cols) + throws SemanticException { + RowSchema inputSchema = op.getSchema(); + if (inputSchema != null) { + ArrayList<String> rs = new ArrayList<String>(); + ArrayList<ColumnInfo> inputCols = inputSchema.getSignature(); + for (ColumnInfo i: inputCols) { + if (cols.contains(i.getInternalName())) { + rs.add(i.getInternalName()); + } + } + return rs; + } else { + return cols; + } + } + + private static void pruneJoinOperator(NodeProcessorCtx ctx, 
CommonJoinOperator op, JoinDesc conf, Map<String, ExprNodeDesc> columnExprMap,
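
Reviewer note: the behavioral change in this patch is easiest to see in isolation. mergeUniqElems appends the filter's columns (here _col2, produced by the b.k1 < 5 predicate) after whatever the downstream operators already requested, which can leave the pruned column list out of schema order; preserveColumnOrder then re-walks the operator's input schema and keeps only the surviving names, so schema order wins. The following is a minimal, self-contained sketch of that idea, not code from the patch: the class and variable names are hypothetical, and plain strings stand in for Hive's RowSchema/ColumnInfo.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Standalone sketch of the order-preserving pruning introduced above.
public class PreserveColumnOrderDemo {

  // Keep only the columns that survived pruning, but walk the input
  // schema so that its declaration order wins over merge order.
  static List<String> preserveColumnOrder(List<String> inputSchemaCols,
      List<String> prunedCols) {
    List<String> ordered = new ArrayList<String>();
    for (String col : inputSchemaCols) {
      if (prunedCols.contains(col)) {
        ordered.add(col);
      }
    }
    return ordered;
  }

  public static void main(String[] args) {
    // Schema order of subquery b in the test: _col0 (key), _col1 (k),
    // _col2 (k1), _col3 (k2), _col4 (k3). _col1 is pruned because the
    // outer query never references k.
    List<String> schema = Arrays.asList("_col0", "_col1", "_col2", "_col3", "_col4");
    // mergeUniqElems appended the filter's column (_col2) after the
    // columns the downstream operators asked for.
    List<String> merged = Arrays.asList("_col0", "_col3", "_col4", "_col2");
    // Prints [_col0, _col2, _col3, _col4]: schema order restored.
    System.out.println(preserveColumnOrder(schema, merged));
  }
}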