diff --git ql/src/test/queries/clientpositive/union_remove_12.q ql/src/test/queries/clientpositive/union_remove_12.q index e964ee6..168eac3 100644 --- ql/src/test/queries/clientpositive/union_remove_12.q +++ ql/src/test/queries/clientpositive/union_remove_12.q @@ -1,6 +1,7 @@ set hive.stats.autogather=false; set hive.optimize.union.remove=true; set hive.mapred.supports.subdirectories=true; +set hive.auto.convert.join=true; set hive.merge.mapfiles=true; set hive.merge.mapredfiles=true; @@ -31,7 +32,7 @@ SELECT * FROM ( select key, 1 as values from inputTbl1 union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c; @@ -40,7 +41,7 @@ SELECT * FROM ( select key, 1 as values from inputTbl1 union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c; diff --git ql/src/test/queries/clientpositive/union_remove_13.q ql/src/test/queries/clientpositive/union_remove_13.q index 725343c..a88a83e 100644 --- ql/src/test/queries/clientpositive/union_remove_13.q +++ ql/src/test/queries/clientpositive/union_remove_13.q @@ -1,6 +1,7 @@ set hive.stats.autogather=false; set hive.optimize.union.remove=true; set hive.mapred.supports.subdirectories=true; +set hive.auto.convert.join=true; set hive.merge.mapfiles=true; set hive.merge.mapredfiles=true; @@ -31,7 +32,7 @@ SELECT * FROM ( select key, count(1) as values from inputTbl1 group by key union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c; @@ -40,7 +41,7 @@ SELECT * FROM ( select key, count(1) as values from inputTbl1 group by key union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c; diff --git ql/src/test/results/clientpositive/union_remove_12.q.out ql/src/test/results/clientpositive/union_remove_12.q.out index aae03a6..8debcc8 100644 --- ql/src/test/results/clientpositive/union_remove_12.q.out +++ ql/src/test/results/clientpositive/union_remove_12.q.out @@ -46,7 +46,7 @@ SELECT * FROM ( select key, 1 as values from inputTbl1 union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c PREHOOK: type: QUERY @@ -56,27 +56,26 @@ SELECT * FROM ( select key, 1 as values from inputTbl1 union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME inputTbl1) a) (TOK_TABREF (TOK_TABNAME inputTbl1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) values))))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 values)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME inputTbl1) a) (TOK_TABREF (TOK_TABNAME inputTbl1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) values))))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-7 depends on stages: Stage-2, Stage-9 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8, Stage-9 , consists of Stage-3, Stage-2, Stage-4 Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-3, Stage-2, Stage-5 + Stage-2 + Stage-4 + Stage-5 depends on stages: Stage-4 Stage-10 is a root stage - Stage-1 depends on stages: Stage-10 - Stage-2 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-10 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-8 Map Reduce Alias -> Map Operator Tree: null-subquery1:c-subquery1:inputtbl1 @@ -86,43 +85,22 @@ STAGE PLANS: expressions: expr: key type: string - expr: 1 - type: int + expr: UDFToLong(UDFToString(1)) + type: bigint outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToString(_col1) - type: string - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToLong(_col1) - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 - Stage: Stage-7 + Stage: Stage-6 Conditional Operator - Stage: Stage-4 + Stage: Stage-3 Move Operator files: hdfs directory: true @@ -138,13 +116,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.outputtbl1 - Stage: Stage-3 + Stage: Stage-2 Block level merge - Stage: Stage-5 + Stage: Stage-4 Block level merge - Stage: Stage-6 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -170,7 +148,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 1 - Stage: Stage-1 + Stage: Stage-9 Map Reduce Alias -> Map Operator Tree: null-subquery2:c-subquery2:b @@ -188,55 +166,23 @@ STAGE PLANS: 1 [Column[key]] outputColumnNames: _col0, _col5 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string - expr: _col1 - type: string + expr: UDFToLong(_col5) + type: bigint outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToLong(_col1) - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Local Work: + Map Reduce Local Work PREHOOK: query: insert overwrite table outputTbl1 @@ -244,7 +190,7 @@ SELECT * FROM ( select key, 1 as values from inputTbl1 union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c PREHOOK: type: QUERY @@ -255,7 +201,7 @@ SELECT * FROM ( select key, 1 as values from inputTbl1 union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/union_remove_13.q.out ql/src/test/results/clientpositive/union_remove_13.q.out index c221c79..6d35db4 100644 --- ql/src/test/results/clientpositive/union_remove_13.q.out +++ ql/src/test/results/clientpositive/union_remove_13.q.out @@ -46,7 +46,7 @@ SELECT * FROM ( select key, count(1) as values from inputTbl1 group by key union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c PREHOOK: type: QUERY @@ -56,27 +56,26 @@ SELECT * FROM ( select key, count(1) as values from inputTbl1 group by key union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME inputTbl1) a) (TOK_TABREF (TOK_TABNAME inputTbl1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) values))))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME inputTbl1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) values)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME inputTbl1) a) (TOK_TABREF (TOK_TABNAME inputTbl1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) val) values))))) c)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-7 depends on stages: Stage-2, Stage-9 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8, Stage-9 , consists of Stage-3, Stage-2, Stage-4 Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-3, Stage-2, Stage-5 + Stage-2 + Stage-4 + Stage-5 depends on stages: Stage-4 Stage-10 is a root stage - Stage-1 depends on stages: Stage-10 - Stage-2 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-10 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-8 Map Reduce Alias -> Map Operator Tree: null-subquery1:c-subquery1:inputtbl1 @@ -122,43 +121,22 @@ STAGE PLANS: expressions: expr: _col0 type: string - expr: _col1 + expr: UDFToLong(UDFToString(_col1)) type: bigint outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToString(_col1) - type: string - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToLong(_col1) - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 - Stage: Stage-7 + Stage: Stage-6 Conditional Operator - Stage: Stage-4 + Stage: Stage-3 Move Operator files: hdfs directory: true @@ -174,13 +152,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.outputtbl1 - Stage: Stage-3 + Stage: Stage-2 Block level merge - Stage: Stage-5 + Stage: Stage-4 Block level merge - Stage: Stage-6 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -206,7 +184,7 @@ STAGE PLANS: 1 [Column[key]] Position of Big Table: 1 - Stage: Stage-1 + Stage: Stage-9 Map Reduce Alias -> Map Operator Tree: null-subquery2:c-subquery2:b @@ -224,55 +202,23 @@ STAGE PLANS: 1 [Column[key]] outputColumnNames: _col0, _col5 Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: -#### A masked pattern was here #### - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string - expr: _col1 - type: string + expr: UDFToLong(_col5) + type: bigint outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: UDFToLong(_col1) - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Local Work: + Map Reduce Local Work PREHOOK: query: insert overwrite table outputTbl1 @@ -280,7 +226,7 @@ SELECT * FROM ( select key, count(1) as values from inputTbl1 group by key union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c PREHOOK: type: QUERY @@ -291,7 +237,7 @@ SELECT * FROM ( select key, count(1) as values from inputTbl1 group by key union all -select /*+ mapjoin(a) */ a.key as key, b.val as values +select a.key as key, b.val as values FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c POSTHOOK: type: QUERY