Index: ql/src/test/results/clientpositive/union32.q.out =================================================================== --- ql/src/test/results/clientpositive/union32.q.out (revision 0) +++ ql/src/test/results/clientpositive/union32.q.out (revision 0) @@ -0,0 +1,943 @@ +PREHOOK: query: -- This tests various union queries which have columns on one side of the query +-- being of double type and those on the other side another + +CREATE TABLE t1 AS SELECT * FROM src WHERE key < 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: -- This tests various union queries which have columns on one side of the query +-- being of double type and those on the other side another + +CREATE TABLE t1 AS SELECT * FROM src WHERE key < 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE t2 AS SELECT * FROM src WHERE key < 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: CREATE TABLE t2 AS SELECT * FROM src WHERE key < 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t2 +PREHOOK: query: -- Test simple union with double +EXPLAIN +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t1 +UNION ALL +SELECT CAST(key AS BIGINT) AS key FROM t2) a +ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- Test simple union with double +EXPLAIN +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t1 +UNION ALL +SELECT CAST(key AS BIGINT) AS key FROM t2) a +ORDER BY key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_BIGINT (TOK_TABLE_OR_COL key)) key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:a-subquery1:t1 + TableScan + alias: t1 + Select Operator + expressions: + expr: UDFToDouble(key) + type: double + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: double + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: double + null-subquery2:a-subquery2:t2 + TableScan + alias: t2 + Select Operator + expressions: + expr: UDFToLong(key) + type: bigint + outputColumnNames: _col0 + Select Operator + expressions: + expr: UDFToDouble(_col0) + type: double + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: double + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: double + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + 
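Note (not part of the patch): the UDFToDouble select that appears above the Union on the bigint branch of this plan is the new cast-select the patch generates when the two sides of a UNION ALL disagree on a column type. Below is a minimal sketch of the widening decision, assuming only the FunctionRegistry.getCommonClassForUnionAll call visible in the SemanticAnalyzer hunk later in this patch; the driver class and variable names are illustrative, not part of the patch.

    import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class UnionWideningSketch {
      public static void main(String[] args) {
        // One branch selects CAST(key AS DOUBLE), the other CAST(key AS BIGINT).
        TypeInfo leftType = TypeInfoFactory.doubleTypeInfo;
        TypeInfo rightType = TypeInfoFactory.longTypeInfo;
        // The union output column takes the common widened type (double here),
        // which is why the bigint branch gets the extra UDFToDouble(_col0) select.
        TypeInfo common = FunctionRegistry.getCommonClassForUnionAll(leftType, rightType);
        System.out.println(common.getTypeName()); // expected: double
      }
    }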
+PREHOOK: query: SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t1 +UNION ALL +SELECT CAST(key AS BIGINT) AS key FROM t2) a +ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t1 +UNION ALL +SELECT CAST(key AS BIGINT) AS key FROM t2) a +ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +2.0 +2.0 +4.0 +4.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +8.0 +8.0 +9.0 +9.0 +PREHOOK: query: -- Test union with join on the left +EXPLAIN +SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key FROM t2) a +ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- Test union with join on the left +EXPLAIN +SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key FROM t2) a +ORDER BY key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_BIGINT (. (TOK_TABLE_OR_COL a) key)) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:a-subquery1:a + TableScan + alias: a + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + null-subquery1:a-subquery1:b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: UDFToLong(_col0) + type: bigint + outputColumnNames: _col0 + Select Operator + expressions: + expr: UDFToDouble(_col0) + type: double + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Union + Select Operator + expressions: + expr: _col0 + type: double + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: double + null-subquery2:a-subquery2:t2 + TableScan + alias: t2 + Select Operator + expressions: + expr: UDFToDouble(key) + 
type: double + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: double + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: double + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key FROM t2) a +ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key FROM t2) a +ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +2.0 +2.0 +4.0 +4.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +8.0 +8.0 +9.0 +9.0 +PREHOOK: query: -- Test union with join on the right +EXPLAIN +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- Test union with join on the right +EXPLAIN +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) key)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_BIGINT (. 
(TOK_TABLE_OR_COL a) key)) key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery2:a-subquery2:a + TableScan + alias: a + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + null-subquery2:a-subquery2:b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + handleSkewJoin: false + outputColumnNames: _col0 + Select Operator + expressions: + expr: UDFToLong(_col0) + type: bigint + outputColumnNames: _col0 + Select Operator + expressions: + expr: UDFToDouble(_col0) + type: double + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Union + Select Operator + expressions: + expr: _col0 + type: double + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: double + null-subquery1:a-subquery1:t2 + TableScan + alias: t2 + Select Operator + expressions: + expr: UDFToDouble(key) + type: double + outputColumnNames: _col0 + Union + Select Operator + expressions: + expr: _col0 + type: double + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: double + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +2.0 +2.0 +4.0 +4.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +5.0 +8.0 +8.0 +9.0 +9.0 +PREHOOK: query: -- Test union with join on the left selecting multiple columns +EXPLAIN +SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a +ORDER BY key 
+PREHOOK: type: QUERY +POSTHOOK: query: -- Test union with join on the left selecting multiple columns +EXPLAIN +SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a +ORDER BY key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_BIGINT (. (TOK_TABLE_OR_COL a) key)) key) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (. (TOK_TABLE_OR_COL b) key)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL key)) value))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:a-subquery1:a + TableScan + alias: a + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + null-subquery1:a-subquery1:b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col4 + Select Operator + expressions: + expr: UDFToLong(_col0) + type: bigint + expr: UDFToDouble(_col4) + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToDouble(_col0) + type: double + expr: UDFToString(_col1) + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Union + Select Operator + expressions: + expr: _col0 + type: double + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: string + null-subquery2:a-subquery2:t2 + TableScan + alias: t2 + Select Operator + expressions: + expr: UDFToDouble(key) + type: double + expr: key + type: string + outputColumnNames: _col0, _col1 + Union + Select Operator + expressions: + expr: _col0 + type: double + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: string 
+ Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a +ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a +ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0.0 0 +0.0 0 +0.0 0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +2.0 2.0 +2.0 2 +4.0 4 +4.0 4.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5 +5.0 5 +5.0 5 +8.0 8 +8.0 8.0 +9.0 9.0 +9.0 9 +PREHOOK: query: -- Test union with join on the right selecting multiple columns +EXPLAIN +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- Test union with join on the right selecting multiple columns +EXPLAIN +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME t2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_TABLE_OR_COL key)) value)))) (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1) a) (TOK_TABREF (TOK_TABNAME t2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_BIGINT (. (TOK_TABLE_OR_COL a) key)) key) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (. 
(TOK_TABLE_OR_COL b) key)) value))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery2:a-subquery2:a + TableScan + alias: a + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + null-subquery2:a-subquery2:b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col4 + Select Operator + expressions: + expr: UDFToLong(_col0) + type: bigint + expr: UDFToDouble(_col4) + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToDouble(_col0) + type: double + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Union + Select Operator + expressions: + expr: _col0 + type: double + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: double + null-subquery1:a-subquery1:t2 + TableScan + alias: t2 + Select Operator + expressions: + expr: UDFToDouble(key) + type: double + expr: key + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: double + expr: UDFToDouble(_col1) + type: double + outputColumnNames: _col0, _col1 + Union + Select Operator + expressions: + expr: _col0 + type: double + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: double + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +0.0 0.0 +2.0 2.0 +2.0 2.0 +4.0 4.0 +4.0 4.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +5.0 5.0 +8.0 8.0 +8.0 8.0 +9.0 9.0 +9.0 9.0 Index: ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out =================================================================== --- ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out (revision 1399780) +++ ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out (working copy) @@ -1207,20 +1207,27 @@ expr: l_orderkey type: int outputColumnNames: _col0, _col1 - Union - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToLong(_col1) + type: bigint + outputColumnNames: _col0, _col1 + Union + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator Index: ql/src/test/results/clientpositive/groupby_sort_1.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_1.q.out (revision 1399780) +++ ql/src/test/results/clientpositive/groupby_sort_1.q.out (working copy) @@ -2671,50 +2671,57 @@ expr: _col1 type: bigint outputColumnNames: _col0, _col1 - Union - Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToDouble(_col0) + type: double + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Union Select Operator expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expr: _col0 + type: double + expr: _col1 + type: bigint outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.types int:int + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,cnt + columns.types int:int #### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numPartitions 0 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + name default.outputtbl1 + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -2897,16 +2904,16 @@ POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -NULL 1 -NULL 1 -NULL 1 -NULL 1 -NULL 2 1 1 2 1 +2 1 3 1 +4 1 +6 1 7 1 8 2 +14 1 +16 2 PREHOOK: query: -- group by followed by a join EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 @@ -3131,11 +3138,11 @@ numFiles 2 numPartitions 0 numRows 10 - rawDataSize 35 + rawDataSize 32 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 45 + totalSize 42 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 @@ -3162,11 +3169,11 @@ numFiles 2 numPartitions 0 numRows 10 - rawDataSize 35 + rawDataSize 32 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 45 + totalSize 42 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 Index: ql/src/test/results/clientpositive/skewjoinopt11.q.out =================================================================== --- ql/src/test/results/clientpositive/skewjoinopt11.q.out (revision 1399780) +++ ql/src/test/results/clientpositive/skewjoinopt11.q.out (working copy) @@ -25,7 +25,7 @@ PREHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed -- by a union. Both sides of a union consist of a join, which should have used -- skew join compile time optimization. --- adding a order by at the end to make the results deterministic +-- adding an order by at the end to make the results deterministic EXPLAIN select * from @@ -38,7 +38,7 @@ POSTHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed -- by a union. Both sides of a union consist of a join, which should have used -- skew join compile time optimization. 
--- adding a order by at the end to make the results deterministic +-- adding an order by at the end to make the results deterministic EXPLAIN select * from Index: ql/src/test/results/clientpositive/type_widening.q.out =================================================================== --- ql/src/test/results/clientpositive/type_widening.q.out (revision 1399780) +++ ql/src/test/results/clientpositive/type_widening.q.out (working copy) @@ -68,21 +68,26 @@ expr: 0 type: int outputColumnNames: _col0 - Union - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - Reduce Output Operator - key expressions: + Select Operator + expressions: + expr: UDFToLong(_col0) + type: bigint + outputColumnNames: _col0 + Union + Select Operator + expressions: expr: _col0 type: bigint - sort order: + - tag: -1 - value expressions: - expr: _col0 - type: bigint + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: bigint + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: bigint null-subquery2:a-subquery2:src TableScan alias: src Index: ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out (revision 1399780) +++ ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out (working copy) @@ -3012,50 +3012,57 @@ expr: _col1 type: bigint outputColumnNames: _col0, _col1 - Union - Select Operator - expressions: - expr: _col0 - type: double - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToDouble(_col0) + type: double + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Union Select Operator expressions: - expr: UDFToInteger(_col0) - type: int - expr: UDFToInteger(_col1) - type: int + expr: _col0 + type: double + expr: _col1 + type: bigint outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,cnt - columns.types int:int + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,cnt + columns.types int:int #### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numPartitions 0 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + name default.outputtbl1 + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### @@ -3238,16 +3245,16 @@ POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -NULL 1 -NULL 1 -NULL 1 -NULL 1 -NULL 2 1 1 2 1 +2 1 3 1 +4 1 +6 1 7 1 8 2 +14 1 +16 2 PREHOOK: query: -- group by followed by a join EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 @@ -3472,11 +3479,11 @@ numFiles 2 numPartitions 0 numRows 10 - rawDataSize 35 + rawDataSize 32 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 45 + totalSize 42 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 @@ -3503,11 +3510,11 @@ numFiles 2 numPartitions 0 numRows 10 - rawDataSize 35 + rawDataSize 32 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 45 + totalSize 42 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 Index: ql/src/test/results/compiler/plan/union.q.xml =================================================================== --- ql/src/test/results/compiler/plan/union.q.xml (revision 1399780) +++ ql/src/test/results/compiler/plan/union.q.xml (working copy) @@ -92,21 +92,21 @@ - CNTR_NAME_FS_665_NUM_INPUT_ROWS + CNTR_NAME_FS_12_NUM_INPUT_ROWS - CNTR_NAME_FS_665_NUM_OUTPUT_ROWS + CNTR_NAME_FS_12_NUM_OUTPUT_ROWS - CNTR_NAME_FS_665_TIME_TAKEN + CNTR_NAME_FS_12_TIME_TAKEN - CNTR_NAME_FS_665_FATAL_ERROR + CNTR_NAME_FS_12_FATAL_ERROR - FS_665 + FS_12 @@ -160,21 +160,21 @@ - CNTR_NAME_TS_664_NUM_INPUT_ROWS + CNTR_NAME_TS_11_NUM_INPUT_ROWS - CNTR_NAME_TS_664_NUM_OUTPUT_ROWS + CNTR_NAME_TS_11_NUM_OUTPUT_ROWS - CNTR_NAME_TS_664_TIME_TAKEN + CNTR_NAME_TS_11_TIME_TAKEN - CNTR_NAME_TS_664_FATAL_ERROR + CNTR_NAME_TS_11_FATAL_ERROR - TS_664 + TS_11 @@ -800,21 +800,21 @@ - CNTR_NAME_FS_661_NUM_INPUT_ROWS + CNTR_NAME_FS_8_NUM_INPUT_ROWS - CNTR_NAME_FS_661_NUM_OUTPUT_ROWS + CNTR_NAME_FS_8_NUM_OUTPUT_ROWS - CNTR_NAME_FS_661_TIME_TAKEN + CNTR_NAME_FS_8_TIME_TAKEN - CNTR_NAME_FS_661_FATAL_ERROR + CNTR_NAME_FS_8_FATAL_ERROR - FS_661 + FS_8 @@ -892,21 +892,21 @@ - CNTR_NAME_SEL_660_NUM_INPUT_ROWS + CNTR_NAME_SEL_7_NUM_INPUT_ROWS - CNTR_NAME_SEL_660_NUM_OUTPUT_ROWS + CNTR_NAME_SEL_7_NUM_OUTPUT_ROWS - CNTR_NAME_SEL_660_TIME_TAKEN + CNTR_NAME_SEL_7_TIME_TAKEN - CNTR_NAME_SEL_660_FATAL_ERROR + CNTR_NAME_SEL_7_FATAL_ERROR - SEL_660 + SEL_7 @@ -965,21 +965,21 @@ - CNTR_NAME_UNION_659_NUM_INPUT_ROWS + CNTR_NAME_UNION_6_NUM_INPUT_ROWS - CNTR_NAME_UNION_659_NUM_OUTPUT_ROWS + CNTR_NAME_UNION_6_NUM_OUTPUT_ROWS - CNTR_NAME_UNION_659_TIME_TAKEN + CNTR_NAME_UNION_6_TIME_TAKEN - CNTR_NAME_UNION_659_FATAL_ERROR + CNTR_NAME_UNION_6_FATAL_ERROR - UNION_659 + UNION_6 @@ -1057,21 +1057,21 @@ - CNTR_NAME_SEL_658_NUM_INPUT_ROWS + CNTR_NAME_SEL_5_NUM_INPUT_ROWS - CNTR_NAME_SEL_658_NUM_OUTPUT_ROWS + CNTR_NAME_SEL_5_NUM_OUTPUT_ROWS - CNTR_NAME_SEL_658_TIME_TAKEN + CNTR_NAME_SEL_5_TIME_TAKEN - CNTR_NAME_SEL_658_FATAL_ERROR + CNTR_NAME_SEL_5_FATAL_ERROR - SEL_658 + SEL_5 @@ 
-1136,21 +1136,21 @@ - CNTR_NAME_FIL_663_NUM_INPUT_ROWS + CNTR_NAME_FIL_10_NUM_INPUT_ROWS - CNTR_NAME_FIL_663_NUM_OUTPUT_ROWS + CNTR_NAME_FIL_10_NUM_OUTPUT_ROWS - CNTR_NAME_FIL_663_TIME_TAKEN + CNTR_NAME_FIL_10_TIME_TAKEN - CNTR_NAME_FIL_663_FATAL_ERROR + CNTR_NAME_FIL_10_FATAL_ERROR - FIL_663 + FIL_10 @@ -1176,16 +1176,16 @@ - CNTR_NAME_TS_656_NUM_INPUT_ROWS + CNTR_NAME_TS_3_NUM_INPUT_ROWS - CNTR_NAME_TS_656_NUM_OUTPUT_ROWS + CNTR_NAME_TS_3_NUM_OUTPUT_ROWS - CNTR_NAME_TS_656_TIME_TAKEN + CNTR_NAME_TS_3_TIME_TAKEN - CNTR_NAME_TS_656_FATAL_ERROR + CNTR_NAME_TS_3_FATAL_ERROR @@ -1200,7 +1200,7 @@ - TS_656 + TS_3 @@ -1330,7 +1330,7 @@ - + _col0 @@ -1343,7 +1343,7 @@ - + _col1 @@ -1422,21 +1422,21 @@ - CNTR_NAME_SEL_655_NUM_INPUT_ROWS + CNTR_NAME_SEL_2_NUM_INPUT_ROWS - CNTR_NAME_SEL_655_NUM_OUTPUT_ROWS + CNTR_NAME_SEL_2_NUM_OUTPUT_ROWS - CNTR_NAME_SEL_655_TIME_TAKEN + CNTR_NAME_SEL_2_TIME_TAKEN - CNTR_NAME_SEL_655_FATAL_ERROR + CNTR_NAME_SEL_2_FATAL_ERROR - SEL_655 + SEL_2 @@ -1450,10 +1450,30 @@ - + + + _col0 + + + src + + + + + - + + + _col1 + + + src + + + + + @@ -1507,21 +1527,21 @@ - CNTR_NAME_FIL_662_NUM_INPUT_ROWS + CNTR_NAME_FIL_9_NUM_INPUT_ROWS - CNTR_NAME_FIL_662_NUM_OUTPUT_ROWS + CNTR_NAME_FIL_9_NUM_OUTPUT_ROWS - CNTR_NAME_FIL_662_TIME_TAKEN + CNTR_NAME_FIL_9_TIME_TAKEN - CNTR_NAME_FIL_662_FATAL_ERROR + CNTR_NAME_FIL_9_FATAL_ERROR - FIL_662 + FIL_9 @@ -1613,16 +1633,16 @@ - CNTR_NAME_TS_653_NUM_INPUT_ROWS + CNTR_NAME_TS_0_NUM_INPUT_ROWS - CNTR_NAME_TS_653_NUM_OUTPUT_ROWS + CNTR_NAME_TS_0_NUM_OUTPUT_ROWS - CNTR_NAME_TS_653_TIME_TAKEN + CNTR_NAME_TS_0_TIME_TAKEN - CNTR_NAME_TS_653_FATAL_ERROR + CNTR_NAME_TS_0_FATAL_ERROR @@ -1637,7 +1657,7 @@ - TS_653 + TS_0 Index: ql/src/test/queries/clientpositive/union32.q =================================================================== --- ql/src/test/queries/clientpositive/union32.q (revision 0) +++ ql/src/test/queries/clientpositive/union32.q (revision 0) @@ -0,0 +1,75 @@ +-- This tests various union queries which have columns on one side of the query +-- being of double type and those on the other side another + +CREATE TABLE t1 AS SELECT * FROM src WHERE key < 10; +CREATE TABLE t2 AS SELECT * FROM src WHERE key < 10; + +-- Test simple union with double +EXPLAIN +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t1 +UNION ALL +SELECT CAST(key AS BIGINT) AS key FROM t2) a +ORDER BY key; + +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t1 +UNION ALL +SELECT CAST(key AS BIGINT) AS key FROM t2) a +ORDER BY key; + +-- Test union with join on the left +EXPLAIN +SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key FROM t2) a +ORDER BY key; + +SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key FROM t2) a +ORDER BY key; + +-- Test union with join on the right +EXPLAIN +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key; + +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key; + +-- Test union with join on the left selecting multiple columns +EXPLAIN +SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a +ORDER 
BY key; + +SELECT * FROM +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +UNION ALL +SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a +ORDER BY key; + +-- Test union with join on the right selecting multiple columns +EXPLAIN +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key; + +SELECT * FROM +(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 +UNION ALL +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +ORDER BY key; Index: ql/src/test/queries/clientpositive/skewjoinopt11.q =================================================================== --- ql/src/test/queries/clientpositive/skewjoinopt11.q (revision 1399780) +++ ql/src/test/queries/clientpositive/skewjoinopt11.q (working copy) @@ -13,7 +13,7 @@ -- This test is to verify the skew join compile optimization when the join is followed -- by a union. Both sides of a union consist of a join, which should have used -- skew join compile time optimization. --- adding a order by at the end to make the results deterministic +-- adding an order by at the end to make the results deterministic EXPLAIN select * from Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (revision 1399780) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (working copy) @@ -452,7 +452,7 @@ } // by now, 'prunedCols' are columns used by child operators, and 'columns' // are columns used by this select operator. - ArrayList<String> originalOutputColumnNames = conf.getOutputColumnNames(); + List<String> originalOutputColumnNames = conf.getOutputColumnNames(); if (cols.size() < originalOutputColumnNames.size()) { ArrayList<ExprNodeDesc> newColList = new ArrayList<ExprNodeDesc>(); ArrayList<String> newOutputColumnNames = new ArrayList<String>(); Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java (revision 1399780) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java (working copy) @@ -113,7 +113,7 @@ public List<String> getColsFromSelectExpr(SelectOperator op) { List<String> cols = new ArrayList<String>(); SelectDesc conf = op.getConf(); - ArrayList<ExprNodeDesc> exprList = conf.getColList(); + List<ExprNodeDesc> exprList = conf.getColList(); for (ExprNodeDesc expr : exprList) { cols = Utilities.mergeUniqElems(cols, expr.getCols()); } @@ -140,13 +140,13 @@ return cols; } - ArrayList<ExprNodeDesc> selectExprs = conf.getColList(); + List<ExprNodeDesc> selectExprs = conf.getColList(); // The colList is the output columns used by child operators, they are // different // from input columns of the current operator. we need to find out which // input columns are used.
- ArrayList<String> outputColumnNames = conf.getOutputColumnNames(); + List<String> outputColumnNames = conf.getOutputColumnNames(); for (int i = 0; i < outputColumnNames.size(); i++) { if (colList.contains(outputColumnNames.get(i))) { ExprNodeDesc expr = selectExprs.get(i); Index: ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java (revision 1399780) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java (working copy) @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.List; /** @@ -28,8 +29,8 @@ @Explain(displayName = "Select Operator") public class SelectDesc extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; - private ArrayList<ExprNodeDesc> colList; - private ArrayList<String> outputColumnNames; + private List<ExprNodeDesc> colList; + private List<String> outputColumnNames; private boolean selectStar; private boolean selStarNoCompute; @@ -41,14 +42,14 @@ } public SelectDesc( - final ArrayList<ExprNodeDesc> colList, - final ArrayList<String> outputColumnNames) { + final List<ExprNodeDesc> colList, + final List<String> outputColumnNames) { this(colList, outputColumnNames, false); } public SelectDesc( - final ArrayList<ExprNodeDesc> colList, - ArrayList<String> outputColumnNames, + final List<ExprNodeDesc> colList, + List<String> outputColumnNames, final boolean selectStar) { this.colList = colList; this.selectStar = selectStar; @@ -56,7 +57,7 @@ } public SelectDesc( - final ArrayList<ExprNodeDesc> colList, + final List<ExprNodeDesc> colList, final boolean selectStar, final boolean selStarNoCompute) { this.colList = colList; this.selectStar = selectStar; @@ -66,30 +67,30 @@ @Override public Object clone() { SelectDesc ret = new SelectDesc(); - ret.setColList((ArrayList<ExprNodeDesc>)getColList().clone()); - ret.setOutputColumnNames((ArrayList<String>)getOutputColumnNames().clone()); + ret.setColList(new ArrayList<ExprNodeDesc>(getColList())); + ret.setOutputColumnNames(new ArrayList<String>(getOutputColumnNames())); ret.setSelectStar(selectStar); ret.setSelStarNoCompute(selStarNoCompute); return ret; } @Explain(displayName = "expressions") - public ArrayList<ExprNodeDesc> getColList() { + public List<ExprNodeDesc> getColList() { return colList; } public void setColList( - final ArrayList<ExprNodeDesc> colList) { + final List<ExprNodeDesc> colList) { this.colList = colList; } @Explain(displayName = "outputColumnNames") - public ArrayList<String> getOutputColumnNames() { + public List<String> getOutputColumnNames() { return outputColumnNames; } public void setOutputColumnNames( - ArrayList<String> outputColumnNames) { + List<String> outputColumnNames) { this.outputColumnNames = outputColumnNames; } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1399780) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -75,6 +75,7 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; @@ -6430,11 +6431,20 @@ String field = lEntry.getKey(); ColumnInfo lInfo = lEntry.getValue(); ColumnInfo rInfo = rightmap.get(field); - lInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), + ColumnInfo unionColInfo = new ColumnInfo(lInfo); + 
unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType())); - unionoutRR.put(unionalias, field, lInfo); + unionoutRR.put(unionalias, field, unionColInfo); } + if (!(leftOp instanceof UnionOperator)) { + leftOp = genInputSelectForUnion(leftOp, leftmap, leftalias, unionoutRR, unionalias); + } + + if (!(rightOp instanceof UnionOperator)) { + rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias, unionoutRR, unionalias); + } + // If one of the children is a union, merge with it // else create a new one if ((leftOp instanceof UnionOperator) || (rightOp instanceof UnionOperator)) { @@ -6495,6 +6505,66 @@ } /** + * Generates a select operator that can go between the original input operator and the union + * operator. This select casts columns to match the type of the associated column in the union; + * other columns pass through unchanged. The new operator's only parent is the original input + * operator to the union, and its only child is the union. If the input does not need to be + * cast, the original operator is returned, and no new select operator is added. + * + * @param origInputOp + * The original input operator to the union. + * @param origInputFieldMap + * A map from field name to ColumnInfo for the original input operator. + * @param origInputAlias + * The alias associated with the original input operator. + * @param unionoutRR + * The union's output row resolver. + * @param unionalias + * The alias of the union. + * @return the original input operator if no cast was needed, otherwise the new select operator + * @throws UDFArgumentException + */ + private Operator<? extends OperatorDesc> genInputSelectForUnion( + Operator<? extends OperatorDesc> origInputOp, Map<String, ColumnInfo> origInputFieldMap, + String origInputAlias, RowResolver unionoutRR, String unionalias) + throws UDFArgumentException { + + List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>(); + boolean needsCast = false; + for (Map.Entry<String, ColumnInfo> unionEntry: unionoutRR.getFieldMap(unionalias).entrySet()) { + String field = unionEntry.getKey(); + ColumnInfo lInfo = origInputFieldMap.get(field); + ExprNodeDesc column = new ExprNodeColumnDesc(lInfo.getType(), lInfo.getInternalName(), + lInfo.getTabAlias(), lInfo.getIsVirtualCol(), lInfo.isSkewedCol()); + if (!lInfo.getType().equals(unionEntry.getValue().getType())) { + needsCast = true; + column = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc( + unionEntry.getValue().getType().getTypeName(), column); + } + columns.add(column); + } + + // If none of the columns need to be cast, there's no need for an additional select operator + if (!needsCast) { + return origInputOp; + } + + RowResolver rowResolver = new RowResolver(); + List<String> colName = new ArrayList<String>(); + for (int i = 0; i < columns.size(); i++) { + String name = getColumnInternalName(i); + rowResolver.put(origInputAlias, name, new ColumnInfo(name, columns.get(i) + .getTypeInfo(), "", false)); + colName.add(name); + } + + Operator<? extends OperatorDesc> newInputOp = OperatorFactory.getAndMakeChild( + new SelectDesc(columns, colName), new RowSchema(rowResolver.getColumnInfos()), + origInputOp); + return putOpInsertMap(newInputOp, rowResolver); + } + + /** * Generates the sampling predicate from the TABLESAMPLE clause information. 
* This function uses the bucket column list to decide the expression inputs * to the predicate hash function in case useBucketCols is set to true, Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java (revision 1399780) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java (working copy) @@ -96,6 +96,16 @@ this.isHiddenVirtualCol = isHiddenVirtualCol; } + public ColumnInfo(ColumnInfo columnInfo) { + this.internalName = columnInfo.getInternalName(); + this.alias = columnInfo.getAlias(); + this.isSkewedCol = columnInfo.isSkewedCol(); + this.tabAlias = columnInfo.getTabAlias(); + this.isVirtualCol = columnInfo.getIsVirtualCol(); + this.isHiddenVirtualCol = columnInfo.isHiddenVirtualCol(); + this.setType(columnInfo.getType()); + } + public TypeInfo getType() { return TypeInfoUtils.getTypeInfoFromObjectInspector(objectInspector); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java (revision 1399780) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java (working copy) @@ -19,7 +19,7 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; -import java.util.ArrayList; +import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -47,7 +47,7 @@ return; } - ArrayList<ExprNodeDesc> colList = conf.getColList(); + List<ExprNodeDesc> colList = conf.getColList(); eval = new ExprNodeEvaluator[colList.size()]; for (int i = 0; i < colList.size(); i++) { assert (colList.get(i) != null);
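Note (not part of the patch): the ColumnInfo copy constructor added above exists so the union analysis can widen the union output type without mutating the left child's ColumnInfo in place, which is what the removed lInfo.setType(...) call did. A minimal sketch of the intended use, mirroring the SemanticAnalyzer hunk; the helper class and method name are illustrative only:

    import org.apache.hadoop.hive.ql.exec.ColumnInfo;
    import org.apache.hadoop.hive.ql.exec.FunctionRegistry;

    class UnionColumnSketch {
      // Derive the union output column from the left child's column without
      // changing the child's own schema entry.
      static ColumnInfo widenForUnion(ColumnInfo lInfo, ColumnInfo rInfo) {
        ColumnInfo unionColInfo = new ColumnInfo(lInfo); // copy; do not alias lInfo
        unionColInfo.setType(FunctionRegistry.getCommonClassForUnionAll(
            lInfo.getType(), rInfo.getType()));          // e.g. bigint vs double -> double
        return unionColInfo;                             // lInfo keeps its original type
      }
    }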