diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java index 36c34cc6b8..1d20a6aa34 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java @@ -18,8 +18,12 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; +import java.util.List; +import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlSpecialOperator; +import org.apache.calcite.sql.SqlWriter; import org.apache.calcite.sql.type.InferTypes; import org.apache.calcite.sql.type.ReturnTypes; @@ -31,5 +35,31 @@ private HiveConcat() { InferTypes.RETURN_TYPE, null ); } -} + @Override + public void unparse( + SqlWriter writer, + SqlCall call, + int leftPrec, + int rightPrec) { + List opList = call.getOperandList(); + assert (opList.size() >= 1); + final SqlWriter.Frame frame = + writer.startList(SqlWriter.FrameTypeEnum.SIMPLE); + SqlNode sqlNode = opList.get(0); + sqlNode.unparse(writer, leftPrec, getLeftPrec()); + for (SqlNode op : opList.subList(1, opList.size() - 1)) { + writer.setNeedWhitespace(true); + writer.sep("||"); + writer.setNeedWhitespace(true); + op.unparse(writer, 0, 0); + } + sqlNode = opList.get(opList.size() - 1); + writer.setNeedWhitespace(true); + writer.sep("||"); + writer.setNeedWhitespace(true); + sqlNode.unparse(writer, getRightPrec(), rightPrec); + writer.endList(frame); + } + +} diff --git a/ql/src/test/queries/clientpositive/concat_op.q b/ql/src/test/queries/clientpositive/concat_op.q index 9b0487be10..7246e16ce6 100644 --- a/ql/src/test/queries/clientpositive/concat_op.q +++ b/ql/src/test/queries/clientpositive/concat_op.q @@ -44,3 +44,8 @@ explain select 'a' || 'b' || 'c'; -- true and (false or false) and (true or true) => false should not happen select true and false or false and true or true; +explain formatted select key || value from src; + +explain formatted select key || value || key from src; + +explain formatted select key || value || key || value from src; diff --git a/ql/src/test/results/clientpositive/concat_op.q.out b/ql/src/test/results/clientpositive/concat_op.q.out index 7bbb0415a4..9c4fca2442 100644 --- a/ql/src/test/results/clientpositive/concat_op.q.out +++ b/ql/src/test/results/clientpositive/concat_op.q.out @@ -303,3 +303,30 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### true +PREHOOK: query: explain formatted select key || value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain formatted select key || value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +{"optimizedSQL":"SELECT `key` || `value` AS `_o__c0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"concat(key, value) (type: string)","columnExprMap:":{"_col0":"concat(key, value)"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_4"}}}}}}],"Execution mode:":"vectorized"}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_5"}}}}}} +PREHOOK: query: explain formatted select key || value || key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain formatted select key || value || key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +{"optimizedSQL":"SELECT `key` || `value` || `key` AS `_o__c0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"concat(concat(key, value), key) (type: string)","columnExprMap:":{"_col0":"concat(concat(key, value), key)"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_4"}}}}}}],"Execution mode:":"vectorized"}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_5"}}}}}} +PREHOOK: query: explain formatted select key || value || key || value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain formatted select key || value || key || value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +{"optimizedSQL":"SELECT `key` || `value` || `key` || `value` AS `_o__c0`\nFROM `default`.`src`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","columns:":["key","value"],"database:":"default","Statistics:":"Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE","table:":"src","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"concat(concat(concat(key, value), key), value) (type: string)","columnExprMap:":{"_col0":"concat(concat(concat(key, value), key), value)"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_4"}}}}}}],"Execution mode:":"vectorized"}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_5"}}}}}} diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out index b247113156..66988ee04a 100644 --- a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out @@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n144 +OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2` +FROM `default`.`srcpart` +WHERE `ds` = '2008-04-08' +GROUP BY SUBSTR(`key`, 1, 1) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out index b357d0eaaa..9f2a587ada 100644 --- a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out @@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n174 +OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`, SUM(DISTINCT SUBSTR(`value`, 5)) AS `_o__c3`, COUNT(DISTINCT `value`) AS `_o__c4` +FROM `default`.`srcpart` +WHERE `ds` = '2008-04-08' +GROUP BY SUBSTR(`key`, 1, 1) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/groupby_ppr.q.out b/ql/src/test/results/clientpositive/groupby_ppr.q.out index e9c20af0ef..d84c649e2b 100644 --- a/ql/src/test/results/clientpositive/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppr.q.out @@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n79 +OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2` +FROM `default`.`srcpart` +WHERE `ds` = '2008-04-08' +GROUP BY SUBSTR(`key`, 1, 1) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out index 2885282c98..db358d9a53 100644 --- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out @@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1 +OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`, SUM(DISTINCT SUBSTR(`value`, 5)) AS `_o__c3`, COUNT(DISTINCT `value`) AS `_o__c4` +FROM `default`.`srcpart` +WHERE `ds` = '2008-04-08' +GROUP BY SUBSTR(`key`, 1, 1) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out index 5bc1854309..a360c42081 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out @@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n144 +OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2` +FROM `default`.`srcpart` +WHERE `ds` = '2008-04-08' +GROUP BY SUBSTR(`key`, 1, 1) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out index 2e96ff45c1..558e57b6de 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out @@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n174 +OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`, SUM(DISTINCT SUBSTR(`value`, 5)) AS `_o__c3`, COUNT(DISTINCT `value`) AS `_o__c4` +FROM `default`.`srcpart` +WHERE `ds` = '2008-04-08' +GROUP BY SUBSTR(`key`, 1, 1) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out index f90890c68b..5fb1666826 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out @@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1_n79 +OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2` +FROM `default`.`srcpart` +WHERE `ds` = '2008-04-08' +GROUP BY SUBSTR(`key`, 1, 1) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out index afdb17aa75..338f7538f8 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out @@ -28,6 +28,10 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@dest1 +OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5)) AS `_o__c2`, SUM(DISTINCT SUBSTR(`value`, 5)) AS `_o__c3`, COUNT(DISTINCT `value`) AS `_o__c4` +FROM `default`.`srcpart` +WHERE `ds` = '2008-04-08' +GROUP BY SUBSTR(`key`, 1, 1) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1