diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 5ab3076..7510ddc 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -753,6 +753,7 @@ encrypted.query.files=encryption_join_unencrypted_tbl.q,\
 
 beeline.positive.include=drop_with_concurrency.q,\
   escape_comments.q,\
+  mapjoin2.q,\
   smb_mapjoin_1.q,\
   smb_mapjoin_10.q,\
   smb_mapjoin_11.q,\
@@ -762,7 +763,8 @@ beeline.positive.include=drop_with_concurrency.q,\
   smb_mapjoin_2.q,\
   smb_mapjoin_3.q,\
   smb_mapjoin_7.q,\
-  select_dummy_source.q
+  select_dummy_source.q,\
+  udf_unix_timestamp.q
 
 minimr.query.negative.files=cluster_tasklog_retrieval.q,\
   file_with_header_footer_negative.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppenderForTest.java ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppenderForTest.java
index 966c264..465844d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppenderForTest.java
+++ ql/src/java/org/apache/hadoop/hive/ql/log/LogDivertAppenderForTest.java
@@ -54,7 +54,10 @@ private LogDivertAppenderForTest() {
     public Result filter(LogEvent event) {
       if (event.getLevel().equals(Level.INFO) && "SessionState".equals(event.getLoggerName())) {
         if (event.getMessage().getFormattedMessage().startsWith("PREHOOK:")
-            || event.getMessage().getFormattedMessage().startsWith("POSTHOOK:")) {
+            || event.getMessage().getFormattedMessage().startsWith("POSTHOOK:")
+            || event.getMessage().getFormattedMessage().startsWith("unix_timestamp(void)")
+            || event.getMessage().getFormattedMessage().startsWith("Warning: ")
+            ) {
           return Result.ACCEPT;
         }
       }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 9faccd7..1d49568 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -396,7 +396,7 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowNullColumnFor
         HiveConf conf = SessionState.getSessionConf();
         if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_SHOW_WARNINGS)) {
           LogHelper console = SessionState.getConsole();
-          console.printInfoNoLog(logMsg);
+          console.printInfo(logMsg);
         }
       } else {
         LOG.error(logMsg);
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java
index 9ad33fd..f40ff69 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java
@@ -128,8 +128,7 @@ public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
   }
 
   private void warn(String msg) {
-    SessionState.getConsole().getInfoStream().println(
-        String.format("Warning: %s", msg));
+    SessionState.getConsole().printInfo("Warning: " + msg);
   }
 
   private void checkMapJoins(MapRedTask mrTsk) throws SemanticException {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
index 92d2191..b949008 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
@@ -85,8 +85,7 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
   }
 
   private void warn(String msg) {
-    SessionState.getConsole().getInfoStream().println(
-        String.format("Warning: %s", msg));
+    SessionState.getConsole().printInfo("Warning: " + msg);
   }
 
   private void checkShuffleJoin(SparkWork sparkWork) throws SemanticException {
diff --git ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 7692512..ba505e2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -1068,57 +1068,126 @@ public LogHelper(Logger LOG, boolean isSilent) {
       this.isSilent = isSilent;
     }
 
+    /**
+     * Get the console output stream for HiveServer2 or HiveCli.
+     * @return The output stream
+     */
     public PrintStream getOutStream() {
       SessionState ss = SessionState.get();
       return ((ss != null) && (ss.out != null)) ? ss.out : System.out;
     }
 
+    /**
+     * Get the console info stream for HiveServer2 or HiveCli.
+     * @return The info stream
+     */
    public static PrintStream getInfoStream() {
       SessionState ss = SessionState.get();
       return ((ss != null) && (ss.info != null)) ? ss.info : getErrStream();
     }
 
+    /**
+     * Get the console error stream for HiveServer2 or HiveCli.
+     * @return The error stream
+     */
     public static PrintStream getErrStream() {
       SessionState ss = SessionState.get();
       return ((ss != null) && (ss.err != null)) ? ss.err : System.err;
     }
 
+    /**
+     * Get the child process output stream for HiveServer2 or HiveCli.
+     * @return The child process output stream
+     */
     public PrintStream getChildOutStream() {
       SessionState ss = SessionState.get();
       return ((ss != null) && (ss.childOut != null)) ? ss.childOut : System.out;
     }
 
+    /**
+     * Get the child process error stream for HiveServer2 or HiveCli.
+     * @return The child process error stream
+     */
     public PrintStream getChildErrStream() {
       SessionState ss = SessionState.get();
       return ((ss != null) && (ss.childErr != null)) ? ss.childErr : System.err;
     }
 
+    /**
+     * Checks whether logging to the info stream is suppressed (silent mode).
+     * @return True if messages are not printed to the HiveServer2 or HiveCli info stream
+     */
    public boolean getIsSilent() {
       SessionState ss = SessionState.get();
       // use the session or the one supplied in constructor
       return (ss != null) ? ss.getIsSilent() : isSilent;
     }
 
+    /**
+     * Logs into the log file.
+     * BeeLine uses the operation log file to show the logs to the user, so depending on the
+     * BeeLine settings it could be shown to the user.
+     * @param info The log message
+     */
     public void logInfo(String info) {
       logInfo(info, null);
     }
 
+    /**
+     * Logs into the log file. Handles an extra detail which will not be printed if null.
+     * BeeLine uses the operation log file to show the logs to the user, so depending on the
+     * BeeLine settings it could be shown to the user.
+     * @param info The log message
+     * @param detail Extra detail to log; it will not be printed if null
+     */
     public void logInfo(String info, String detail) {
       LOG.info(info + StringUtils.defaultString(detail));
     }
 
+    /**
+     * Logs info into the log file, and if the LogHelper is not silent then into the HiveServer2 or
+     * HiveCli info stream too.
+     * BeeLine uses the operation log file to show the logs to the user, so depending on the
+     * BeeLine settings it could be shown to the user.
+     * @param info The log message
+     */
     public void printInfo(String info) {
       printInfo(info, null);
     }
 
+    /**
+     * Logs info into the log file, and if not silent then into the HiveServer2 or HiveCli info
+     * stream too. The isSilent parameter is used instead of the LogHelper isSilent attribute.
+     * BeeLine uses the operation log file to show the logs to the user, so depending on the
+     * BeeLine settings it could be shown to the user.
+     * @param info The log message
+     * @param isSilent If true, the message will not be printed to the info stream
+     */
     public void printInfo(String info, boolean isSilent) {
       printInfo(info, null, isSilent);
     }
 
+    /**
+     * Logs info into the log file, and if the LogHelper is not silent then into the HiveServer2 or
+     * HiveCli info stream too. Handles an extra detail which will not be printed if null.
+     * BeeLine uses the operation log file to show the logs to the user, so depending on the
+     * BeeLine settings it could be shown to the user.
+     * @param info The log message
+     * @param detail Extra detail to log; it will not be printed if null
+     */
     public void printInfo(String info, String detail) {
       printInfo(info, detail, getIsSilent());
     }
 
+    /**
+     * Logs info into the log file, and if not silent then into the HiveServer2 or HiveCli info
+     * stream too. Handles an extra detail which will not be printed if null.
+     * BeeLine uses the operation log file to show the logs to the user, so depending on the
+     * BeeLine settings it could be shown to the user.
+     * @param info The log message
+     * @param detail Extra detail to log; it will not be printed if null
+     * @param isSilent If true, the message will not be printed to the info stream
+     */
     public void printInfo(String info, String detail, boolean isSilent) {
       if (!isSilent) {
         getInfoStream().println(info);
@@ -1126,16 +1195,24 @@ public void printInfo(String info, String detail, boolean isSilent) {
       LOG.info(info + StringUtils.defaultString(detail));
     }
 
-    public void printInfoNoLog(String info) {
-      if (!getIsSilent()) {
-        getInfoStream().println(info);
-      }
-    }
-
+    /**
+     * Logs an error into the log file, and into the HiveServer2 or HiveCli error stream too.
+     * BeeLine uses the operation log file to show the logs to the user, so depending on the
+     * BeeLine settings it could be shown to the user.
+     * @param error The log message
+     */
     public void printError(String error) {
       printError(error, null);
     }
 
+    /**
+     * Logs an error into the log file, and into the HiveServer2 or HiveCli error stream too.
+     * Handles an extra detail which will not be printed if null.
+     * BeeLine uses the operation log file to show the logs to the user, so depending on the
+     * BeeLine settings it could be shown to the user.
+     * @param error The log message
+     * @param detail Extra detail to log; it will not be printed if null
+     */
     public void printError(String error, String detail) {
       getErrStream().println(error);
       LOG.error(error + StringUtils.defaultString(detail));
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java
index 118acdc..45d2ae8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnixTimeStamp.java
@@ -49,11 +49,7 @@ protected void initializeInput(ObjectInspector[] arguments) throws UDFArgumentEx
         currentTimestamp = new LongWritable(0);
         setValueFromTs(currentTimestamp, SessionState.get().getQueryCurrentTimestamp());
         String msg = "unix_timestamp(void) is deprecated. Use current_timestamp instead.";
-        LOG.warn(msg);
-        PrintStream stream = LogHelper.getInfoStream();
-        if (stream != null) {
-          stream.println(msg);
-        }
+        SessionState.getConsole().printInfo(msg);
       }
     }
   }
diff --git ql/src/test/results/clientpositive/beeline/mapjoin2.q.out ql/src/test/results/clientpositive/beeline/mapjoin2.q.out
new file mode 100644
index 0000000..08e398a
--- /dev/null
+++ ql/src/test/results/clientpositive/beeline/mapjoin2.q.out
@@ -0,0 +1,91 @@
+PREHOOK: query: create table tbl (n bigint, t string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl
+POSTHOOK: query: create table tbl (n bigint, t string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl
+PREHOOK: query: insert into tbl values (1, 'one')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tbl
+POSTHOOK: query: insert into tbl values (1, 'one')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tbl
+POSTHOOK: Lineage: tbl.n EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tbl.t SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into tbl values(2, 'two')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tbl
+POSTHOOK: query: insert into tbl values(2, 'two')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tbl
+POSTHOOK: Lineage: tbl.n EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tbl.t SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a left outer join (select * from tbl where 1 = 2) b on a.n = b.n
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a left outer join (select * from tbl where 1 = 2) b on a.n = b.n
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl
+#### A masked pattern was here ####
+1	one	true	true
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl where 2 = 1) a right outer join (select * from tbl where n = 2) b on a.n = b.n
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl where 2 = 1) a right outer join (select * from tbl where n = 2) b on a.n = b.n
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl
+#### A masked pattern was here ####
+true	true	2	two
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl
+#### A masked pattern was here ####
+false	false	true	true
+true	true	false	false
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11	1	1	0	0
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11	1	1	0	0
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11	1	1	0	0
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11	1	1	0	0
diff --git ql/src/test/results/clientpositive/beeline/udf_unix_timestamp.q.out ql/src/test/results/clientpositive/beeline/udf_unix_timestamp.q.out
new file mode 100644
index 0000000..fc0de7d
--- /dev/null
+++ ql/src/test/results/clientpositive/beeline/udf_unix_timestamp.q.out
@@ -0,0 +1,117 @@
+PREHOOK: query: DESCRIBE FUNCTION unix_timestamp
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION unix_timestamp
+POSTHOOK: type: DESCFUNCTION
+unix_timestamp(date[, pattern]) - Converts the time to a number
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED unix_timestamp
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED unix_timestamp
+POSTHOOK: type: DESCFUNCTION
+unix_timestamp(date[, pattern]) - Converts the time to a number
+Converts the specified time to number of seconds since 1970-01-01. The unix_timestamp(void) overload is deprecated, use current_timestamp.
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp
+Function type:BUILTIN
+PREHOOK: query: create table oneline(key int, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@oneline
+POSTHOOK: query: create table oneline(key int, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@oneline
+PREHOOK: query: load data local inpath '../../data/files/things.txt' into table oneline
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@oneline
+POSTHOOK: query: load data local inpath '../../data/files/things.txt' into table oneline
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@oneline
+PREHOOK: query: SELECT
+  '2009-03-20 11:30:01',
+  unix_timestamp('2009-03-20 11:30:01')
+FROM oneline
+PREHOOK: type: QUERY
+PREHOOK: Input: default@oneline
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  '2009-03-20 11:30:01',
+  unix_timestamp('2009-03-20 11:30:01')
+FROM oneline
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@oneline
+#### A masked pattern was here ####
+2009-03-20 11:30:01	1237573801
+PREHOOK: query: SELECT
+  '2009-03-20',
+  unix_timestamp('2009-03-20', 'yyyy-MM-dd')
+FROM oneline
+PREHOOK: type: QUERY
+PREHOOK: Input: default@oneline
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  '2009-03-20',
+  unix_timestamp('2009-03-20', 'yyyy-MM-dd')
+FROM oneline
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@oneline
+#### A masked pattern was here ####
+2009-03-20	1237532400
+PREHOOK: query: SELECT
+  '2009 Mar 20 11:30:01 am',
+  unix_timestamp('2009 Mar 20 11:30:01 am', 'yyyy MMM dd h:mm:ss a')
+FROM oneline
+PREHOOK: type: QUERY
+PREHOOK: Input: default@oneline
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  '2009 Mar 20 11:30:01 am',
+  unix_timestamp('2009 Mar 20 11:30:01 am', 'yyyy MMM dd h:mm:ss a')
+FROM oneline
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@oneline
+#### A masked pattern was here ####
+2009 Mar 20 11:30:01 am	1237573801
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+PREHOOK: query: create table foo as SELECT
+  'deprecated' as a,
+  unix_timestamp() as b
+FROM oneline
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@oneline
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo as SELECT
+  'deprecated' as a,
+  unix_timestamp() as b
+FROM oneline
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@oneline
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.a SIMPLE []
+POSTHOOK: Lineage: foo.b SIMPLE []
+PREHOOK: query: drop table foo
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo
+POSTHOOK: query: drop table foo
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo
+PREHOOK: query: SELECT
+  'random_string',
+  unix_timestamp('random_string')
+FROM oneline
+PREHOOK: type: QUERY
+PREHOOK: Input: default@oneline
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  'random_string',
+  unix_timestamp('random_string')
+FROM oneline
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@oneline
+#### A masked pattern was here ####
+random_string	NULL