diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 4e83867..bb011ad 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1079,6 +1079,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal + " expressed as multiple of Local FS write cost"), HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;" + " expressed as multiple of Local FS read cost"), + HIVE_CBO_SHOW_WARNINGS("hive.cbo.show.warnings", false, + "Toggle display of CBO warnings like missing column stats"), AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"), SEMIJOIN_CONVERSION("hive.enable.semijoin.conversion", true, "convert group by followed by inner equi join into semijoin"), HIVE_COLUMN_ALIGNMENT("hive.order.columnalignment", true, "Flag to control whether we want to try to align" + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index 009d9e5..7d71285 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; @@ -56,6 +57,8 @@ import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.Statistics; +import 
org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -391,6 +394,11 @@ private void updateColStats(Set projIndxLst, boolean allowNullColumnFor noColsMissingStats.getAndAdd(colNamesFailedStats.size()); if (allowNullColumnForMissingStats) { LOG.warn(logMsg); + HiveConf conf = SessionState.getSessionConf(); + if (conf != null && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_SHOW_WARNINGS)) { + LogHelper console = SessionState.getConsole(); + console.printInfoNoLog(logMsg); + } } else { LOG.error(logMsg); throw new RuntimeException(logMsg); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index 453e0a5..ff13438 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -1099,6 +1099,16 @@ public void printInfo(String info, String detail) { LOG.info(info + StringUtils.defaultString(detail)); } + /** + * Prints {@code info} to the info stream unless {@link #getIsSilent()} is true. + * Unlike {@link #printInfo(String, String)}, the message is not written to the log. + */ + public void printInfoNoLog(String info) { + if (!getIsSilent()) { + getInfoStream().println(info); + } + } + public void printError(String error) { printError(error, null); } diff --git a/ql/src/test/queries/clientpositive/stats_missing_warning.q b/ql/src/test/queries/clientpositive/stats_missing_warning.q new file mode 100644 index 0000000..b6cf049 --- /dev/null +++ b/ql/src/test/queries/clientpositive/stats_missing_warning.q @@ -0,0 +1,55 @@ +set hive.stats.autogather=false; + +-- Explicitly DROP vs. 
CREATE IF NOT EXISTS to ensure stats are not carried over +DROP TABLE IF EXISTS missing_stats_t1; +DROP TABLE IF EXISTS missing_stats_t2; +DROP TABLE IF EXISTS missing_stats_t3; +CREATE TABLE missing_stats_t1 (key STRING, value STRING); +CREATE TABLE missing_stats_t2 (key STRING, value STRING); +CREATE TABLE missing_stats_t3 (key STRING, value STRING); + +INSERT INTO missing_stats_t1 (key, value) + SELECT key, value + FROM src; + +INSERT INTO missing_stats_t2 (key, value) + SELECT key, value + FROM src; + +INSERT INTO missing_stats_t3 (key, value) + SELECT key, value + FROM src; + +-- Defaults to FALSE, so enable it explicitly +set hive.cbo.show.warnings=true; + +set hive.cbo.enable=true; + +-- Should print warning +set hive.cbo.show.warnings=true; + +SELECT COUNT(*) +FROM missing_stats_t1 t1 +JOIN missing_stats_t2 t2 ON t1.value = t2.key +JOIN missing_stats_t3 t3 ON t2.key = t3.value; + +-- Should not print warning +set hive.cbo.show.warnings=false; + +SELECT COUNT(*) +FROM missing_stats_t1 t1 +JOIN missing_stats_t2 t2 ON t1.value = t2.key +JOIN missing_stats_t3 t3 ON t2.key = t3.value; + +ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS; +ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS; +ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS; + + +-- Warning should be gone +set hive.cbo.show.warnings=true; + +SELECT COUNT(*) +FROM missing_stats_t1 t1 +JOIN missing_stats_t2 t2 ON t1.value = t2.key +JOIN missing_stats_t3 t3 ON t2.key = t3.value; diff --git a/ql/src/test/results/clientpositive/stats_missing_warning.q.out b/ql/src/test/results/clientpositive/stats_missing_warning.q.out new file mode 100644 index 0000000..0ed70a0 --- /dev/null +++ b/ql/src/test/results/clientpositive/stats_missing_warning.q.out @@ -0,0 +1,159 @@ +PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t2 +PREHOOK: 
type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE missing_stats_t1 (key STRING, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@missing_stats_t1 +POSTHOOK: query: CREATE TABLE missing_stats_t1 (key STRING, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@missing_stats_t1 +PREHOOK: query: CREATE TABLE missing_stats_t2 (key STRING, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@missing_stats_t2 +POSTHOOK: query: CREATE TABLE missing_stats_t2 (key STRING, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@missing_stats_t2 +PREHOOK: query: CREATE TABLE missing_stats_t3 (key STRING, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@missing_stats_t3 +POSTHOOK: query: CREATE TABLE missing_stats_t3 (key STRING, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@missing_stats_t3 +PREHOOK: query: INSERT INTO missing_stats_t1 (key, value) + SELECT key, value + FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@missing_stats_t1 +POSTHOOK: query: INSERT INTO missing_stats_t1 (key, value) + SELECT key, value + FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@missing_stats_t1 +POSTHOOK: Lineage: missing_stats_t1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: missing_stats_t1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT INTO 
missing_stats_t2 (key, value) + SELECT key, value + FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@missing_stats_t2 +POSTHOOK: query: INSERT INTO missing_stats_t2 (key, value) + SELECT key, value + FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@missing_stats_t2 +POSTHOOK: Lineage: missing_stats_t2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: missing_stats_t2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT INTO missing_stats_t3 (key, value) + SELECT key, value + FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@missing_stats_t3 +POSTHOOK: query: INSERT INTO missing_stats_t3 (key, value) + SELECT key, value + FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@missing_stats_t3 +POSTHOOK: Lineage: missing_stats_t3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: missing_stats_t3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT COUNT(*) +FROM missing_stats_t1 t1 +JOIN missing_stats_t2 t2 ON t1.value = t2.key +JOIN missing_stats_t3 t3 ON t2.key = t3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@missing_stats_t1 +PREHOOK: Input: default@missing_stats_t2 +PREHOOK: Input: default@missing_stats_t3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM missing_stats_t1 t1 +JOIN missing_stats_t2 t2 ON t1.value = t2.key +JOIN missing_stats_t3 t3 ON t2.key = t3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@missing_stats_t1 +POSTHOOK: Input: default@missing_stats_t2 +POSTHOOK: Input: default@missing_stats_t3 +#### A masked pattern was here #### +0 +PREHOOK: query: SELECT COUNT(*) +FROM missing_stats_t1 t1 +JOIN missing_stats_t2 t2 ON t1.value = t2.key +JOIN missing_stats_t3 t3 ON 
t2.key = t3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@missing_stats_t1 +PREHOOK: Input: default@missing_stats_t2 +PREHOOK: Input: default@missing_stats_t3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) +FROM missing_stats_t1 t1 +JOIN missing_stats_t2 t2 ON t1.value = t2.key +JOIN missing_stats_t3 t3 ON t2.key = t3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@missing_stats_t1 +POSTHOOK: Input: default@missing_stats_t2 +POSTHOOK: Input: default@missing_stats_t3 +#### A masked pattern was here #### +0 +PREHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@missing_stats_t1 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@missing_stats_t1 +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@missing_stats_t2 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@missing_stats_t2 +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@missing_stats_t3 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@missing_stats_t3 +#### A masked pattern was here #### +PREHOOK: query: SELECT COUNT(*) +FROM missing_stats_t1 t1 +JOIN missing_stats_t2 t2 ON t1.value = t2.key +JOIN missing_stats_t3 t3 ON t2.key = t3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@missing_stats_t1 +PREHOOK: Input: default@missing_stats_t2 +PREHOOK: Input: default@missing_stats_t3 +#### A masked pattern was here #### +POSTHOOK: query: 
SELECT COUNT(*) +FROM missing_stats_t1 t1 +JOIN missing_stats_t2 t2 ON t1.value = t2.key +JOIN missing_stats_t3 t3 ON t2.key = t3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@missing_stats_t1 +POSTHOOK: Input: default@missing_stats_t2 +POSTHOOK: Input: default@missing_stats_t3 +#### A masked pattern was here #### +0