diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index ab131e2..ff07b42 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.parse;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -36,8 +37,14 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.session.OperationLog;
+import org.apache.hadoop.hive.ql.session.OperationLog.LoggingLevel;
 import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * ColumnStatsSemanticAnalyzer.
@@ -48,6 +55,7 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
 
   private static final Logger LOG = LoggerFactory
       .getLogger(ColumnStatsSemanticAnalyzer.class);
+  static final private LogHelper console = new LogHelper(LOG);
 
   private ASTNode originalTree;
   private ASTNode rewrittenTree;
@@ -211,16 +219,26 @@ private String getColTypeOf (String partKey) throws SemanticException{
   }
 
   private List<String> getColumnTypes(List<String> colNames)
       throws SemanticException{
-    List<String> colTypes = new LinkedList<String>();
+    List<String> colTypes = new ArrayList<String>();
     List<FieldSchema> cols = tbl.getCols();
+    List<String> copyColNames = new ArrayList<>();
+    copyColNames.addAll(colNames);
-    for (String colName : colNames) {
-      for (FieldSchema col: cols) {
+    for (String colName : copyColNames) {
+      for (FieldSchema col : cols) {
         if (colName.equalsIgnoreCase(col.getName())) {
-          colTypes.add(new String(col.getType()));
+          String type = col.getType();
+          TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
+          if (typeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) {
+            logTypeWarning(colName, type);
+            colNames.remove(colName);
+          } else {
+            colTypes.add(type);
+          }
         }
       }
     }
+
     return colTypes;
   }
 
@@ -312,6 +330,18 @@
     }
   }
 
+  private void logTypeWarning(String colName, String colType) {
+    String warning = "Only primitive type arguments are accepted but " + colType
+        + " is passed for " + colName + ".";
+    warning = "WARNING: " + warning;
+    console.printInfo(warning);
+    // Propagate warning to beeline via operation log.
+    OperationLog ol = OperationLog.getCurrentOperationLog();
+    if (ol != null) {
+      ol.writeOperationLog(LoggingLevel.EXECUTION, warning + "\n");
+    }
+  }
+
   @Override
   public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
     QB qb;
diff --git a/ql/src/test/queries/clientpositive/partial_column_stats.q b/ql/src/test/queries/clientpositive/partial_column_stats.q
new file mode 100644
index 0000000..8ff65ac
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/partial_column_stats.q
@@ -0,0 +1,9 @@
+set hive.mapred.mode=nonstrict;
+
+create table t1 (key int, data struct<name:string, id:string>, value string);
+
+explain analyze table t1 compute statistics for columns;
+
+analyze table t1 compute statistics for columns;
+
+desc formatted t1 value;
diff --git a/ql/src/test/results/clientpositive/partial_column_stats.q.out b/ql/src/test/results/clientpositive/partial_column_stats.q.out
new file mode 100644
index 0000000..59b52b0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/partial_column_stats.q.out
@@ -0,0 +1,74 @@
+PREHOOK: query: create table t1 (key int, data struct<name:string, id:string>, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (key int, data struct<name:string, id:string>, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: explain analyze table t1 compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain analyze table t1 compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.t1
+
+PREHOOK: query: analyze table t1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table t1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted t1 value
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t1
+POSTHOOK: query: desc formatted t1 value
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t1
+# col_name     data_type   min   max   num_nulls   distinct_count   avg_col_len   max_col_len   num_trues   num_falses   comment
+
+value          string                  0           0                0.0           0                                      from deserializer
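
Note on the type check (a standalone sketch, not part of the patch): TypeInfoUtils.getTypeInfoFromTypeString parses a Hive type string into a TypeInfo, and the new getColumnTypes() keeps only columns whose category is PRIMITIVE; STRUCT, LIST, MAP, and UNION columns are dropped with a warning instead of failing the whole ANALYZE statement. The snippet below reuses the same hive-serde classes the patch imports, but the class name TypeCategoryCheck and the sample type strings are illustrative assumptions only.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeCategoryCheck {
  public static void main(String[] args) {
    // Same per-column classification that getColumnTypes() now performs.
    String[] types = {"int", "string", "struct<name:string,id:string>", "array<int>", "map<string,int>"};
    for (String type : types) {
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
      if (typeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) {
        // Complex type: the analyzer warns and removes the column from the request.
        System.out.println(type + " -> " + typeInfo.getCategory() + " (skipped)");
      } else {
        System.out.println(type + " -> PRIMITIVE (stats computed)");
      }
    }
  }
}

Applied to the test table above, this filtering is why the rewritten statistics query and the q.out plan reference only key and value: the struct column data is removed from the column list before the stats query is generated, with the "Only primitive type arguments are accepted" warning surfaced on the console and in the operation log.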