diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index ca51e71..2b9afdf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -62,6 +62,7 @@ import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
@@ -198,6 +199,8 @@ import org.apache.hadoop.util.ToolRunner;
 import org.stringtemplate.v4.ST;
 
+import com.google.common.collect.Lists;
+
 /**
  * DDLTask implementation.
  *
@@ -2571,7 +2574,7 @@ public int showColumns(Hive db, ShowColumnsDesc showCols)
       // as HiveServer2 output is consumed by JDBC/ODBC clients.
       boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
       outStream.writeBytes(MetaDataFormatUtils.getAllColumnsInformation(
-          cols, false, isOutputPadded));
+          cols, false, isOutputPadded, null));
       outStream.close();
       outStream = null;
     } catch (IOException e) {
@@ -3396,6 +3399,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException {
       outStream = fs.create(resFile);
 
       List<FieldSchema> cols = null;
+      List<ColumnStatisticsObj> colStats = null;
       if (colPath.equals(tableName)) {
         cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ?
             tbl.getCols() : part.getCols();
@@ -3405,6 +3409,18 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException {
         }
       } else {
         cols = Hive.getFieldsFromDeserializer(colPath, tbl.getDeserializer());
+        if (descTbl.isFormatted()) {
+          String dbName = "default";
+          String tabName = tableName;
+          String colName = colPath.split("\\.")[1];
+          if(tableName.contains(".")) {
+            String[] names = tableName.split("\\.");
+            dbName = names[0];
+            tabName = names[1];
+          }
+          colStats = db.getTableColumnStatistics(dbName.toLowerCase(), tabName.toLowerCase(),
+              Lists.newArrayList(colName.toLowerCase()));
+        }
       }
 
       fixDecimalColumnTypeName(cols);
@@ -3413,7 +3429,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException {
       boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
       formatter.describeTable(outStream, colPath, tableName, tbl, part,
           cols, descTbl.isFormatted(), descTbl.isExt(),
-          descTbl.isPretty(), isOutputPadded);
+          descTbl.isPretty(), isOutputPadded, colStats);
 
       LOG.info("DDLTask: written data for " + tbl.getTableName());
       outStream.close();
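A note on the describeTable() hunk above: the database/table/column resolution is inlined and somewhat subtle. An unqualified table name falls back to the literal "default" database rather than the session's current database, and all names are lowercased because that is how the metastore stores them. The standalone sketch below reproduces that resolution so it can be run in isolation; the class and method names are illustrative only, not part of the patch, and colPath is assumed to arrive in "table.column" form. The resolved triple is what the patch feeds to db.getTableColumnStatistics().

// Illustrative sketch only -- the patch inlines this logic in DDLTask.describeTable().
public class NameResolutionSketch {

  // Returns {database, table, column} the way the hunk above resolves them.
  static String[] resolve(String tableName, String colPath) {
    String dbName = "default";                // note: session database is not consulted
    String tabName = tableName;
    String colName = colPath.split("\\.")[1]; // assumes colPath is "table.column"
    if (tableName.contains(".")) {            // "db.table" overrides the default
      String[] names = tableName.split("\\.");
      dbName = names[0];
      tabName = names[1];
    }
    // Metastore names are stored lowercase, hence the normalization.
    return new String[] {
        dbName.toLowerCase(), tabName.toLowerCase(), colName.toLowerCase()};
  }

  public static void main(String[] args) {
    // Unqualified: resolves to "default" even under USE test, which is why the
    // unqualified DESCRIBE at the end of the new q.out file still returns the
    // default database's statistics.
    System.out.println(String.join("/",
        resolve("UserVisits_web_text_none", "UserVisits_web_text_none.sourceIP")));
    // Qualified: the database is taken from the table name.
    System.out.println(String.join("/",
        resolve("test.UserVisits_web_text_none", "UserVisits_web_text_none.sourceIP")));
  }
}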
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java
index ee35857..818e7ca 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -101,7 +102,7 @@ public void showTables(DataOutputStream out, Set<String> tables)
   public void describeTable(DataOutputStream out,
       String colPath, String tableName, Table tbl, Partition part,
       List<FieldSchema> cols, boolean isFormatted, boolean isExt, boolean isPretty,
-      boolean isOutputPadded) throws HiveException {
+      boolean isOutputPadded, List<ColumnStatisticsObj> colStats) throws HiveException {
     MapBuilder builder = MapBuilder.create();
     builder.put("columns", makeColsUnformatted(cols));
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
index 1a5e840..8d05933 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
@@ -31,9 +31,17 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
 import org.apache.hadoop.hive.ql.index.HiveIndex;
 import org.apache.hadoop.hive.ql.index.HiveIndex.IndexType;
 import org.apache.hadoop.hive.ql.metadata.Partition;
@@ -42,6 +50,8 @@ import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.ShowIndexesDesc;
 
+import com.google.common.collect.Lists;
+
 /**
  * This class provides methods to format table and index information.
  *
@@ -58,9 +68,10 @@ private MetaDataFormatUtils() {
   }
 
-  private static void formatColumnsHeader(StringBuilder columnInformation) {
+  private static void formatColumnsHeader(StringBuilder columnInformation,
+      List<ColumnStatisticsObj> colStats) {
     columnInformation.append("# "); // Easy for shell scripts to ignore
-    formatOutput(getColumnsHeader(), columnInformation);
+    formatOutput(getColumnsHeader(colStats), columnInformation);
     columnInformation.append(LINE_DELIM);
   }
 
@@ -70,15 +81,17 @@ private static void formatColumnsHeader(StringBuilder columnInformation) {
    * @param printHeader - if header should be included
    * @param isOutputPadded - make it more human readable by setting indentation
    *        with spaces. Turned off for use by HiveServer2
+   * @param colStats
    * @return string with formatted column information
    */
   public static String getAllColumnsInformation(List<FieldSchema> cols,
-      boolean printHeader, boolean isOutputPadded) {
+      boolean printHeader, boolean isOutputPadded, List<ColumnStatisticsObj> colStats) {
     StringBuilder columnInformation = new StringBuilder(DEFAULT_STRINGBUILDER_SIZE);
     if(printHeader){
-      formatColumnsHeader(columnInformation);
+      formatColumnsHeader(columnInformation, colStats);
     }
-    formatAllFields(columnInformation, cols, isOutputPadded);
+
+    formatAllFields(columnInformation, cols, isOutputPadded, colStats);
     return columnInformation.toString();
   }
 
@@ -96,15 +109,15 @@ public static String getAllColumnsInformation(List<FieldSchema> cols,
       List<FieldSchema> partCols, boolean printHeader, boolean isOutputPadded,
       boolean showPartColsSep) {
     StringBuilder columnInformation = new StringBuilder(DEFAULT_STRINGBUILDER_SIZE);
     if(printHeader){
-      formatColumnsHeader(columnInformation);
+      formatColumnsHeader(columnInformation, null);
     }
-    formatAllFields(columnInformation, cols, isOutputPadded);
+    formatAllFields(columnInformation, cols, isOutputPadded, null);
 
     if ((partCols != null) && !partCols.isEmpty() && showPartColsSep) {
       columnInformation.append(LINE_DELIM).append("# Partition Information")
           .append(LINE_DELIM);
-      formatColumnsHeader(columnInformation);
-      formatAllFields(columnInformation, partCols, isOutputPadded);
+      formatColumnsHeader(columnInformation, null);
+      formatAllFields(columnInformation, partCols, isOutputPadded, null);
     }
 
     return columnInformation.toString();
@@ -116,21 +129,36 @@ public static String getAllColumnsInformation(List<FieldSchema> cols,
    * @param cols - list of columns
    * @param isOutputPadded - make it more human readable by setting indentation
    *        with spaces. Turned off for use by HiveServer2
+   * @param colStats
    */
   private static void formatAllFields(StringBuilder tableInfo,
-      List<FieldSchema> cols, boolean isOutputPadded) {
+      List<FieldSchema> cols, boolean isOutputPadded, List<ColumnStatisticsObj> colStats) {
     for (FieldSchema col : cols) {
+      ColumnStatisticsObj cso = getColumnStatisticsObject(col, colStats);
       if(isOutputPadded) {
-        formatWithIndentation(col.getName(), col.getType(), getComment(col), tableInfo);
+        formatWithIndentation(col.getName(), col.getType(), getComment(col), tableInfo, cso);
       }
       else {
-        formatWithoutIndentation(col.getName(), col.getType(), col.getComment(), tableInfo);
+        formatWithoutIndentation(col.getName(), col.getType(), col.getComment(), tableInfo, cso);
+      }
+    }
+  }
+
+  private static ColumnStatisticsObj getColumnStatisticsObject(FieldSchema col,
+      List<ColumnStatisticsObj> colStats) {
+    if (colStats != null && !colStats.isEmpty()) {
+      for (ColumnStatisticsObj cso : colStats) {
+        if (cso.getColName().equalsIgnoreCase(col.getName())
+            && cso.getColType().equalsIgnoreCase(col.getType())) {
+          return cso;
+        }
       }
     }
+    return null;
   }
 
   private static void formatWithoutIndentation(String name, String type, String comment,
-      StringBuilder colBuffer) {
+      StringBuilder colBuffer, ColumnStatisticsObj cso) {
     colBuffer.append(name);
     colBuffer.append(FIELD_DELIM);
     colBuffer.append(type);
@@ -341,10 +369,39 @@ private static void formatOutput(String name, String value,
   }
 
   private static void formatWithIndentation(String colName, String colType, String colComment,
-      StringBuilder tableInfo) {
+      StringBuilder tableInfo, ColumnStatisticsObj cso) {
     tableInfo.append(String.format("%-" + ALIGNMENT + "s", colName)).append(FIELD_DELIM);
     tableInfo.append(String.format("%-" + ALIGNMENT + "s", colType)).append(FIELD_DELIM);
 
+    if (cso != null) {
+      ColumnStatisticsData csd = cso.getStatsData();
+      if (csd.isSetBinaryStats()) {
+        BinaryColumnStatsData bcsd = csd.getBinaryStats();
+        appendColumnStats(tableInfo, null, null, bcsd.getNumNulls(), null, bcsd.getAvgColLen(),
+            bcsd.getMaxColLen(), null, null);
+      } else if (csd.isSetStringStats()) {
+        StringColumnStatsData scsd = csd.getStringStats();
+        appendColumnStats(tableInfo, null, null, scsd.getNumNulls(), scsd.getNumDVs(),
+            scsd.getAvgColLen(), scsd.getMaxColLen(), null, null);
+      } else if (csd.isSetBooleanStats()) {
+        BooleanColumnStatsData bcsd = csd.getBooleanStats();
+        appendColumnStats(tableInfo, null, null, bcsd.getNumNulls(), null, null, null,
+            bcsd.getNumTrues(), bcsd.getNumFalses());
+      } else if (csd.isSetDecimalStats()) {
+        DecimalColumnStatsData dcsd = csd.getDecimalStats();
+        appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(),
+            dcsd.getNumDVs(), null, null, null, null);
+      } else if (csd.isSetDoubleStats()) {
+        DoubleColumnStatsData dcsd = csd.getDoubleStats();
+        appendColumnStats(tableInfo, dcsd.getLowValue(), dcsd.getHighValue(), dcsd.getNumNulls(),
+            dcsd.getNumDVs(), null, null, null, null);
+      } else if (csd.isSetLongStats()) {
+        LongColumnStatsData lcsd = csd.getLongStats();
+        appendColumnStats(tableInfo, lcsd.getLowValue(), lcsd.getHighValue(), lcsd.getNumNulls(),
+            lcsd.getNumDVs(), null, null, null, null);
+      }
+    }
+
     // comment indent processing for multi-line comments
     // comments should be indented the same amount on each line
     // if the first line comment starts indented by k,
@@ -354,13 +411,30 @@ private static void formatWithIndentation(String colName, String colType, String
     int colNameLength = ALIGNMENT > colName.length() ? ALIGNMENT : colName.length();
     int colTypeLength = ALIGNMENT > colType.length() ? ALIGNMENT : colType.length();
     for (int i = 1; i < commentSegments.length; i++) {
-      tableInfo.append(String.format("%" + colNameLength + "s" + FIELD_DELIM + "%"
-          + colTypeLength + "s" + FIELD_DELIM + "%s", "", "", commentSegments[i])).append(LINE_DELIM);
+      tableInfo.append(
+          String.format("%" + colNameLength + "s" + FIELD_DELIM + "%" + colTypeLength + "s"
+              + FIELD_DELIM + "%s", "", "", commentSegments[i])).append(LINE_DELIM);
     }
   }
 
-  public static String[] getColumnsHeader() {
-    return DescTableDesc.getSchema().split("#")[0].split(",");
+  private static void appendColumnStats(StringBuilder sb, Object min, Object max, Object numNulls,
+      Object ndv, Object avgColLen, Object maxColLen, Object numTrues, Object numFalses) {
+    sb.append(String.format("%-" + ALIGNMENT + "s", min)).append(FIELD_DELIM);
+    sb.append(String.format("%-" + ALIGNMENT + "s", max)).append(FIELD_DELIM);
+    sb.append(String.format("%-" + ALIGNMENT + "s", numNulls)).append(FIELD_DELIM);
+    sb.append(String.format("%-" + ALIGNMENT + "s", ndv)).append(FIELD_DELIM);
+    sb.append(String.format("%-" + ALIGNMENT + "s", avgColLen)).append(FIELD_DELIM);
+    sb.append(String.format("%-" + ALIGNMENT + "s", maxColLen)).append(FIELD_DELIM);
+    sb.append(String.format("%-" + ALIGNMENT + "s", numTrues)).append(FIELD_DELIM);
+    sb.append(String.format("%-" + ALIGNMENT + "s", numFalses)).append(FIELD_DELIM);
+  }
+
+  public static String[] getColumnsHeader(List<ColumnStatisticsObj> colStats) {
+    boolean showColStats = false;
+    if (colStats != null && !colStats.isEmpty()) {
+      showColStats = true;
+    }
+    return DescTableDesc.getSchema(showColStats).split("#")[0].split(",");
   }
 
   public static String getIndexColumnsHeader() {
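The formatWithIndentation() hunk above dispatches on ColumnStatisticsData, a Thrift union: exactly one isSet*Stats() branch is true for a given column type, and every branch funnels into the same eight-slot appendColumnStats() row, passing null for the slots that type does not track (a numeric column prints null lengths, a string column prints null min/max, and so on). The self-contained sketch below demonstrates just that row-building contract; the real stats classes are Thrift-generated, and the ALIGNMENT and FIELD_DELIM values here are assumptions intended to mirror MetaDataFormatUtils.

// Simplified stand-in for the eight-slot stats row; not the real Thrift classes.
public class StatsRowSketch {
  private static final int ALIGNMENT = 20;      // assumed to match MetaDataFormatUtils
  private static final String FIELD_DELIM = "\t";

  // Mirrors appendColumnStats(): one slot per header column, left-justified to
  // ALIGNMENT; a null slot prints as the literal "null", matching the q.out rows.
  static String statsRow(Object min, Object max, Object numNulls, Object ndv,
      Object avgColLen, Object maxColLen, Object numTrues, Object numFalses) {
    StringBuilder sb = new StringBuilder();
    for (Object v : new Object[] {min, max, numNulls, ndv,
        avgColLen, maxColLen, numTrues, numFalses}) {
      sb.append(String.format("%-" + ALIGNMENT + "s", v)).append(FIELD_DELIM);
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    // Integer column: min/max/num_nulls/ndv filled, lengths and boolean counts null.
    System.out.println("avgTimeOnSite" + FIELD_DELIM + "int" + FIELD_DELIM
        + statsRow(0, 9, 0, 11, null, null, null, null));
    // String column: lengths filled instead of min/max.
    System.out.println("sourceIP" + FIELD_DELIM + "string" + FIELD_DELIM
        + statsRow(null, null, 0, 69, 12.763636363636364, 13, null, null));
  }
}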
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java
index b600155..e42958c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatter.java
@@ -25,6 +25,7 @@ import java.util.Set;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -69,12 +70,13 @@ public void showTables(DataOutputStream out, Set<String> tables)
    * @param isExt
    * @param isPretty
    * @param isOutputPadded - if true, add spacing and indentation
+   * @param colStats
    * @throws HiveException
    */
   public void describeTable(DataOutputStream out,
       String colPath, String tableName, Table tbl, Partition part,
       List<FieldSchema> cols, boolean isFormatted, boolean isExt, boolean isPretty,
-      boolean isOutputPadded)
+      boolean isOutputPadded, List<ColumnStatisticsObj> colStats)
       throws HiveException;
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataPrettyFormatUtils.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataPrettyFormatUtils.java
index 86da780..8f939e6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataPrettyFormatUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataPrettyFormatUtils.java
@@ -86,7 +86,7 @@ private static int findMaxColumnNameLen(List<FieldSchema> cols) {
    */
   private static void formatColumnsHeaderPretty(StringBuilder columnInformation,
       int maxColNameLen, int prettyOutputNumCols) {
-    String columnHeaders[] = MetaDataFormatUtils.getColumnsHeader();
+    String columnHeaders[] = MetaDataFormatUtils.getColumnsHeader(null);
     formatOutputPretty(columnHeaders[0], columnHeaders[1], columnHeaders[2],
         columnInformation, maxColNameLen, prettyOutputNumCols);
     columnInformation.append(MetaDataFormatUtils.LINE_DELIM);
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
index ccdff17..8fabea9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.Hive;
@@ -116,7 +117,7 @@ public void showTables(DataOutputStream out, Set<String> tables)
   public void describeTable(DataOutputStream outStream, String colPath,
       String tableName, Table tbl, Partition part, List<FieldSchema> cols,
       boolean isFormatted, boolean isExt, boolean isPretty,
-      boolean isOutputPadded) throws HiveException {
+      boolean isOutputPadded, List<ColumnStatisticsObj> colStats) throws HiveException {
     try {
       String output;
       if (colPath.equals(tableName)) {
@@ -127,7 +128,7 @@ public void describeTable(DataOutputStream outStream, String colPath,
             : MetaDataFormatUtils.getAllColumnsInformation(cols, partCols,
                 isFormatted, isOutputPadded, showPartColsSeparately);
       } else {
-        output = MetaDataFormatUtils.getAllColumnsInformation(cols, isFormatted, isOutputPadded);
+        output = MetaDataFormatUtils.getAllColumnsInformation(cols, isFormatted, isOutputPadded, colStats);
       }
       outStream.write(output.getBytes("UTF-8"));
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 4a0056c..bed7041 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -1972,17 +1972,22 @@ private void analyzeDescribeTable(ASTNode ast) throws SemanticException {
     DescTableDesc descTblDesc = new DescTableDesc(
       ctx.getResFile(), tableName, partSpec, colPath);
 
+    boolean showColStats = false;
     if (ast.getChildCount() == 2) {
       int descOptions = ast.getChild(1).getType();
       descTblDesc.setFormatted(descOptions == HiveParser.KW_FORMATTED);
       descTblDesc.setExt(descOptions == HiveParser.KW_EXTENDED);
       descTblDesc.setPretty(descOptions == HiveParser.KW_PRETTY);
+      if (!colPath.equalsIgnoreCase(tableName)) {
+        showColStats = true;
+      }
     }
 
     inputs.add(new ReadEntity(getTable(tableName)));
 
     rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
         descTblDesc), conf));
-    setFetchTask(createFetchTask(DescTableDesc.getSchema()));
+    String schema = DescTableDesc.getSchema(showColStats);
+    setFetchTask(createFetchTask(schema));
     LOG.info("analyzeDescribeTable done");
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
index 688bca0..eefd4d4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/DescTableDesc.java
@@ -56,6 +56,9 @@ public void setPartSpec(Map<String, String> partSpec) {
    * thrift ddl for the result of describe table.
    */
   private static final String schema = "col_name,data_type,comment#string:string:string";
+  private static final String colStatsSchema = "col_name,data_type,min,max,num_nulls,"
+      + "distinct_count,avg_col_len,max_col_len,num_trues,num_falses,comment"
+      + "#string:string:string:string:string:string:string:string:string:string:string";
 
   public DescTableDesc() {
   }
@@ -80,7 +83,10 @@ public String getTable() {
     return table;
   }
 
-  public static String getSchema() {
+  public static String getSchema(boolean colStats) {
+    if (colStats) {
+      return colStatsSchema;
+    }
     return schema;
   }
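The two schema constants in DescTableDesc follow Hive's "names#types" convention: everything before the '#' is the comma-separated list of result column names, everything after it is the colon-separated list of their types. getColumnsHeader() in MetaDataFormatUtils keeps only the name half, which is what produces the widened "# col_name  data_type  min  max ..." header visible in the q.out file below. A minimal demonstration of that split (the demo class name is illustrative; the split calls are the ones the patch uses):

import java.util.Arrays;

// Demonstrates how the "names#types" schema string added above is consumed.
public class SchemaSplitDemo {
  public static void main(String[] args) {
    String colStatsSchema = "col_name,data_type,min,max,num_nulls,"
        + "distinct_count,avg_col_len,max_col_len,num_trues,num_falses,comment"
        + "#string:string:string:string:string:string:string:string:string:string:string";

    // Before '#': header names printed by DESCRIBE FORMATTED
    // (this is exactly the split getColumnsHeader() performs).
    String[] headers = colStatsSchema.split("#")[0].split(",");
    // After '#': result-set column types handed to the fetch task.
    String[] types = colStatsSchema.split("#")[1].split(":");

    System.out.println(Arrays.toString(headers)); // 11 header names
    System.out.println(types.length + " types, all string");
  }
}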
diff --git ql/src/test/queries/clientpositive/display_colstats_tbllvl.q ql/src/test/queries/clientpositive/display_colstats_tbllvl.q
new file mode 100644
index 0000000..53a18ee
--- /dev/null
+++ ql/src/test/queries/clientpositive/display_colstats_tbllvl.q
@@ -0,0 +1,69 @@
+DROP TABLE IF EXISTS UserVisits_web_text_none;
+
+CREATE TABLE UserVisits_web_text_none (
+  sourceIP string,
+  destURL string,
+  visitDate string,
+  adRevenue float,
+  userAgent string,
+  cCode string,
+  lCode string,
+  sKeyword string,
+  avgTimeOnSite int)
+row format delimited fields terminated by '|' stored as textfile;
+
+LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none;
+
+desc extended UserVisits_web_text_none sourceIP;
+desc formatted UserVisits_web_text_none sourceIP;
+
+explain
+analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue;
+
+explain extended
+analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue;
+
+analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue;
+desc formatted UserVisits_web_text_none sourceIP;
+desc formatted UserVisits_web_text_none avgTimeOnSite;
+desc formatted UserVisits_web_text_none adRevenue;
+
+CREATE TABLE empty_tab(
+   a int,
+   b double,
+   c string,
+   d boolean,
+   e binary)
+row format delimited fields terminated by '|' stored as textfile;
+
+desc formatted empty_tab a;
+explain
+analyze table empty_tab compute statistics for columns a,b,c,d,e;
+
+analyze table empty_tab compute statistics for columns a,b,c,d,e;
+desc formatted empty_tab a;
+desc formatted empty_tab b;
+
+CREATE DATABASE test;
+USE test;
+
+CREATE TABLE UserVisits_web_text_none (
+  sourceIP string,
+  destURL string,
+  visitDate string,
+  adRevenue float,
+  userAgent string,
+  cCode string,
+  lCode string,
+  sKeyword string,
+  avgTimeOnSite int)
+row format delimited fields terminated by '|' stored as textfile;
+
+LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none;
+
+desc extended UserVisits_web_text_none sourceIP;
+desc extended test.UserVisits_web_text_none sourceIP;
+desc extended default.UserVisits_web_text_none sourceIP;
+desc formatted UserVisits_web_text_none sourceIP;
+desc formatted test.UserVisits_web_text_none sourceIP;
+desc formatted default.UserVisits_web_text_none sourceIP;
diff --git ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
new file mode 100644
index 0000000..856ade3
--- /dev/null
+++ ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
@@ -0,0 +1,460 @@
+PREHOOK: query: DROP TABLE IF EXISTS UserVisits_web_text_none
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS UserVisits_web_text_none
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE UserVisits_web_text_none (
+  sourceIP string,
+  destURL string,
+  visitDate string,
+  adRevenue float,
+  userAgent string,
+  cCode string,
+  lCode string,
+  sKeyword string,
+  avgTimeOnSite int)
+row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE UserVisits_web_text_none (
+  sourceIP string,
+  destURL string,
+  visitDate string,
+  adRevenue float,
+  userAgent string,
+  cCode string,
+  lCode string,
+  sKeyword string,
+  avgTimeOnSite int)
+row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@UserVisits_web_text_none
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@uservisits_web_text_none
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@uservisits_web_text_none
+PREHOOK: query: desc extended UserVisits_web_text_none sourceIP
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@uservisits_web_text_none
+POSTHOOK: query: desc extended UserVisits_web_text_none sourceIP
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@uservisits_web_text_none
+sourceIP	string	from deserializer
+PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@uservisits_web_text_none
+POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@uservisits_web_text_none
+# col_name	data_type	comment
+
+sourceIP	string	from deserializer
+PREHOOK: query: explain
+analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: uservisits_web_text_none
+            Select Operator
+              expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float)
+              outputColumnNames: sourceip, avgtimeonsite, adrevenue
+              Group By Operator
+                aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  sort order: 
+                  value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: sourceIP, avgTimeOnSite, adRevenue
+          Column Types: string, int, float
+          Table: UserVisits_web_text_none
+
+PREHOOK: query: explain extended
+analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_ANALYZE
+   TOK_TAB
+      TOK_TABNAME
+         UserVisits_web_text_none
+   TOK_TABCOLNAME
+      sourceIP
+      avgTimeOnSite
+      adRevenue
+
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: uservisits_web_text_none
+            GatherStats: false
+            Select Operator
+              expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float)
+              outputColumnNames: sourceip, avgtimeonsite, adrevenue
+              Group By Operator
+                aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  sort order: 
+                  tag: -1
+                  value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: uservisits_web_text_none
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite
+              columns.comments 
+              columns.types string:string:string:float:string:string:string:string:int
+              field.delim |
+#### A masked pattern was here ####
+              name default.uservisits_web_text_none
+              numFiles 1
+              numRows 0
+              rawDataSize 0
+              serialization.ddl struct uservisits_web_text_none { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite}
+              serialization.format |
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 7060
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite
+                columns.comments 
+                columns.types string:string:string:float:string:string:string:string:int
+                field.delim |
+#### A masked pattern was here ####
+                name default.uservisits_web_text_none
+                numFiles 1
+                numRows 0
+                rawDataSize 0
+                serialization.ddl struct uservisits_web_text_none { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite}
+                serialization.format |
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 7060
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.uservisits_web_text_none
+            name: default.uservisits_web_text_none
+      Truncated Path -> Alias:
+        /uservisits_web_text_none [uservisits_web_text_none]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2
+                    columns.types struct:struct:struct
+                    escape.delim \
+                    hive.serialization.extend.nesting.levels true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: sourceIP, avgTimeOnSite, adRevenue
+          Column Types: string, int, float
+          Table: UserVisits_web_text_none
+          Is Table Level Stats: true
+
+PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
+PREHOOK: type: QUERY
+PREHOOK: Input: default@uservisits_web_text_none
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@uservisits_web_text_none
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@uservisits_web_text_none
+POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@uservisits_web_text_none
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+sourceIP	string	null	null	0	69	12.763636363636364	13	null	null	from deserializer
+PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@uservisits_web_text_none
+POSTHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@uservisits_web_text_none
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+avgTimeOnSite	int	0	9	0	11	null	null	null	null	from deserializer
+PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@uservisits_web_text_none
+POSTHOOK: query: desc formatted UserVisits_web_text_none adRevenue
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@uservisits_web_text_none
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+adRevenue	float	0.0	492.98870849609375	0	58	null	null	null	null	from deserializer
+PREHOOK: query: CREATE TABLE empty_tab(
+   a int,
+   b double,
+   c string,
+   d boolean,
+   e binary)
+row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE empty_tab(
+   a int,
+   b double,
+   c string,
+   d boolean,
+   e binary)
+row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@empty_tab
+PREHOOK: query: desc formatted empty_tab a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@empty_tab
+POSTHOOK: query: desc formatted empty_tab a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@empty_tab
+# col_name	data_type	comment
+
+a	int	from deserializer
+PREHOOK: query: explain
+analyze table empty_tab compute statistics for columns a,b,c,d,e
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+analyze table empty_tab compute statistics for columns a,b,c,d,e
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: empty_tab
+            Select Operator
+              expressions: a (type: int), b (type: double), c (type: string), d (type: boolean), e (type: binary)
+              outputColumnNames: a, b, c, d, e
+              Group By Operator
+                aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16), compute_stats(e, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Reduce Output Operator
+                  sort order: 
+                  value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Select Operator
+            expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: a, b, c, d, e
+          Column Types: int, double, string, boolean, binary
+          Table: empty_tab
+
+PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e
+PREHOOK: type: QUERY
+PREHOOK: Input: default@empty_tab
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@empty_tab
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted empty_tab a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@empty_tab
+POSTHOOK: query: desc formatted empty_tab a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@empty_tab
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+a	int	0	0	0	0	null	null	null	null	from deserializer
+PREHOOK: query: desc formatted empty_tab b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@empty_tab
+POSTHOOK: query: desc formatted empty_tab b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@empty_tab
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+b	double	0.0	0.0	0	0	null	null	null	null	from deserializer
+PREHOOK: query: CREATE DATABASE test
+PREHOOK: type: CREATEDATABASE
+POSTHOOK: query: CREATE DATABASE test
+POSTHOOK: type: CREATEDATABASE
+PREHOOK: query: USE test
+PREHOOK: type: SWITCHDATABASE
+POSTHOOK: query: USE test
+POSTHOOK: type: SWITCHDATABASE
+PREHOOK: query: CREATE TABLE UserVisits_web_text_none (
+  sourceIP string,
+  destURL string,
+  visitDate string,
+  adRevenue float,
+  userAgent string,
+  cCode string,
+  lCode string,
+  sKeyword string,
+  avgTimeOnSite int)
+row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:test
+POSTHOOK: query: CREATE TABLE UserVisits_web_text_none (
+  sourceIP string,
+  destURL string,
+  visitDate string,
+  adRevenue float,
+  userAgent string,
+  cCode string,
+  lCode string,
+  sKeyword string,
+  avgTimeOnSite int)
+row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:test
+POSTHOOK: Output: test@UserVisits_web_text_none
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: test@uservisits_web_text_none
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: test@uservisits_web_text_none
+PREHOOK: query: desc extended UserVisits_web_text_none sourceIP
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: test@uservisits_web_text_none
+POSTHOOK: query: desc extended UserVisits_web_text_none sourceIP
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: test@uservisits_web_text_none
+sourceIP	string	from deserializer
+PREHOOK: query: desc extended test.UserVisits_web_text_none sourceIP
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: test@uservisits_web_text_none
+POSTHOOK: query: desc extended test.UserVisits_web_text_none sourceIP
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: test@uservisits_web_text_none
+sourceIP	string	from deserializer
+PREHOOK: query: desc extended default.UserVisits_web_text_none sourceIP
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@uservisits_web_text_none
+POSTHOOK: query: desc extended default.UserVisits_web_text_none sourceIP
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@uservisits_web_text_none
+sourceIP	string	from deserializer
+PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: test@uservisits_web_text_none
+POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: test@uservisits_web_text_none
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+sourceIP	string	null	null	0	69	12.763636363636364	13	null	null	from deserializer
+PREHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: test@uservisits_web_text_none
+POSTHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: test@uservisits_web_text_none
+# col_name	data_type	comment
+
+sourceIP	string	from deserializer
+PREHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@uservisits_web_text_none
+POSTHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@uservisits_web_text_none
+# col_name	data_type	min	max	num_nulls	distinct_count	avg_col_len	max_col_len	num_trues	num_falses	comment
+
+sourceIP	string	null	null	0	69	12.763636363636364	13	null	null	from deserializer