diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index 51c3f2c..9589133 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -45,6 +45,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; @@ -163,19 +164,25 @@ public static boolean updateUnpartitionedTableStatsFast(Database db, Table tbl, return updateUnpartitionedTableStatsFast(db, tbl, wh, madeDir, false); } + public static boolean updateUnpartitionedTableStatsFast(Database db, Table tbl, Warehouse wh, + boolean madeDir, boolean forceRecompute) throws MetaException { + return updateUnpartitionedTableStatsFast(tbl, + wh.getFileStatusesForUnpartitionedTable(db, tbl), madeDir, forceRecompute); + } + /** * Updates the numFiles and totalSize parameters for the passed unpartitioned Table by querying * the warehouse if the passed Table does not already have values for these parameters. 
- * @param db * @param tbl - * @param wh + * @param fileStatus * @param newDir if true, the directory was just created and can be assumed to be empty * @param forceRecompute Recompute stats even if the passed Table already has * these parameters set * @return true if the stats were updated, false otherwise */ - public static boolean updateUnpartitionedTableStatsFast(Database db, Table tbl, Warehouse wh, - boolean newDir, boolean forceRecompute) throws MetaException { + public static boolean updateUnpartitionedTableStatsFast(Table tbl, + FileStatus[] fileStatus, boolean newDir, boolean forceRecompute) throws MetaException { + Map params = tbl.getParameters(); boolean updated = false; if (forceRecompute || @@ -188,7 +195,6 @@ public static boolean updateUnpartitionedTableStatsFast(Database db, Table tbl, // The table location already exists and may contain data. // Let's try to populate those stats that don't require full scan. LOG.info("Updating table stats fast for " + tbl.getTableName()); - FileStatus[] fileStatus = wh.getFileStatusesForUnpartitionedTable(db, tbl); populateQuickStats(fileStatus, params); LOG.info("Updated size of table " + tbl.getTableName() +" to "+ params.get(StatsSetupConst.TOTAL_SIZE)); if(!params.containsKey(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index 4cf98d8..84ac477 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -5,6 +5,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -13,22 +14,33 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.fs.FileSystem; 
+import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaHookLoader; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.HiveObjectRef; import org.apache.hadoop.hive.metastore.api.HiveObjectType; +import org.apache.hadoop.hive.metastore.api.InvalidInputException; import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest; import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; +import org.apache.hadoop.hive.metastore.api.TableStatsRequest; import org.apache.hadoop.hive.metastore.api.UnknownDBException; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.thrift.TException; public class SessionHiveMetaStoreClient extends HiveMetaStoreClient implements IMetaStoreClient { @@ -71,6 +83,12 @@ protected void drop_table_with_environment_context(String dbname, String name, // First try temp table org.apache.hadoop.hive.metastore.api.Table table = getTempTable(dbname, name); if (table != null) { + try { + deleteTempTableColumnStatsForTable(dbname, name); + } 
catch (NoSuchObjectException err){ + // No stats to delete, forgivable error. + LOG.info(err); + } dropTempTable(table, deleteData, envContext); return; } @@ -217,6 +235,41 @@ public PrincipalPrivilegeSet get_privilege_set(HiveObjectRef hiveObject, return super.get_privilege_set(hiveObject, userName, groupNames); } + /** {@inheritDoc} */ + @Override + public boolean updateTableColumnStatistics(ColumnStatistics statsObj) + throws NoSuchObjectException, InvalidObjectException, MetaException, TException, + InvalidInputException { + String dbName = statsObj.getStatsDesc().getDbName().toLowerCase(); + String tableName = statsObj.getStatsDesc().getTableName().toLowerCase(); + if (getTempTable(dbName, tableName) != null) { + return updateTempTableColumnStats(dbName, tableName, statsObj); + } + return super.updateTableColumnStatistics(statsObj); + } + + /** {@inheritDoc} */ + @Override + public List getTableColumnStatistics(String dbName, String tableName, + List colNames) throws NoSuchObjectException, MetaException, TException, + InvalidInputException, InvalidObjectException { + if (getTempTable(dbName, tableName) != null) { + return getTempTableColumnStats(dbName, tableName, colNames); + } + return super.getTableColumnStatistics(dbName, tableName, colNames); + } + + /** {@inheritDoc} */ + @Override + public boolean deleteTableColumnStatistics(String dbName, String tableName, String colName) + throws NoSuchObjectException, InvalidObjectException, MetaException, TException, + InvalidInputException { + if (getTempTable(dbName, tableName) != null) { + return deleteTempTableColumnStats(dbName, tableName, colName); + } + return super.deleteTableColumnStatistics(dbName, tableName, colName); + } + private void createTempTable(org.apache.hadoop.hive.metastore.api.Table tbl, EnvironmentContext envContext) throws AlreadyExistsException, InvalidObjectException, MetaException, NoSuchObjectException, TException { @@ -277,15 +330,19 @@ private void alterTempTable(String dbname, String 
tbl_name, org.apache.hadoop.hive.metastore.api.Table oldt, org.apache.hadoop.hive.metastore.api.Table newt, EnvironmentContext envContext) throws InvalidOperationException, MetaException, TException { - Table newTable = new Table(deepCopyAndLowerCaseTable(newt)); dbname = dbname.toLowerCase(); tbl_name = tbl_name.toLowerCase(); + boolean shouldDeleteColStats = false; // Disallow changing temp table location if (!newt.getSd().getLocation().equals(oldt.getSd().getLocation())) { throw new MetaException("Temp table location cannot be changed"); } + org.apache.hadoop.hive.metastore.api.Table newtCopy = deepCopyAndLowerCaseTable(newt); + MetaStoreUtils.updateUnpartitionedTableStatsFast(newtCopy, + wh.getFileStatusesForSD(newtCopy.getSd()), false, true); + Table newTable = new Table(newtCopy); String newDbName = newTable.getDbName(); String newTableName = newTable.getTableName(); if (!newDbName.equals(oldt.getDbName()) || !newTableName.equals(oldt.getTableName())) { @@ -303,6 +360,7 @@ private void alterTempTable(String dbname, String tbl_name, if (tables == null || tables.remove(tbl_name) == null) { throw new MetaException("Could not find temp table entry for " + dbname + "." + tbl_name); } + shouldDeleteColStats = true; tables = getTempTablesForDatabase(newDbName); if (tables == null) { @@ -311,8 +369,50 @@ private void alterTempTable(String dbname, String tbl_name, } tables.put(newTableName, newTable); } else { + if (haveTableColumnsChanged(oldt, newt)) { + shouldDeleteColStats = true; + } getTempTablesForDatabase(dbname).put(tbl_name, newTable); } + + if (shouldDeleteColStats) { + try { + deleteTempTableColumnStatsForTable(dbname, tbl_name); + } catch (NoSuchObjectException err){ + // No stats to delete, forgivable error. 
+ LOG.info(err); + } + } + } + + private static boolean haveTableColumnsChanged(org.apache.hadoop.hive.metastore.api.Table oldt, + org.apache.hadoop.hive.metastore.api.Table newt) { + List<FieldSchema> oldCols = oldt.getSd().getCols(); + List<FieldSchema> newCols = newt.getSd().getCols(); + if (oldCols.size() != newCols.size()) { + return true; + } + Iterator<FieldSchema> oldColsIter = oldCols.iterator(); + Iterator<FieldSchema> newColsIter = newCols.iterator(); + while (oldColsIter.hasNext()) { + // Don't use FieldSchema.equals() since it also compares comments, + // which is unnecessary for this method. + if (!fieldSchemaEqualsIgnoreComment(oldColsIter.next(), newColsIter.next())) { + return true; + } + } + return false; + } + + private static boolean fieldSchemaEqualsIgnoreComment(FieldSchema left, FieldSchema right) { + // Equal only when both name and type match; the comment is deliberately ignored. + if (!left.getName().equals(right.getName())) { + return false; + } + if (!left.getType().equals(right.getType())) { + return false; + } + return true; } private void dropTempTable(org.apache.hadoop.hive.metastore.api.Table table, boolean deleteData, @@ -373,4 +473,102 @@ private void dropTempTable(org.apache.hadoop.hive.metastore.api.Table table, boo } return ss.getTempTables().get(dbName); } + + private Map<String, ColumnStatisticsObj> getTempTableColumnStatsForTable(String dbName, + String tableName) { + SessionState ss = SessionState.get(); + if (ss == null) { + LOG.debug("No current SessionState, skipping temp tables"); + return null; + } + String lookupName = StatsUtils.getFullyQualifiedTableName(dbName.toLowerCase(), + tableName.toLowerCase()); + return ss.getTempTableColStats().get(lookupName); + } + + private static List<ColumnStatisticsObj> copyColumnStatisticsObjList(Map<String, ColumnStatisticsObj> csoMap) { + List<ColumnStatisticsObj> retval = new ArrayList<ColumnStatisticsObj>(csoMap.size()); + for (ColumnStatisticsObj cso : csoMap.values()) { + retval.add(new ColumnStatisticsObj(cso)); + } + return retval; + } + + private List getTempTableColumnStats(String dbName, String tableName, + List colNames) { + Map tableColStats = +
getTempTableColumnStatsForTable(dbName, tableName); + List retval = new ArrayList(); + + if (tableColStats != null) { + for (String colName : colNames) { + colName = colName.toLowerCase(); + if (tableColStats.containsKey(colName)) { + retval.add(new ColumnStatisticsObj(tableColStats.get(colName))); + } + } + } + return retval; + } + + private boolean updateTempTableColumnStats(String dbName, String tableName, + ColumnStatistics colStats) throws MetaException { + SessionState ss = SessionState.get(); + if (ss == null) { + throw new MetaException("No current SessionState, cannot update temporary table stats for " + + dbName + "." + tableName); + } + Map<String, ColumnStatisticsObj> ssTableColStats = + getTempTableColumnStatsForTable(dbName, tableName); + if (ssTableColStats == null) { + // Add new entry for this table, keyed by the same lower-cased name the lookup uses + ssTableColStats = new HashMap<String, ColumnStatisticsObj>(); + ss.getTempTableColStats().put( + StatsUtils.getFullyQualifiedTableName(dbName.toLowerCase(), tableName.toLowerCase()), + ssTableColStats); + } + mergeColumnStats(ssTableColStats, colStats); + return true; + } + + private static void mergeColumnStats(Map oldStats, + ColumnStatistics newStats) { + List newColList = newStats.getStatsObj(); + if (newColList != null) { + for (ColumnStatisticsObj colStat : newColList) { + // This is admittedly a bit simple, StatsObjectConverter seems to allow + // old stats attributes to be kept if the new values do not overwrite them. 
+ oldStats.put(colStat.getColName().toLowerCase(), colStat); + } + } + } + + private boolean deleteTempTableColumnStatsForTable(String dbName, String tableName) + throws NoSuchObjectException { + Map<String, ColumnStatisticsObj> deletedEntry = + getTempTableColumnStatsForTable(dbName, tableName); + if (deletedEntry != null) { + SessionState.get().getTempTableColStats().remove( + StatsUtils.getFullyQualifiedTableName(dbName.toLowerCase(), tableName.toLowerCase())); + } else { + throw new NoSuchObjectException("Column stats doesn't exist for db=" + dbName + + " temp table=" + tableName); + } + return true; + } + + private boolean deleteTempTableColumnStats(String dbName, String tableName, String columnName) + throws NoSuchObjectException { + ColumnStatisticsObj deletedEntry = null; + Map<String, ColumnStatisticsObj> ssTableColStats = + getTempTableColumnStatsForTable(dbName, tableName); + if (ssTableColStats != null) { + deletedEntry = ssTableColStats.remove(columnName.toLowerCase()); + } + if (deletedEntry == null) { + throw new NoSuchObjectException("Column stats doesn't exist for db=" + dbName + + " temp table=" + tableName); + } + return true; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 3f8648b..44c193f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -363,7 +363,6 @@ public ColumnStatsSemanticAnalyzer(HiveConf conf, ASTNode tree) throws SemanticE originalTree = tree; boolean isPartitionStats = isPartitionLevelStats(tree); Map partSpec = null; - checkIfTemporaryTable(); checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys())); validateSpecifiedColumnNames(colNames); if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned()) { @@ -414,13 +413,6 @@ private void checkForPartitionColumns(List specifiedCols, List p } } - private 
void checkIfTemporaryTable() throws SemanticException { - if (tbl.isTemporary()) { - throw new SemanticException(tbl.getTableName() - + " is a temporary table. Column statistics are not supported on temporary tables."); - } - } - @Override public void analyze(ASTNode ast, Context origCtx) throws SemanticException { QB qb; diff --git ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index 9798cf3..df66f83 100644 --- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -45,6 +45,8 @@ import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.ql.MapRedStats; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager; @@ -86,6 +88,8 @@ private static final String HDFS_SESSION_PATH_KEY = "_hive.hdfs.session.path"; private static final String TMP_TABLE_SPACE_KEY = "_hive.tmp_table_space"; private final Map> tempTables = new HashMap>(); + private final Map> tempTableColStats = + new HashMap>(); protected ClassLoader parentLoader; @@ -1145,6 +1149,10 @@ public void applyAuthorizationPolicy() throws HiveException { return tempTables; } + public Map> getTempTableColStats() { + return tempTableColStats; + } + /** * @return ip address for user running the query */ diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 7cb7c5e..f664a7b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -1166,6 +1166,10 @@ public static String getFullyQualifiedColumnName(String 
dbName, String tabName, return getFullyQualifiedName(dbName, tabName, partName, colName); } + public static String getFullyQualifiedTableName(String dbName, String tabName) { + return getFullyQualifiedName(dbName, tabName); + } + private static String getFullyQualifiedName(String... names) { List nonNullAndEmptyNames = Lists.newArrayList(); for (String name : names) { diff --git ql/src/test/queries/clientnegative/temp_table_column_stats.q ql/src/test/queries/clientnegative/temp_table_column_stats.q deleted file mode 100644 index 9b7aa4a..0000000 --- ql/src/test/queries/clientnegative/temp_table_column_stats.q +++ /dev/null @@ -1,5 +0,0 @@ -create temporary table tmp1 (c1 string); --- table-level stats should work -analyze table tmp1 compute statistics; --- column stats should fail -analyze table tmp1 compute statistics for columns; diff --git ql/src/test/queries/clientpositive/temp_table_display_colstats_tbllvl.q ql/src/test/queries/clientpositive/temp_table_display_colstats_tbllvl.q new file mode 100644 index 0000000..39a11f2 --- /dev/null +++ ql/src/test/queries/clientpositive/temp_table_display_colstats_tbllvl.q @@ -0,0 +1,78 @@ +-- Based on display_colstats_tbllvl.q, output should be almost exactly the same. 
+DROP TABLE IF EXISTS UserVisits_web_text_none; + +-- Hack, set external location because generated filename changes during test runs +CREATE TEMPORARY EXTERNAL TABLE UserVisits_web_text_none ( + sourceIP string, + destURL string, + visitDate string, + adRevenue float, + userAgent string, + cCode string, + lCode string, + sKeyword string, + avgTimeOnSite int) +row format delimited fields terminated by '|' stored as textfile +location 'pfile://${system:test.tmp.dir}/uservisits_web_text_none'; + +LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none; + +desc extended UserVisits_web_text_none sourceIP; +desc formatted UserVisits_web_text_none sourceIP; + +explain +analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue; + +explain extended +analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue; + +analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue; +desc formatted UserVisits_web_text_none sourceIP; +desc formatted UserVisits_web_text_none avgTimeOnSite; +desc formatted UserVisits_web_text_none adRevenue; + +CREATE TEMPORARY TABLE empty_tab( + a int, + b double, + c string, + d boolean, + e binary) +row format delimited fields terminated by '|' stored as textfile; + +desc formatted empty_tab a; +explain +analyze table empty_tab compute statistics for columns a,b,c,d,e; + +analyze table empty_tab compute statistics for columns a,b,c,d,e; +desc formatted empty_tab a; +desc formatted empty_tab b; + +CREATE DATABASE test; +USE test; + +CREATE TEMPORARY TABLE UserVisits_web_text_none ( + sourceIP string, + destURL string, + visitDate string, + adRevenue float, + userAgent string, + cCode string, + lCode string, + sKeyword string, + avgTimeOnSite int) +row format delimited fields terminated by '|' stored as textfile; + +LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO 
TABLE UserVisits_web_text_none; + +desc extended UserVisits_web_text_none sourceIP; +desc extended test.UserVisits_web_text_none sourceIP; +desc extended default.UserVisits_web_text_none sourceIP; +desc formatted UserVisits_web_text_none sourceIP; +desc formatted test.UserVisits_web_text_none sourceIP; +desc formatted default.UserVisits_web_text_none sourceIP; + +analyze table UserVisits_web_text_none compute statistics for columns sKeyword; +desc extended UserVisits_web_text_none sKeyword; +desc formatted UserVisits_web_text_none sKeyword; +desc formatted test.UserVisits_web_text_none sKeyword; + diff --git ql/src/test/results/clientnegative/temp_table_column_stats.q.out ql/src/test/results/clientnegative/temp_table_column_stats.q.out deleted file mode 100644 index 486597a..0000000 --- ql/src/test/results/clientnegative/temp_table_column_stats.q.out +++ /dev/null @@ -1,19 +0,0 @@ -PREHOOK: query: create temporary table tmp1 (c1 string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tmp1 -POSTHOOK: query: create temporary table tmp1 (c1 string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tmp1 -PREHOOK: query: -- table-level stats should work -analyze table tmp1 compute statistics -PREHOOK: type: QUERY -PREHOOK: Input: default@tmp1 -PREHOOK: Output: default@tmp1 -POSTHOOK: query: -- table-level stats should work -analyze table tmp1 compute statistics -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tmp1 -POSTHOOK: Output: default@tmp1 -FAILED: SemanticException tmp1 is a temporary table. Column statistics are not supported on temporary tables. 
diff --git ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out new file mode 100644 index 0000000..b021b70 --- /dev/null +++ ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out @@ -0,0 +1,510 @@ +PREHOOK: query: -- Based on display_colstats_tbllvl.q, output should be almost exactly the same. +DROP TABLE IF EXISTS UserVisits_web_text_none +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Based on display_colstats_tbllvl.q, output should be almost exactly the same. +DROP TABLE IF EXISTS UserVisits_web_text_none +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- Hack, set external location because generated filename changes during test runs +CREATE TEMPORARY EXTERNAL TABLE UserVisits_web_text_none ( + sourceIP string, + destURL string, + visitDate string, + adRevenue float, + userAgent string, + cCode string, + lCode string, + sKeyword string, + avgTimeOnSite int) +row format delimited fields terminated by '|' stored as textfile +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@UserVisits_web_text_none +POSTHOOK: query: -- Hack, set external location because generated filename changes during test runs +CREATE TEMPORARY EXTERNAL TABLE UserVisits_web_text_none ( + sourceIP string, + destURL string, + visitDate string, + adRevenue float, + userAgent string, + cCode string, + lCode string, + sKeyword string, + avgTimeOnSite int) +row format delimited fields terminated by '|' stored as textfile +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@UserVisits_web_text_none +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none +PREHOOK: type: LOAD +#### A masked pattern was here #### 
+PREHOOK: Output: default@uservisits_web_text_none +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@uservisits_web_text_none +PREHOOK: query: desc extended UserVisits_web_text_none sourceIP +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@uservisits_web_text_none +POSTHOOK: query: desc extended UserVisits_web_text_none sourceIP +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@uservisits_web_text_none +sourceIP string from deserializer +PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@uservisits_web_text_none +POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@uservisits_web_text_none +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +sourceIP string from deserializer +PREHOOK: query: explain +analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +PREHOOK: type: QUERY +POSTHOOK: query: explain +analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: uservisits_web_text_none + Select Operator + expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float) + outputColumnNames: sourceip, avgtimeonsite, adrevenue + Group By Operator + aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce 
Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: sourceIP, avgTimeOnSite, adRevenue + Column Types: string, int, float + Table: uservisits_web_text_none + +PREHOOK: query: explain extended +analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_ANALYZE + TOK_TAB + TOK_TABNAME + UserVisits_web_text_none + columns + TOK_TABCOLNAME + sourceIP + avgTimeOnSite + adRevenue + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: uservisits_web_text_none + GatherStats: false + Select Operator + expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float) + outputColumnNames: sourceip, avgtimeonsite, adrevenue + Group By Operator + aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern 
was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: uservisits_web_text_none + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + EXTERNAL TRUE + bucket_count -1 + columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite + columns.comments + columns.types string:string:string:float:string:string:string:string:int + field.delim | +#### A masked pattern was here #### + name default.uservisits_web_text_none + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct uservisits_web_text_none { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7060 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + EXTERNAL TRUE + bucket_count -1 + columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite + columns.comments + columns.types string:string:string:float:string:string:string:string:int + field.delim | +#### A masked pattern was here #### + name default.uservisits_web_text_none + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct uservisits_web_text_none { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7060 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.uservisits_web_text_none + name: 
default.uservisits_web_text_none + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: sourceIP, avgTimeOnSite, adRevenue + Column Types: string, int, float + Table: uservisits_web_text_none + Is Table Level Stats: true + +PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +PREHOOK: type: QUERY +PREHOOK: Input: default@uservisits_web_text_none +#### A masked pattern was here #### +POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue +POSTHOOK: type: QUERY +POSTHOOK: Input: default@uservisits_web_text_none +#### A masked pattern was here #### +PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@uservisits_web_text_none +POSTHOOK: query: desc formatted UserVisits_web_text_none 
sourceIP +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@uservisits_web_text_none +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +sourceIP string 0 69 12.763636363636364 13 from deserializer +PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@uservisits_web_text_none +POSTHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@uservisits_web_text_none +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +avgTimeOnSite int 1 9 0 11 from deserializer +PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@uservisits_web_text_none +POSTHOOK: query: desc formatted UserVisits_web_text_none adRevenue +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@uservisits_web_text_none +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +adRevenue float 13.099044799804688 492.98870849609375 0 58 from deserializer +PREHOOK: query: CREATE TEMPORARY TABLE empty_tab( + a int, + b double, + c string, + d boolean, + e binary) +row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@empty_tab +POSTHOOK: query: CREATE TEMPORARY TABLE empty_tab( + a int, + b double, + c string, + d boolean, + e binary) +row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@empty_tab +PREHOOK: query: desc formatted empty_tab a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@empty_tab +POSTHOOK: query: desc formatted empty_tab a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@empty_tab +# col_name data_type min max num_nulls 
distinct_count avg_col_len max_col_len num_trues num_falses comment + +a int from deserializer +PREHOOK: query: explain +analyze table empty_tab compute statistics for columns a,b,c,d,e +PREHOOK: type: QUERY +POSTHOOK: query: explain +analyze table empty_tab compute statistics for columns a,b,c,d,e +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: empty_tab + Select Operator + expressions: a (type: int), b (type: double), c (type: string), d (type: boolean), e (type: binary) + outputColumnNames: a, b, c, d, e + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16), compute_stats(c, 16), compute_stats(d, 16), compute_stats(e, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: a, b, c, d, e + Column Types: int, double, string, boolean, binary + Table: empty_tab + +PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e +PREHOOK: 
type: QUERY +PREHOOK: Input: default@empty_tab +#### A masked pattern was here #### +POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e +POSTHOOK: type: QUERY +POSTHOOK: Input: default@empty_tab +#### A masked pattern was here #### +PREHOOK: query: desc formatted empty_tab a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@empty_tab +POSTHOOK: query: desc formatted empty_tab a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@empty_tab +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +a int 0 0 0 0 from deserializer +PREHOOK: query: desc formatted empty_tab b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@empty_tab +POSTHOOK: query: desc formatted empty_tab b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@empty_tab +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +b double 0.0 0.0 0 0 from deserializer +PREHOOK: query: CREATE DATABASE test +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:test +POSTHOOK: query: CREATE DATABASE test +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:test +PREHOOK: query: USE test +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:test +POSTHOOK: query: USE test +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:test +PREHOOK: query: CREATE TEMPORARY TABLE UserVisits_web_text_none ( + sourceIP string, + destURL string, + visitDate string, + adRevenue float, + userAgent string, + cCode string, + lCode string, + sKeyword string, + avgTimeOnSite int) +row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:test +PREHOOK: Output: test@UserVisits_web_text_none +POSTHOOK: query: CREATE TEMPORARY TABLE UserVisits_web_text_none ( + sourceIP string, + destURL string, + visitDate string, + adRevenue float, + userAgent string, + cCode string, + lCode string, + sKeyword string, + 
avgTimeOnSite int) +row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:test +POSTHOOK: Output: test@UserVisits_web_text_none +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: test@uservisits_web_text_none +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/UserVisits.dat" INTO TABLE UserVisits_web_text_none +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: test@uservisits_web_text_none +PREHOOK: query: desc extended UserVisits_web_text_none sourceIP +PREHOOK: type: DESCTABLE +PREHOOK: Input: test@uservisits_web_text_none +POSTHOOK: query: desc extended UserVisits_web_text_none sourceIP +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: test@uservisits_web_text_none +sourceIP string from deserializer +PREHOOK: query: desc extended test.UserVisits_web_text_none sourceIP +PREHOOK: type: DESCTABLE +PREHOOK: Input: test@uservisits_web_text_none +POSTHOOK: query: desc extended test.UserVisits_web_text_none sourceIP +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: test@uservisits_web_text_none +sourceIP string from deserializer +PREHOOK: query: desc extended default.UserVisits_web_text_none sourceIP +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@uservisits_web_text_none +POSTHOOK: query: desc extended default.UserVisits_web_text_none sourceIP +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@uservisits_web_text_none +sourceIP string from deserializer +PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP +PREHOOK: type: DESCTABLE +PREHOOK: Input: test@uservisits_web_text_none +POSTHOOK: query: desc formatted UserVisits_web_text_none sourceIP +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: test@uservisits_web_text_none +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + 
+sourceIP string from deserializer +PREHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP +PREHOOK: type: DESCTABLE +PREHOOK: Input: test@uservisits_web_text_none +POSTHOOK: query: desc formatted test.UserVisits_web_text_none sourceIP +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: test@uservisits_web_text_none +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +sourceIP string from deserializer +PREHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@uservisits_web_text_none +POSTHOOK: query: desc formatted default.UserVisits_web_text_none sourceIP +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@uservisits_web_text_none +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +sourceIP string 0 69 12.763636363636364 13 from deserializer +PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword +PREHOOK: type: QUERY +PREHOOK: Input: test@uservisits_web_text_none +#### A masked pattern was here #### +POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword +POSTHOOK: type: QUERY +POSTHOOK: Input: test@uservisits_web_text_none +#### A masked pattern was here #### +PREHOOK: query: desc extended UserVisits_web_text_none sKeyword +PREHOOK: type: DESCTABLE +PREHOOK: Input: test@uservisits_web_text_none +POSTHOOK: query: desc extended UserVisits_web_text_none sKeyword +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: test@uservisits_web_text_none +sKeyword string from deserializer +PREHOOK: query: desc formatted UserVisits_web_text_none sKeyword +PREHOOK: type: DESCTABLE +PREHOOK: Input: test@uservisits_web_text_none +POSTHOOK: query: desc formatted UserVisits_web_text_none sKeyword +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: test@uservisits_web_text_none +# col_name data_type min max num_nulls 
distinct_count avg_col_len max_col_len num_trues num_falses comment + +sKeyword string 0 49 7.872727272727273 19 from deserializer +PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword +PREHOOK: type: DESCTABLE +PREHOOK: Input: test@uservisits_web_text_none +POSTHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: test@uservisits_web_text_none +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +sKeyword string 0 49 7.872727272727273 19 from deserializer