diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index e2f696eea7b472d4e417629eedd33406b41caf98..3414e9a525a06df8150e75800d559fa2ba939018 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -267,7 +267,7 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName,
   private List constructColumnStatsFromPackedRows() throws HiveException, MetaException, IOException {
-    String dbName = SessionState.get().getCurrentDatabase();
+    String currentDb = SessionState.get().getCurrentDatabase();
     String tableName = work.getColStats().getTableName();
     String partName = null;
     List colName = work.getColStats().getColName();
@@ -286,7 +286,7 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName,
       List fields = soi.getAllStructFieldRefs();
       List list = soi.getStructFieldsDataAsList(packedRow.o);
-      Table tbl = db.getTable(dbName,tableName);
+      Table tbl = db.getTable(currentDb,tableName);
       List partColSchema = tbl.getPartCols();
       // Partition columns are appended at end, we only care about stats column
       int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size();
@@ -313,8 +313,8 @@ private void unpackStructObject(ObjectInspector oi, Object o, String fName,
         }
         partName = Warehouse.makePartName(partColSchema, partVals);
       }
-
-      ColumnStatisticsDesc statsDesc = getColumnStatsDesc(dbName, tableName, partName, isTblLevel);
+      String [] names = Utilities.getDbTableName(currentDb, tableName);
+      ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, isTblLevel);
       ColumnStatistics colStats = new ColumnStatistics();
       colStats.setStatsDesc(statsDesc);
       colStats.setStatsObj(statsObjs);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
index 28c17b75356eb4ffaac2193a80b27bdd67d98009..b85282c2472a8fda7f3f6b62b0af4de3ee7fb4a8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
@@ -238,8 +238,8 @@ private ColumnStatistics constructColumnStatsFromInput()
     } else {
       throw new SemanticException("Unsupported type");
     }
-
-    ColumnStatisticsDesc statsDesc = getColumnStatsDesc(dbName, tableName,
+    String [] names = Utilities.getDbTableName(dbName, tableName);
+    ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1],
         partName, partName == null);
     ColumnStatistics colStat = new ColumnStatistics();
     colStat.setStatsDesc(statsDesc);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index c83523e507f1003eff1a821c216a7f6904f293ed..b84ea46c09259cea5ef5ebee0f8553a8fae5ae93 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -35,6 +35,7 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.session.SessionState;
 
 /**
  * ColumnStatsSemanticAnalyzer.
@@ -94,13 +95,9 @@ private boolean isPartitionLevelStats(ASTNode tree) {
 
   private Table getTable(ASTNode tree) throws SemanticException {
     String tableName = getUnescapedName((ASTNode) tree.getChild(0).getChild(0));
-    try {
-      return db.getTable(tableName);
-    } catch (InvalidTableException e) {
-      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName), e);
-    } catch (HiveException e) {
-      throw new SemanticException(e.getMessage(), e);
-    }
+    String currentDb = SessionState.get().getCurrentDatabase();
+    String [] names = Utilities.getDbTableName(currentDb, tableName);
+    return getTable(names[0], names[1], true);
   }
 
   private Map getPartKeyValuePairsFromAST(ASTNode tree) {
@@ -315,6 +312,8 @@ private String genRewrittenQuery(List colNames, int numBitVectors, Map
), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: sourceIP, avgTimeOnSite, adRevenue
+          Column Types: string, int, float
+          Table: dummydb.uservisits_in_dummy_db
+
+PREHOOK: query: explain extended
+analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_ANALYZE
+   TOK_TAB
+      TOK_TABNAME
+         dummydb
+         UserVisits_in_dummy_db
+   columns
+      TOK_TABCOLNAME
+         sourceIP
+         avgTimeOnSite
+         adRevenue
+
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: uservisits_in_dummy_db
+            GatherStats: false
+            Select Operator
+              expressions: sourceip (type: string), avgtimeonsite (type: int), adrevenue (type: float)
+              outputColumnNames: sourceip, avgtimeonsite, adrevenue
+              Group By Operator
+                aggregations: compute_stats(sourceip, 16), compute_stats(avgtimeonsite, 16), compute_stats(adrevenue, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  sort order: 
+                  tag: -1
+                  value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+                  auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: uservisits_in_dummy_db
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite
+              columns.comments
+              columns.types string:string:string:float:string:string:string:string:int
+              field.delim |
+#### A masked pattern was here ####
+              name dummydb.uservisits_in_dummy_db
+              numFiles 1
+              serialization.ddl struct uservisits_in_dummy_db { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite}
+              serialization.format |
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 7060
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite
+                columns.comments
+                columns.types string:string:string:float:string:string:string:string:int
+                field.delim |
+#### A masked pattern was here ####
+                name dummydb.uservisits_in_dummy_db
+                numFiles 1
+                serialization.ddl struct uservisits_in_dummy_db { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite}
+                serialization.format |
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 7060
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: dummydb.uservisits_in_dummy_db
+            name: dummydb.uservisits_in_dummy_db
+      Truncated Path -> Alias:
+        /dummydb.db/uservisits_in_dummy_db [uservisits_in_dummy_db]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: sourceIP, avgTimeOnSite, adRevenue
+          Column Types: string, int, float
+          Table: dummydb.uservisits_in_dummy_db
+          Is Table Level Stats: true
+
+PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
+PREHOOK: type: QUERY
+PREHOOK: Input: dummydb@uservisits_in_dummy_db
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
+POSTHOOK: type: QUERY
+POSTHOOK: Input: dummydb@uservisits_in_dummy_db
+#### A masked pattern was here ####
+PREHOOK: query: explain
+analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: uservisits_in_dummy_db
+            Select Operator
+              expressions: sourceip (type: string), desturl (type: string), visitdate (type: string), adrevenue (type: float), useragent (type: string), ccode (type: string), lcode (type: string), skeyword (type: string), avgtimeonsite (type: int)
+              outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite
+              Group By Operator
+                aggregations: compute_stats(sourceip, 16), compute_stats(desturl, 16), compute_stats(visitdate, 16), compute_stats(adrevenue, 16), compute_stats(useragent, 16), compute_stats(ccode, 16), compute_stats(lcode, 16), compute_stats(skeyword, 16), compute_stats(avgtimeonsite, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                Reduce Output Operator
+                  sort order: 
+                  value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite
+          Column Types: string, string, string, float, string, string, string, string, int
+          Table: dummydb.uservisits_in_dummy_db
+
+PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: dummydb@uservisits_in_dummy_db
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: dummydb@uservisits_in_dummy_db
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: dummydb@uservisits_in_dummy_db
+POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: dummydb@uservisits_in_dummy_db
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+destURL string 0 56 48.945454545454545 96 from deserializer
+PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: dummydb@uservisits_in_dummy_db
+POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: dummydb@uservisits_in_dummy_db
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+adRevenue float 13.099044799804688 492.98870849609375 0 58 from deserializer
+PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: dummydb@uservisits_in_dummy_db
+POSTHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: dummydb@uservisits_in_dummy_db
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+avgTimeOnSite int 1 9 0 11 from deserializer
+PREHOOK: query: drop table dummydb.UserVisits_in_dummy_db
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: dummydb@uservisits_in_dummy_db
+PREHOOK: Output: dummydb@uservisits_in_dummy_db
+POSTHOOK: query: drop table dummydb.UserVisits_in_dummy_db
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: dummydb@uservisits_in_dummy_db
+POSTHOOK: Output: dummydb@uservisits_in_dummy_db
+PREHOOK: query: drop database dummydb
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:dummydb
+PREHOOK: Output: database:dummydb
+POSTHOOK: query: drop database dummydb
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:dummydb
+POSTHOOK: Output: database:dummydb
diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
index a176d9a2a9db7eb848d1db022c4d99680db1b62c..53f06d93001ac7e7e99300cfe0bd0263f4a71369 100644
--- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
@@ -97,7 +97,7 @@ STAGE PLANS:
       Column Stats Desc:
           Columns: sourceIP, avgTimeOnSite, adRevenue
           Column Types: string, int, float
-          Table: uservisits_web_text_none
+          Table: default.uservisits_web_text_none
 
 PREHOOK: query: explain extended
 analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
@@ -220,7 +220,7 @@ STAGE PLANS:
       Column Stats Desc:
           Columns: sourceIP, avgTimeOnSite, adRevenue
           Column Types: string, int, float
-          Table: uservisits_web_text_none
+          Table: default.uservisits_web_text_none
           Is Table Level Stats: true
 
 PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
@@ -330,7 +330,7 @@ STAGE PLANS:
       Column Stats Desc:
           Columns: a, b, c, d, e
           Column Types: int, double, string, boolean, binary
-          Table: empty_tab
+          Table: default.empty_tab
 
 PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
index 933d24b6cde934faa013f1ea102b9b753a7d44a4..3f63cbb6b7c67371f18d355fb7e68ce57b731b95 100644
--- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
@@ -105,7 +105,7 @@ STAGE PLANS:
       Column Stats Desc:
           Columns: sourceIP, avgTimeOnSite, adRevenue
           Column Types: string, int, float
-          Table: uservisits_web_text_none
+          Table: default.uservisits_web_text_none
 
 PREHOOK: query: explain extended
 analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
@@ -228,7 +228,7 @@ STAGE PLANS:
       Column Stats Desc:
           Columns: sourceIP, avgTimeOnSite, adRevenue
           Column Types: string, int, float
-          Table: uservisits_web_text_none
+          Table: default.uservisits_web_text_none
           Is Table Level Stats: true
 
 PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue
@@ -338,7 +338,7 @@ STAGE PLANS:
       Column Stats Desc:
           Columns: a, b, c, d, e
           Column Types: int, double, string, boolean, binary
-          Table: empty_tab
+          Table: default.empty_tab
 
 PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e
 PREHOOK: type: QUERY