diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 211f474..a373bb1 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -548,6 +548,9 @@ public class HiveConf extends Configuration {
HIVE_STATS_RELIABLE("hive.stats.reliable", false),
// Collect table access keys information for operators that can benefit from bucketing
HIVE_STATS_COLLECT_TABLEKEYS("hive.stats.collect.tablekeys", false),
+ // Standard error allowed for NDV estimates, expressed as a percentage. A lower value
+ // indicates higher accuracy and a higher compute cost.
+ HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0),
// Concurrency
HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", false),
diff --git conf/hive-default.xml.template conf/hive-default.xml.template
index 93a86ec..abfff34 100644
--- conf/hive-default.xml.template
+++ conf/hive-default.xml.template
@@ -1041,6 +1041,13 @@
+ <property>
+ <name>hive.stats.ndv.error</name>
+ <value>20.0</value>
+ <description>Standard error expressed in percentage. Provides a tradeoff between accuracy and compute cost. A lower value for the error indicates higher accuracy and a higher compute cost.</description>
+ </property>
+
<property>
<name>hive.support.concurrency</name>
<value>false</value>
<description>Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.</description>
</property>
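For context on how this knob is consumed, here is a minimal sketch of setting it programmatically, assuming a plain HiveConf (the class name and the value 5.0 below are illustrative; the property can equally be set in hive-site.xml or per-session with `set hive.stats.ndv.error=5.0;`):

```java
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class NdvErrorKnob {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Tighten the allowed NDV standard error from the default 20.0 percent
    // to 5.0 percent; the estimate gets more accurate and more expensive.
    conf.setFloat(ConfVars.HIVE_STATS_NDV_ERROR.varname, 5.0f);
    System.out.println(conf.getFloat(ConfVars.HIVE_STATS_NDV_ERROR.varname, 20.0f));
  }
}
```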
diff --git data/files/UserVisits.dat data/files/UserVisits.dat
new file mode 100644
index 0000000..f56c5a1
--- /dev/null
+++ data/files/UserVisits.dat
@@ -0,0 +1,55 @@
+170.131.22.2|13rdgckzlcblruc.html|1984-8-7|336.869186722|NuSearch Spider|HUN|HUN-NL|remnants|3
+162.114.4.2|6xpirzjeytxdjsmwtmyeugkesratmpvamliekrijlgmvyyrslqwgw.html|1978-1-9|331.791153595|Superdownloads Spiderma|AUT|AUT-ZR|MHD|8
+177.110.45.18|11zvmoamsyaameokoeylbkivgquksibqbalnpmailbiyfxitbhfdroyxesixbjndkyqzl.html|1986-9-25|411.968497603|Mozilla/4.0|FLK|FLK-GB|apj@as.arizona.edu.|7
+157.111.12.37|44mvdnls.html|2002-7-3|486.660926201|PHP/4.0.|FIN|FIN-CZ|diffuse|3
+161.100.45.22|14ceyigx.html|1978-10-26|399.80234522|NP/0.1|BEN|BEN-CA|region|8
+164.118.48.16|28axfinfqwdcwoorukpwqvqoxxeuivbniclnkytavwdslrj.html|1996-12-8|298.335411612|MSNBOT_Mobile MSMOBOT Mozilla/2.0|USA|USA-IO|medium|1
+153.104.13.11|19aysprojntmnwymfdkaznbqxprxxaissjqkzhzivsvipuvuxfuxsvnqlfnigvby.html|1976-10-6|146.309480768|WebSearch.COM.AU/3.0.1|MCO|MCO-YD|state|5
+150.112.45.27|12hcaewxiswjeezfnlulkenwubaqsitpuarufosogoxls.html|1995-6-19|173.469334335|WinkBot/0.06|PHL|PHL-NN|important|2
+152.108.39.16|36umg.html|1974-3-28|269.969215988|GSiteCrawler/v1.xx rev. xxx|MNG|MNG-HI|...)|6
+174.119.41.16|60yxoboskwpyfin.html|2002-7-17|436.113482675|Infoseek SideWinder/2.0B|NIC|NIC-JP|data|1
+165.116.21.12|70difiadhmrvragggmoaufnuwwbakbjntnwzvxcdjtybufiarwbmcphzmizwkikewh.html|1984-2-6|13.099044572|WWWeasel Robot v1.00|THA|THA-FO|bubbles|6
+155.128.42.14|21brkepinqiwvtmfmebjckkhwevhxaesogkykzgyqpuligrul.html|1986-7-29|347.800952938|Mozilla/4.0 compatible ZyBorg/1.0|IRN|IRN-YS|conduction|1
+156.131.31.12|14nbaaoablhxrlvbfgrwcxktvshtkoqzddbdepegbmesxztdglzjjkc.html|2002-7-30|85.7691140217|Java1.1.xx.|BRA|BRA-BL|circumstellar|9
+159.122.42.18|4xfydvopxveeduudfzodxkbczvdlzou.html|1989-9-20|332.572440865|Metaeuro Web Crawler/0.2|LUX|LUX-SD|kinematics|7
+151.104.39.45|65psclahgvasawczpyicyxkuqzwpbowghmzkxzsdvtwwpzvfydiwbsqrrmhtbezjqyuo.html|2002-1-13|190.528735328|JobSpider_BA/1.|UGA|UGA-PY|pulsars:|7
+159.132.24.22|18vhcbzhhblfbayejcybyibwqsgzlkmswizyjzgrbrw.html|1978-1-2|182.368755789|Piffany_Web_Scraper_v0.|ITA|ITA-NJ|nonthermal|1
+170.101.17.16|40prmxavsjoizdzkgsncesndxebatfwvrmmejnacxol.html|1989-9-1|41.4163486896|Mozilla/4.01 [en]|ZAF|ZAF-AK|Scuti|6
+171.124.38.2|29nripzogexadckoiaoafxvtkrxksdqgveydtxsabpbfsltbmibrfwlqojagmr.html|1979-6-12|192.085693167|IconSurf/2.0 favicon monitor|SVN|SVN-DY|systems|5
+178.128.29.41|24tmrndfialwvkwybuspjyexlkiamebwtvilimqqncnimkgofzepximj.html|2000-7-8|276.89796127|obidos-bot|SLB|SLB-RL|(...|4
+175.101.24.43|70dcfbcotdzhfhuhquyosbcviglrkrakddmifpxzswg.html|1978-3-16|131.775726872|Mozilla/4.0|BMU|BMU-BR|spiral|6
+155.102.37.30|99cyllzbnsowifxdxsdmiseiceeriaaoucmgnlhaewxmbvqynulwmpepujhckhqfjdmxpuyt.html|1975-5-4|311.052004479|WebSearch.COM.AU/3.0.1|NLD|NLD-GX|Herbig-Haro|6
+156.105.11.18|1nczmzpivhbgn.html|1992-9-19|36.9747263531|Search/1.0|GLP|GLP-DJ|observations|3
+164.115.38.23|79bvcojctkaugbcterbzfykwvesklokgilbkalntvoocqqvuixunvekqjcburlbzxckxnyrjm.html|1991-4-20|267.047961774|Journster.com RSS/Atom aggregator 0.5|HKG|HKG-PK|radio|2
+179.133.2.36|12azizhsdhdgdpidjgmdeyzmfhdwsbezbeyjegcioforvxvfehjigiulqyhizmhargkwmmeartsnrosvvbdbkynawvi.html|1999-12-9|481.463770712|LeechGet 200x|SCG|SCG-XF|instruments|8
+178.107.45.18|45mbziaowxegkhzcmbsyrextgqjbyezodmqduqrqnwxydwaqytopxmidcsfbwfparfemvwdjtaiwxjnvcclaotdrmjs.html|1983-4-13|51.6686671965|IlTrovatore/1.2|HND|HND-AN|dynamics|2
+162.117.17.14|17tkabzxynnqswezhqmkvrlfycpmxqowlhgligihuwxmscmasylopwuozjawaotlwaxfggmack.html|2001-12-24|161.048060104|Mozilla/4.5 [en]C-CCK-MCD {TLC;RETAIL}|RWA|RWA-QE|rays|9
+178.119.40.7|48amqtmqxsjgrmjkszztfpegqzapidysnze.html|1987-4-3|492.988714137|Mozilla/4.0|AUT|AUT-ZR|cosmology:|8
+160.119.18.18|15yufqaoxpuqwb.html|1979-7-22|394.694548614|scooter-venus-3.0.vn|MCO|MCO-YD|outflows|1
+162.112.21.25|21boum.html|1991-2-6|165.368136543|LinkProver 2.|TCA|TCA-IS|spots|8
+176.112.31.17|20gblxgjcvpu.html|1991-8-5|78.2740990152|Mozilla/4.0|BMU|BMU-BR|masses|2
+166.130.12.13|9izokfebomgsiifyzrsepbbemutvj.html|2003-12-5|188.600736756|WWW-Mechanize/1.1|TGO|TGO-WB|bursts|5
+171.100.18.39|97sxfsgahjujwzlszmxkahyslcobrrlx.html|1985-11-21|143.277058506|Overture-WebCrawler/3.8/Fresh|SAU|SAU-KL|interferometric|5
+152.122.43.35|85zdszgzonsxkqbrkthtceiuzjsedwvghvkzvqzj.html|1989-12-1|315.628996565|moget/x.x|UMI|UMI-VU|Galaxy:|2
+157.133.36.37|15xnilzhtqjsxhhbzazrflznupllyhvdbsqjeqqyharfiyhhyhzdszrnpcyoktslljvqam.html|1990-3-20|426.498017786|PrivacyFinder/1.|UZB|UZB-ZJ|nebulae|7
+161.134.11.11|96kvrofepctfbesrphjiznjktygntkkubupsjvxyxrdzvwrkeasdobohauvueg.html|1984-6-6|280.039128409|Waypath development crawler - info at waypath dot co|IDN|IDN-BH|supergiants|6
+163.123.23.13|19rkrtwumqwmnnzisxyeesqacwolpypyxhipaejnvfzitzrlwqqbigblcqxrpnqmuybudkiyqhhjgzvdpleysg.html|1977-10-11|86.3390049695|Opera/5.0|LSO|LSO-PW|testing|7
+166.126.40.21|52ejufqiidwioozorbnsjxezfwaucndbihldnblvehdtwchoeuhoslnyioslbwmkdynrzymegpy.html|1990-10-20|125.582281932|Mozilla/4.0|BTN|BTN-HP|catalogs|9
+158.133.10.19|87nzdhsnzhkylakazmkvctgaaxtrafpxscxvjqijxthitrj.html|1982-10-5|481.583542862|larbin|GAB|GAB-CS|angular|8
+173.104.45.8|49sdptdphxjlbiwrbbrsebwqquadx.html|1981-5-2|41.3182727245|LECodeChecker/3.0 libgetdoc/1.|AUS|AUS-AV|bands|6
+160.101.31.43|6lrepnctlanokfhla.html|1973-9-7|133.29867101|sogou develop spide|SWE|SWE-TM|time|5
+150.127.33.8|22oeawpxhqahkvtaecwp.html|1999-3-16|398.882494477|W3C-WebCon/5.x.x libwww/5.x.|ISR|ISR-DY|history|1
+154.114.47.36|2mzzsgievabpkaoqegadbbjxwkutdisnvrmox.html|1981-7-24|332.760102125|mammoth/1.0|AUT|AUT-ZR|FUNCTION|3
+155.108.15.24|22beewtbnpw.html|1996-6-7|393.470347637|Scrubby/3.0|ABW|ABW-NB|horizontal-branch|4
+177.120.40.39|48itvyjulckeddslsuayoguojzhvqvmfgvyctiwflhj.html|1977-8-12|239.601807636|webmeasurement-bot, http://rvs.informatik.uni-leipzig.d|WSM|WSM-UF|are|3
+179.123.41.31|46eppnympstjuhivvpritvotqmivgsfmdkbtxafns.html|2001-11-26|258.55616439|Mozilla/2.0|SYR|SYR-XP|photometric|1
+175.100.9.4|32fjrnrlabonc.html|1988-10-22|344.394849153|Snapbot/1.|GUF|GUF-KP|acceleration|2
+155.126.7.17|72wufwnsdsqncftnvdcunnknzqnaiyflmcgsytkbmbpogicblew.html|1981-12-5|398.334494319|UKWizz/Nutch-0.8.1|NIC|NIC-JP|Kuiper|4
+150.118.20.31|1mbyargbxtnjtivflxzzredcfbtehxbxjcwkucmrwaaqiwvutuulzxnezhi.html|1982-8-27|168.936669894|Mozilla/4.0|IRL|IRL-NN|cataclysmic|5
+177.116.39.36|84maivbmcqggefkjtsde.html|1982-6-11|88.121669797|Mozilla/4.0|ARE|ARE-MX|instruments|1
+168.119.19.26|73vhjursdvxateuvrxsspwwfdbsoqfegeannuegyadzuitparisgfomiqfxhkcnocacxfivfmuzuopvfynmdcyl.html|1991-11-17|397.829289621|webbandit/4.xx.|NIC|NIC-JP|dust|2
+154.100.36.32|57rylepuglpfqvjwkxgrtftvqkjzjwsznjyzontuzizqdimofsfzxzuojeot.html|1999-1-5|334.714055649|RRC|GTM|GTM-VH|blue|7
+153.112.2.11|6pkwxtlgkkxoqtxpgrullqxjauquvmlkcwhzpsgzdeotymieddqpu.html|1975-8-6|348.218411093|Wotbox/alpha0.6|MNP|MNP-UD|supernovae:|6
+150.107.15.22|53gohsgrvrjgfptttlpfipgsnijsrhxsyeggwnysfhykxrdqdsvlicdwkmpcumut.html|1978-8-2|355.771603423|Mozilla/3.0|DEU|DEU-PU|stars|4
+150.126.27.44|0rgxbnwiqebsmszpkvfpxvhkleebngzxxgvzt.html|1989-5-18|467.800755054|Mozilla/3.01|ZWE|ZWE-TS|system|3
+151.101.32.3|34btbqii.html|1998-8-1|131.055972797|Orca Browser|THA|THA-FO|late-type|5
diff --git data/files/binary.txt data/files/binary.txt
new file mode 100644
index 0000000..c6fbdee
--- /dev/null
+++ data/files/binary.txt
@@ -0,0 +1,10 @@
+the quick brown fox jumped over the lazy little dog
+today is nice outside
+the quick brown fox jumped over the lazy little dog
+
+wikipedia is a great source of information
+the quick brown fox jumped over the lazy little dog
+
+estimating the number of distinct values is a hard problem
+
+the quick brown fox jumped over the lazy little dog
diff --git data/files/bool.txt data/files/bool.txt
new file mode 100644
index 0000000..065359b
--- /dev/null
+++ data/files/bool.txt
@@ -0,0 +1,33 @@
+true
+false
+true
+true
+true
+false
+false
+false
+false
+true
+true
+true
+true
+false
+
+false
+true
+true
+false
+false
+false
+false
+false
+false
+false
+false
+true
+false
+false
+false
+true
+true
+false
diff --git data/files/double.txt data/files/double.txt
new file mode 100644
index 0000000..66c030b
--- /dev/null
+++ data/files/double.txt
@@ -0,0 +1,16 @@
+55.33
+44.2
+435.33
+324.33
+324.33
+44.2
+55.3
+55.3
+0.0
+
+66.4
+23.22
+-87.2
+
+33.44
+55.3
diff --git data/files/employee.dat data/files/employee.dat
new file mode 100644
index 0000000..c9d0197
--- /dev/null
+++ data/files/employee.dat
@@ -0,0 +1,13 @@
+16|john
+17|robert
+18|andrew
+19|katty
+21|tom
+22|tim
+23|james
+24|paul
+27|edward
+29|alan
+31|kerry
+34|terri
+
diff --git data/files/employee2.dat data/files/employee2.dat
new file mode 100644
index 0000000..97fd05a
--- /dev/null
+++ data/files/employee2.dat
@@ -0,0 +1,7 @@
+16|john
+17|robert
+18|andrew
+19|katty
+27|edward
+29|alan
+31|kerry
\ No newline at end of file
diff --git data/files/int.txt data/files/int.txt
new file mode 100644
index 0000000..9553ed2
--- /dev/null
+++ data/files/int.txt
@@ -0,0 +1,12 @@
+4
+252
+233
+
+343
+43
+45
+344
+22
+54
+8
+13
diff --git metastore/if/hive_metastore.thrift metastore/if/hive_metastore.thrift
index d4fad72..7230107 100755
--- metastore/if/hive_metastore.thrift
+++ metastore/if/hive_metastore.thrift
@@ -194,6 +194,67 @@ struct Index {
10: bool deferredRebuild
}
+// column statistics
+struct BooleanColumnStatsData {
+1: required i64 numTrues,
+2: required i64 numFalses,
+3: required i64 numNulls
+}
+
+struct DoubleColumnStatsData {
+1: required double lowValue,
+2: required double highValue,
+3: required i64 numNulls,
+4: required i64 numDVs
+}
+
+struct LongColumnStatsData {
+1: required i64 lowValue,
+2: required i64 highValue,
+3: required i64 numNulls,
+4: required i64 numDVs
+}
+
+struct StringColumnStatsData {
+1: required i64 maxColLen,
+2: required double avgColLen,
+3: required i64 numNulls,
+4: required i64 numDVs
+}
+
+struct BinaryColumnStatsData {
+1: required i64 maxColLen,
+2: required double avgColLen,
+3: required i64 numNulls
+}
+
+union ColumnStatisticsData {
+1: BooleanColumnStatsData booleanStats,
+2: LongColumnStatsData longStats,
+3: DoubleColumnStatsData doubleStats,
+4: StringColumnStatsData stringStats,
+5: BinaryColumnStatsData binaryStats
+}
+
+struct ColumnStatisticsObj {
+1: required string colName,
+2: required string colType,
+3: required ColumnStatisticsData statsData
+}
+
+struct ColumnStatisticsDesc {
+1: required bool isTblLevel,
+2: required string dbName,
+3: required string tableName,
+4: optional string partName,
+5: optional i64 lastAnalyzed
+}
+
+struct ColumnStatistics {
+1: required ColumnStatisticsDesc statsDesc,
+2: required list<ColumnStatisticsObj> statsObj;
+}
+
// schema of the table/query results etc.
struct Schema {
// column names, types, comments
@@ -253,6 +314,10 @@ exception ConfigValSecurityException {
1: string message
}
+exception InvalidInputException {
+ 1: string message
+}
+
/**
* This interface is live.
*/
@@ -472,6 +537,37 @@ service ThriftHiveMetastore extends fb303.FacebookService
list<string> get_index_names(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1)
throws(1:MetaException o2)
+ // column statistics interfaces
+
+ // The update APIs persist the column statistics object(s) that are passed in. If statistics
+ // already exist for one or more of the columns, the existing statistics are overwritten. The
+ // update APIs validate that the dbName, tableName, partName and colName[] passed in as part of
+ // the ColumnStatistics struct are valid, and throw InvalidInputException/NoSuchObjectException
+ // if they are not.
+ bool update_table_column_statistics(1:ColumnStatistics stats_obj) throws (1:NoSuchObjectException o1,
+ 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
+ bool update_partition_column_statistics(1:ColumnStatistics stats_obj) throws (1:NoSuchObjectException o1,
+ 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4)
+
+ // The get APIs return the column statistics corresponding to db_name, tbl_name, [part_name] and
+ // col_name if such statistics exist. If the requested statistics don't exist, the get APIs throw
+ // NoSuchObjectException. For instance, if get_table_column_statistics is called on a partitioned
+ // table for which only partition-level column stats exist, it will throw NoSuchObjectException.
+ ColumnStatistics get_table_column_statistics(1:string db_name, 2:string tbl_name, 3:string col_name) throws
+ (1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidInputException o3, 4:InvalidObjectException o4)
+ ColumnStatistics get_partition_column_statistics(1:string db_name, 2:string tbl_name, 3:string part_name,
+ 4:string col_name) throws (1:NoSuchObjectException o1, 2:MetaException o2,
+ 3:InvalidInputException o3, 4:InvalidObjectException o4)
+
+ // The delete APIs attempt to delete the column statistics, if any, associated with a given
+ // db_name, tbl_name, [part_name] and col_name. If a delete API doesn't find the statistics
+ // record in the metastore, it throws NoSuchObjectException. The delete APIs validate the input
+ // and throw InvalidInputException/InvalidObjectException if the input is invalid.
+ bool delete_partition_column_statistics(1:string db_name, 2:string tbl_name, 3:string part_name, 4:string col_name) throws
+ (1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidObjectException o3,
+ 4:InvalidInputException o4)
+ bool delete_table_column_statistics(1:string db_name, 2:string tbl_name, 3:string col_name) throws
+ (1:NoSuchObjectException o1, 2:MetaException o2, 3:InvalidObjectException o3,
+ 4:InvalidInputException o4)
+
//authorization privileges
bool create_role(1:Role role) throws(1:MetaException o1)
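To make the shape of these structs concrete, here is a sketch of assembling a table-level ColumnStatistics request with the Java classes Thrift generates from the definitions above (the database, table, column names and statistic values are hypothetical):

```java
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

public class ColumnStatsBuilder {
  public static ColumnStatistics tableLevelIntStats() {
    // Table-level statistics, so no partName is set.
    ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
    desc.setIsTblLevel(true);
    desc.setDbName("default");
    desc.setTableName("page_view");

    // Integer columns travel as LongColumnStatsData inside the union.
    LongColumnStatsData longData = new LongColumnStatsData();
    longData.setLowValue(0);
    longData.setHighValue(86400);
    longData.setNumNulls(3);
    longData.setNumDVs(1523);

    ColumnStatisticsData data = new ColumnStatisticsData();
    data.setLongStats(longData);

    ColumnStatisticsObj obj = new ColumnStatisticsObj();
    obj.setColName("viewtime");
    obj.setColType("int");
    obj.setStatsData(data);

    List<ColumnStatisticsObj> objs = new ArrayList<ColumnStatisticsObj>();
    objs.add(obj);

    ColumnStatistics stats = new ColumnStatistics();
    stats.setStatsDesc(desc);
    stats.setStatsObj(objs);
    return stats;
  }
}
```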
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 915a5cf..dfd0ed0 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -55,6 +55,9 @@ import org.apache.hadoop.hive.common.metrics.Metrics;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
import org.apache.hadoop.hive.metastore.api.Constants;
import org.apache.hadoop.hive.metastore.api.Database;
@@ -65,6 +68,7 @@ import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
import org.apache.hadoop.hive.metastore.api.HiveObjectType;
import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.metastore.api.IndexAlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
@@ -616,7 +620,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
private void drop_database_core(RawStore ms,
final String name, final boolean deleteData, final boolean cascade)
throws NoSuchObjectException, InvalidOperationException, MetaException,
- IOException {
+ IOException, InvalidObjectException, InvalidInputException {
boolean success = false;
Database db = null;
List<Path> tablePaths = new ArrayList<Path>();
@@ -748,6 +752,10 @@ public class HiveMetaStore extends ThriftHiveMetastore {
success = true;
} catch (IOException e) {
throw new MetaException(e.getMessage());
+ } catch (InvalidInputException e) {
+ throw new MetaException(e.getMessage());
+ } catch (InvalidObjectException e) {
+ throw new MetaException(e.getMessage());
} finally {
endFunction("drop_database", success);
}
@@ -993,10 +1001,9 @@ public class HiveMetaStore extends ThriftHiveMetastore {
return (ms.getTable(dbname, name) != null);
}
- private void drop_table_core(final RawStore ms, final String dbname,
- final String name, final boolean deleteData)
- throws NoSuchObjectException, MetaException, IOException {
-
+ private void drop_table_core(final RawStore ms, final String dbname, final String name,
+ final boolean deleteData) throws NoSuchObjectException, MetaException, IOException,
+ InvalidObjectException, InvalidInputException {
boolean success = false;
boolean isExternal = false;
Path tblPath = null;
@@ -1123,11 +1130,14 @@ public class HiveMetaStore extends ThriftHiveMetastore {
* @return
* @throws MetaException
* @throws IOException
+ * @throws InvalidInputException
+ * @throws InvalidObjectException
+ * @throws NoSuchObjectException
*/
private List<Path> dropPartitionsAndGetLocations(RawStore ms, String dbName,
- String tableName, Path tablePath, List<FieldSchema> partitionKeys, boolean checkLocation)
- throws MetaException, IOException {
-
+ String tableName, Path tablePath, List<FieldSchema> partitionKeys, boolean checkLocation)
+ throws MetaException, IOException, NoSuchObjectException, InvalidObjectException,
+ InvalidInputException {
int partitionBatchSize = HiveConf.getIntVar(hiveConf,
ConfVars.METASTORE_BATCH_RETRIEVE_MAX);
Path tableDnsPath = null;
@@ -1176,6 +1186,10 @@ public class HiveMetaStore extends ThriftHiveMetastore {
success = true;
} catch (IOException e) {
throw new MetaException(e.getMessage());
+ } catch (InvalidInputException e) {
+ throw new MetaException(e.getMessage());
+ } catch (InvalidObjectException e) {
+ throw new MetaException(e.getMessage());
} finally {
endFunction("drop_table", success);
}
@@ -1602,7 +1616,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
private boolean drop_partition_common(RawStore ms, String db_name, String tbl_name,
List<String> part_vals, final boolean deleteData)
- throws MetaException, NoSuchObjectException, IOException {
+ throws MetaException, NoSuchObjectException, IOException, InvalidObjectException, InvalidInputException {
boolean success = false;
Path partPath = null;
@@ -1679,6 +1693,10 @@ public class HiveMetaStore extends ThriftHiveMetastore {
ret = drop_partition_common(getMS(), db_name, tbl_name, part_vals, deleteData);
} catch (IOException e) {
throw new MetaException(e.getMessage());
+ } catch (InvalidInputException e) {
+ throw new MetaException(e.getMessage());
+ } catch (InvalidObjectException e) {
+ throw new MetaException(e.getMessage());
} finally {
endFunction("drop_partition", ret);
}
@@ -2189,7 +2207,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
private boolean drop_partition_by_name_core(final RawStore ms,
final String db_name, final String tbl_name, final String part_name,
final boolean deleteData) throws NoSuchObjectException,
- MetaException, TException, IOException {
+ MetaException, TException, IOException, InvalidObjectException, InvalidInputException {
List<String> partVals = null;
try {
@@ -2214,6 +2232,10 @@ public class HiveMetaStore extends ThriftHiveMetastore {
part_name, deleteData);
} catch (IOException e) {
throw new MetaException(e.getMessage());
+ } catch (InvalidInputException e) {
+ throw new MetaException(e.getMessage());
+ } catch (InvalidObjectException e) {
+ throw new MetaException(e.getMessage());
} finally {
endFunction("drop_partition_by_name", ret);
}
@@ -2375,6 +2397,10 @@ public class HiveMetaStore extends ThriftHiveMetastore {
indexName, deleteData);
} catch (IOException e) {
throw new MetaException(e.getMessage());
+ } catch (InvalidInputException e) {
+ throw new MetaException(e.getMessage());
+ } catch (InvalidObjectException e) {
+ throw new MetaException(e.getMessage());
} finally {
endFunction("drop_index_by_name", ret);
}
@@ -2385,7 +2411,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
private boolean drop_index_by_name_core(final RawStore ms,
final String dbName, final String tblName,
final String indexName, final boolean deleteData) throws NoSuchObjectException,
- MetaException, TException, IOException {
+ MetaException, TException, IOException, InvalidObjectException, InvalidInputException {
boolean success = false;
Path tblPath = null;
@@ -2499,6 +2525,195 @@ public class HiveMetaStore extends ThriftHiveMetastore {
return ret;
}
+ private String lowerCaseConvertPartName(String partName) throws MetaException {
+ boolean isFirst = true;
+ Map<String, String> partSpec = Warehouse.makeEscSpecFromName(partName);
+ String convertedPartName = "";
+
+ for (Map.Entry<String, String> entry : partSpec.entrySet()) {
+ String partColName = entry.getKey();
+ String partColVal = entry.getValue();
+
+ if (!isFirst) {
+ convertedPartName += "/";
+ } else {
+ isFirst = false;
+ }
+ convertedPartName += partColName.toLowerCase() + "=" + partColVal;
+ }
+ return convertedPartName;
+ }
+
+ public ColumnStatistics get_table_column_statistics(String dbName, String tableName,
+ String colName) throws NoSuchObjectException, MetaException, TException,
+ InvalidInputException, InvalidObjectException
+ {
+ dbName = dbName.toLowerCase();
+ tableName = tableName.toLowerCase();
+ colName = colName.toLowerCase();
+ startFunction("get_column_statistics_by_table: db=" + dbName + " table=" + tableName +
+ " column=" + colName);
+ ColumnStatistics statsObj = null;
+ try {
+ statsObj = getMS().getTableColumnStatistics(dbName, tableName, colName);
+ } finally {
+ endFunction("get_column_statistics_by_table: ", statsObj != null);
+ }
+ return statsObj;
+ }
+
+ public ColumnStatistics get_partition_column_statistics(String dbName, String tableName,
+ String partName, String colName) throws NoSuchObjectException, MetaException,
+ InvalidInputException, TException, InvalidObjectException
+ {
+ dbName = dbName.toLowerCase();
+ tableName = tableName.toLowerCase();
+ colName = colName.toLowerCase();
+ String convertedPartName = lowerCaseConvertPartName(partName);
+ startFunction("get_column_statistics_by_partition: db=" + dbName + " table=" + tableName +
+ " partition=" + convertedPartName + " column=" + colName);
+ ColumnStatistics statsObj = null;
+
+ try {
+ List<String> partVals = getPartValsFromName(getMS(), dbName, tableName, partName);
+ statsObj = getMS().getPartitionColumnStatistics(dbName, tableName, convertedPartName,
+ partVals, colName);
+ } finally {
+ endFunction("get_column_statistics_by_partition: ", statsObj != null);
+ }
+ return statsObj;
+ }
+
+ public boolean update_table_column_statistics(ColumnStatistics colStats)
+ throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
+ InvalidInputException
+ {
+ String dbName = null;
+ String tableName = null;
+ String colName = null;
+ ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
+ dbName = statsDesc.getDbName().toLowerCase();
+ tableName = statsDesc.getTableName().toLowerCase();
+
+ statsDesc.setDbName(dbName);
+ statsDesc.setTableName(tableName);
+ long time = System.currentTimeMillis() / 1000;
+ statsDesc.setLastAnalyzed(time);
+
+ List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
+
+ for (ColumnStatisticsObj statsObj : statsObjs) {
+ colName = statsObj.getColName().toLowerCase();
+ statsObj.setColName(colName);
+ startFunction("write_column_statistics: db=" + dbName + " table=" + tableName +
+ " column=" + colName);
+ }
+
+ colStats.setStatsDesc(statsDesc);
+ colStats.setStatsObj(statsObjs);
+
+ boolean ret = false;
+
+ try {
+ ret = getMS().updateTableColumnStatistics(colStats);
+ return ret;
+ } finally {
+ endFunction("write_column_statistics: ", ret != false);
+ }
+ }
+
+ public boolean update_partition_column_statistics(ColumnStatistics colStats)
+ throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
+ InvalidInputException
+ {
+
+ String dbName = null;
+ String tableName = null;
+ String partName = null;
+ String colName = null;
+
+ ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
+ dbName = statsDesc.getDbName().toLowerCase();
+ tableName = statsDesc.getTableName().toLowerCase();
+ partName = lowerCaseConvertPartName(statsDesc.getPartName());
+
+ statsDesc.setDbName(dbName);
+ statsDesc.setTableName(tableName);
+ statsDesc.setPartName(partName);
+
+ long time = System.currentTimeMillis() / 1000;
+ statsDesc.setLastAnalyzed(time);
+
+ List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
+
+ for (ColumnStatisticsObj statsObj : statsObjs) {
+ colName = statsObj.getColName().toLowerCase();
+ statsObj.setColName(colName);
+ startFunction("write_partition_column_statistics: db=" + dbName + " table=" + tableName +
+ " part=" + partName + " column=" + colName);
+ }
+
+ colStats.setStatsDesc(statsDesc);
+ colStats.setStatsObj(statsObjs);
+
+ boolean ret = false;
+
+ try {
+ List<String> partVals = getPartValsFromName(getMS(), dbName,
+ tableName, partName);
+ ret = getMS().updatePartitionColumnStatistics(colStats, partVals);
+ return ret;
+ } finally {
+ endFunction("write_partition_column_statistics: ", ret != false);
+ }
+ }
+
+ public boolean delete_partition_column_statistics(String dbName, String tableName,
+ String partName, String colName) throws NoSuchObjectException, MetaException,
+ InvalidObjectException, TException, InvalidInputException
+ {
+ dbName = dbName.toLowerCase();
+ tableName = tableName.toLowerCase();
+ if (colName != null) {
+ colName = colName.toLowerCase();
+ }
+ String convertedPartName = lowerCaseConvertPartName(partName);
+ startFunction("delete_column_statistics_by_partition: db=" + dbName + " table=" + tableName +
+ " partition=" + convertedPartName + " column=" + colName);
+ boolean ret = false;
+
+ try {
+ List<String> partVals = getPartValsFromName(getMS(), dbName, tableName, convertedPartName);
+ ret = getMS().deletePartitionColumnStatistics(dbName, tableName,
+ convertedPartName, partVals, colName);
+ } finally {
+ endFunction("delete_column_statistics_by_partition: ", ret != false);
+ }
+ return ret;
+ }
+
+ public boolean delete_table_column_statistics(String dbName, String tableName, String colName)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, TException,
+ InvalidInputException
+ {
+ dbName = dbName.toLowerCase();
+ tableName = tableName.toLowerCase();
+
+ if (colName != null) {
+ colName = colName.toLowerCase();
+ }
+ startFunction("delete_column_statistics_by_table: db=" + dbName + " table=" + tableName +
+ " column=" + colName);
+
+ boolean ret = false;
+ try {
+ ret = getMS().deleteTableColumnStatistics(dbName, tableName, colName);
+ } finally {
+ endFunction("delete_column_statistics_by_table: ", ret != false);
+ }
+ return ret;
+ }
+
@Override
public List get_partitions_by_filter(final String dbName,
final String tblName, final String filter, final short maxParts)
@@ -3173,6 +3388,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
}
+
/**
* Discard a current delegation token.
*
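A note on lowerCaseConvertPartName above: database, table and column names are lowercased wholesale, but a partition name of the form k1=v1/k2=v2 only has its keys lowercased; values keep their case. Below is a standalone sketch of that transformation under the assumption of well-formed input (the real handler goes through Warehouse.makeEscSpecFromName, which additionally handles escaping; that detail is elided here):

```java
import java.util.LinkedHashMap;
import java.util.Map;

public class PartNameNormalizer {
  // Lowercase the key half of each "key=value" component of a Hive
  // partition name, preserving the values and the component order.
  public static String lowerCaseKeys(String partName) {
    Map<String, String> spec = new LinkedHashMap<String, String>();
    for (String component : partName.split("/")) {
      int eq = component.indexOf('=');
      spec.put(component.substring(0, eq), component.substring(eq + 1));
    }
    StringBuilder out = new StringBuilder();
    for (Map.Entry<String, String> e : spec.entrySet()) {
      if (out.length() > 0) {
        out.append('/');
      }
      out.append(e.getKey().toLowerCase()).append('=').append(e.getValue());
    }
    return out.toString();
  }

  public static void main(String[] args) {
    // Prints "ds=2008-04-08/hr=NoonShift" -- keys lowered, values kept.
    System.out.println(lowerCaseKeys("DS=2008-04-08/HR=NoonShift"));
  }
}
```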
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index 17b986c..0ae8a4c 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -40,12 +40,14 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
@@ -935,6 +937,50 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
return client.get_indexes(dbName, tblName, max);
}
+ /** {@inheritDoc} */
+ public boolean updateTableColumnStatistics(ColumnStatistics statsObj)
+ throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
+ InvalidInputException {
+ return client.update_table_column_statistics(statsObj);
+ }
+
+ /** {@inheritDoc} */
+ public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj)
+ throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
+ InvalidInputException {
+ return client.update_partition_column_statistics(statsObj);
+ }
+
+ /** {@inheritDoc} */
+ public ColumnStatistics getTableColumnStatistics(String dbName, String tableName, String colName)
+ throws NoSuchObjectException, MetaException, TException, InvalidInputException,
+ InvalidObjectException {
+ return client.get_table_column_statistics(dbName, tableName, colName);
+ }
+
+ /** {@inheritDoc} */
+ public ColumnStatistics getPartitionColumnStatistics(String dbName, String tableName,
+ String partName, String colName) throws NoSuchObjectException, MetaException, TException,
+ InvalidInputException, InvalidObjectException {
+ return client.get_partition_column_statistics(dbName, tableName, partName, colName);
+ }
+
+ /** {@inheritDoc} */
+ public boolean deletePartitionColumnStatistics(String dbName, String tableName, String partName,
+ String colName) throws NoSuchObjectException, InvalidObjectException, MetaException,
+ TException, InvalidInputException
+ {
+ return client.delete_partition_column_statistics(dbName, tableName, partName, colName);
+ }
+
+ /** {@inheritDoc} */
+ public boolean deleteTableColumnStatistics(String dbName, String tableName, String colName)
+ throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
+ InvalidInputException
+ {
+ return client.delete_table_column_statistics(dbName, tableName, colName);
+ }
+
/**
* @param db
* @param tableName
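Taken together, a client round-trip through the new methods looks roughly like this; a sketch assuming a reachable metastore and the hypothetical ColumnStatsBuilder sketch from the Thrift section above (names are illustrative, error handling trimmed to what the interface declares):

```java
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;

public class TableStatsRoundTrip {
  public static void main(String[] args) throws Exception {
    HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
    try {
      // Persist table-level stats; existing stats for the column are overwritten.
      client.updateTableColumnStatistics(ColumnStatsBuilder.tableLevelIntStats());

      // Read them back; throws NoSuchObjectException if none exist.
      ColumnStatistics fetched =
          client.getTableColumnStatistics("default", "page_view", "viewtime");
      System.out.println(fetched.getStatsObj().get(0).getStatsData());

      // Remove them again.
      client.deleteTableColumnStatistics("default", "page_view", "viewtime");
    } finally {
      client.close();
    }
  }
}
```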
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
index 3883b5b..6f32b22 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
@@ -22,12 +22,14 @@ import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
@@ -726,6 +728,105 @@ public interface IMetaStoreClient {
MetaException, TException;
/**
+ * Write table level column statistics to persistent store
+ * @param statsObj
+ * @return boolean indicating the status of the operation
+ * @throws NoSuchObjectException
+ * @throws InvalidObjectException
+ * @throws MetaException
+ * @throws TException
+ * @throws InvalidInputException
+ */
+
+ public boolean updateTableColumnStatistics(ColumnStatistics statsObj)
+ throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
+ InvalidInputException;
+
+ /**
+ * Write partition level column statistics to persistent store
+ * @param statsObj
+ * @return boolean indicating the status of the operation
+ * @throws NoSuchObjectException
+ * @throws InvalidObjectException
+ * @throws MetaException
+ * @throws TException
+ * @throws InvalidInputException
+ */
+
+ public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj)
+ throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
+ InvalidInputException;
+
+ /**
+ * Get table level column statistics given dbName, tableName and colName
+ * @param dbName
+ * @param tableName
+ * @param colName
+ * @return ColumnStatistics struct for a given db, table and col
+ * @throws NoSuchObjectException
+ * @throws MetaException
+ * @throws TException
+ * @throws InvalidInputException
+ * @throws InvalidObjectException
+ */
+
+ public ColumnStatistics getTableColumnStatistics(String dbName, String tableName, String colName)
+ throws NoSuchObjectException, MetaException, TException,
+ InvalidInputException, InvalidObjectException;
+
+ /**
+ * Get partition level column statistics given dbName, tableName, partitionName and colName
+ * @param dbName
+ * @param tableName
+ * @param partitionName
+ * @param colName
+ * @return ColumnStatistics struct for a given db, table, partition and col
+ * @throws NoSuchObjectException
+ * @throws MetaException
+ * @throws TException
+ * @throws InvalidInputException
+ * @throws InvalidObjectException
+ */
+
+ public ColumnStatistics getPartitionColumnStatistics(String dbName, String tableName,
+ String partitionName, String colName) throws NoSuchObjectException, MetaException, TException,
+ InvalidInputException, InvalidObjectException;
+
+ /**
+ * Delete partition level column statistics given dbName, tableName, partName and colName
+ * @param dbName
+ * @param tableName
+ * @param partName
+ * @param colName
+ * @return boolean indicating outcome of the operation
+ * @throws NoSuchObjectException
+ * @throws InvalidObjectException
+ * @throws MetaException
+ * @throws TException
+ * @throws InvalidInputException
+ */
+
+ public boolean deletePartitionColumnStatistics(String dbName, String tableName,
+ String partName, String colName) throws NoSuchObjectException, MetaException,
+ InvalidObjectException, TException, InvalidInputException;
+
+ /**
+ * Delete table level column statistics given dbName, tableName and colName
+ * @param dbName
+ * @param tableName
+ * @param colName
+ * @return boolean indicating the outcome of the operation
+ * @throws NoSuchObjectException
+ * @throws MetaException
+ * @throws InvalidObjectException
+ * @throws TException
+ * @throws InvalidInputException
+ */
+
+ public boolean deleteTableColumnStatistics(String dbName, String tableName, String colName) throws
+ NoSuchObjectException, MetaException, InvalidObjectException, TException, InvalidInputException;
+
+ /**
* @param role
* role object
* @return true on success
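The partition-level methods follow the same pattern, keyed additionally by a partition name in Hive's key=value form; another hedged sketch with hypothetical names:

```java
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;

public class PartitionStatsExample {
  public static void main(String[] args) throws Exception {
    HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
    try {
      // Fetch stats for a single partition/column pair; throws
      // NoSuchObjectException if none were ever written.
      ColumnStatistics stats = client.getPartitionColumnStatistics(
          "default", "page_view", "ds=2008-04-08/hr=12", "viewtime");
      System.out.println(stats.getStatsDesc().getLastAnalyzed());

      // Delete them for that partition only; table-level stats, if any,
      // are left untouched.
      client.deletePartitionColumnStatistics(
          "default", "page_view", "ds=2008-04-08/hr=12", "viewtime");
    } finally {
      client.close();
    }
  }
}
```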
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index a49aecd..652f249 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -56,14 +56,23 @@ import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
import org.apache.hadoop.hive.metastore.api.HiveObjectType;
import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Order;
@@ -77,6 +86,7 @@ import org.apache.hadoop.hive.metastore.api.Role;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.Type;
import org.apache.hadoop.hive.metastore.api.UnknownDBException;
@@ -91,6 +101,7 @@ import org.apache.hadoop.hive.metastore.model.MIndex;
import org.apache.hadoop.hive.metastore.model.MOrder;
import org.apache.hadoop.hive.metastore.model.MPartition;
import org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege;
+import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics;
import org.apache.hadoop.hive.metastore.model.MPartitionEvent;
import org.apache.hadoop.hive.metastore.model.MPartitionPrivilege;
import org.apache.hadoop.hive.metastore.model.MRole;
@@ -100,6 +111,7 @@ import org.apache.hadoop.hive.metastore.model.MStorageDescriptor;
import org.apache.hadoop.hive.metastore.model.MStringList;
import org.apache.hadoop.hive.metastore.model.MTable;
import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege;
+import org.apache.hadoop.hive.metastore.model.MTableColumnStatistics;
import org.apache.hadoop.hive.metastore.model.MTablePrivilege;
import org.apache.hadoop.hive.metastore.model.MType;
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.ANTLRNoCaseStringStream;
@@ -673,7 +685,8 @@ public class ObjectStore implements RawStore, Configurable {
}
}
- public boolean dropTable(String dbName, String tableName) throws MetaException {
+ public boolean dropTable(String dbName, String tableName) throws MetaException,
+ NoSuchObjectException, InvalidObjectException, InvalidInputException {
boolean success = false;
try {
openTransaction();
@@ -701,6 +714,13 @@ public class ObjectStore implements RawStore, Configurable {
if (partColGrants != null && partColGrants.size() > 0) {
pm.deletePersistentAll(partColGrants);
}
+ // delete column statistics if present
+ try {
+ deleteTableColumnStatistics(dbName, tableName, null);
+ } catch (NoSuchObjectException e) {
+ LOG.info("Found no table level column statistics associated with db " + dbName +
+ " table " + tableName + " record to delete");
+ }
preDropStorageDescriptor(tbl.getSd());
// then remove the table
@@ -1267,7 +1287,8 @@ public class ObjectStore implements RawStore, Configurable {
@Override
public boolean dropPartition(String dbName, String tableName,
- List<String> part_vals) throws MetaException {
+ List<String> part_vals) throws MetaException, NoSuchObjectException, InvalidObjectException,
+ InvalidInputException {
boolean success = false;
try {
openTransaction();
@@ -1287,8 +1308,13 @@ public class ObjectStore implements RawStore, Configurable {
* drop the storage descriptor cleanly, etc.)
* @param part - the MPartition to drop
* @return whether the transaction committed successfully
+ * @throws InvalidInputException
+ * @throws InvalidObjectException
+ * @throws MetaException
+ * @throws NoSuchObjectException
*/
- private boolean dropPartitionCommon(MPartition part) {
+ private boolean dropPartitionCommon(MPartition part) throws NoSuchObjectException, MetaException,
+ InvalidObjectException, InvalidInputException {
boolean success = false;
try {
openTransaction();
@@ -1316,6 +1342,17 @@ public class ObjectStore implements RawStore, Configurable {
if (partColumnGrants != null && partColumnGrants.size() > 0) {
pm.deletePersistentAll(partColumnGrants);
}
+
+ String dbName = part.getTable().getDatabase().getName();
+ String tableName = part.getTable().getTableName();
+
+ // delete partition level column stats if they exist
+ try {
+ deletePartitionColumnStatistics(dbName, tableName, partName, part.getValues(), null);
+ } catch (NoSuchObjectException e) {
+ LOG.info("No column statistics records found to delete");
+ }
+
preDropStorageDescriptor(part.getSd());
pm.deletePersistent(part);
}
@@ -4446,6 +4483,743 @@ public class ObjectStore implements RawStore, Configurable {
}
}
+ // Methods to persist, maintain and retrieve Column Statistics
+ private MTableColumnStatistics convertToMTableColumnStatistics(ColumnStatisticsDesc statsDesc,
+ ColumnStatisticsObj statsObj) throws NoSuchObjectException,
+ MetaException, InvalidObjectException
+ {
+ if (statsObj == null || statsDesc == null) {
+ throw new InvalidObjectException("Invalid column stats object");
+ }
+
+ String dbName = statsDesc.getDbName();
+ String tableName = statsDesc.getTableName();
+ MTable table = getMTable(dbName, tableName);
+
+ if (table == null) {
+ throw new NoSuchObjectException("Table " + tableName +
+ " for which stats is gathered doesn't exist.");
+ }
+
+ MTableColumnStatistics mColStats = new MTableColumnStatistics();
+ mColStats.setTable(table);
+ mColStats.setDbName(statsDesc.getDbName());
+ mColStats.setTableName(statsDesc.getTableName());
+ mColStats.setLastAnalyzed(statsDesc.getLastAnalyzed());
+ mColStats.setColName(statsObj.getColName());
+ mColStats.setColType(statsObj.getColType());
+
+ if (statsObj.getStatsData().isSetBooleanStats()) {
+ BooleanColumnStatsData boolStats = statsObj.getStatsData().getBooleanStats();
+ mColStats.setBooleanStats(boolStats.getNumTrues(), boolStats.getNumFalses(),
+ boolStats.getNumNulls());
+ } else if (statsObj.getStatsData().isSetLongStats()) {
+ LongColumnStatsData longStats = statsObj.getStatsData().getLongStats();
+ mColStats.setLongStats(longStats.getNumNulls(), longStats.getNumDVs(),
+ longStats.getLowValue(), longStats.getHighValue());
+ } else if (statsObj.getStatsData().isSetDoubleStats()) {
+ DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats();
+ mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(),
+ doubleStats.getLowValue(), doubleStats.getHighValue());
+ } else if (statsObj.getStatsData().isSetStringStats()) {
+ StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats();
+ mColStats.setStringStats(stringStats.getNumNulls(), stringStats.getNumDVs(),
+ stringStats.getMaxColLen(), stringStats.getAvgColLen());
+ } else if (statsObj.getStatsData().isSetBinaryStats()) {
+ BinaryColumnStatsData binaryStats = statsObj.getStatsData().getBinaryStats();
+ mColStats.setBinaryStats(binaryStats.getNumNulls(), binaryStats.getMaxColLen(),
+ binaryStats.getAvgColLen());
+ }
+ return mColStats;
+ }
+
+ private ColumnStatisticsObj getTableColumnStatisticsObj(MTableColumnStatistics mStatsObj) {
+ ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
+ statsObj.setColType(mStatsObj.getColType());
+ statsObj.setColName(mStatsObj.getColName());
+ String colType = mStatsObj.getColType();
+ ColumnStatisticsData colStatsData = new ColumnStatisticsData();
+
+ if (colType.equalsIgnoreCase("boolean")) {
+ BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
+ boolStats.setNumFalses(mStatsObj.getNumFalses());
+ boolStats.setNumTrues(mStatsObj.getNumTrues());
+ boolStats.setNumNulls(mStatsObj.getNumNulls());
+ colStatsData.setBooleanStats(boolStats);
+ } else if (colType.equalsIgnoreCase("string")) {
+ StringColumnStatsData stringStats = new StringColumnStatsData();
+ stringStats.setNumNulls(mStatsObj.getNumNulls());
+ stringStats.setAvgColLen(mStatsObj.getAvgColLen());
+ stringStats.setMaxColLen(mStatsObj.getMaxColLen());
+ stringStats.setNumDVs(mStatsObj.getNumDVs());
+ colStatsData.setStringStats(stringStats);
+ } else if (colType.equalsIgnoreCase("binary")) {
+ BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
+ binaryStats.setNumNulls(mStatsObj.getNumNulls());
+ binaryStats.setAvgColLen(mStatsObj.getAvgColLen());
+ binaryStats.setMaxColLen(mStatsObj.getMaxColLen());
+ colStatsData.setBinaryStats(binaryStats);
+ } else if (colType.equalsIgnoreCase("bigint") || colType.equalsIgnoreCase("int") ||
+ colType.equalsIgnoreCase("smallint") || colType.equalsIgnoreCase("tinyint") ||
+ colType.equalsIgnoreCase("timestamp")) {
+ LongColumnStatsData longStats = new LongColumnStatsData();
+ longStats.setNumNulls(mStatsObj.getNumNulls());
+ longStats.setHighValue(mStatsObj.getHighValueAsLong());
+ longStats.setLowValue(mStatsObj.getLowValueAsLong());
+ longStats.setNumDVs(mStatsObj.getNumDVs());
+ colStatsData.setLongStats(longStats);
+ } else if (colType.equalsIgnoreCase("double") || colType.equalsIgnoreCase("float")) {
+ DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
+ doubleStats.setNumNulls(mStatsObj.getNumNulls());
+ doubleStats.setHighValue(mStatsObj.getHighValueAsDouble());
+ doubleStats.setLowValue(mStatsObj.getLowValueAsDouble());
+ doubleStats.setNumDVs(mStatsObj.getNumDVs());
+ colStatsData.setDoubleStats(doubleStats);
+ }
+ statsObj.setStatsData(colStatsData);
+ return statsObj;
+ }
+
+ private ColumnStatisticsDesc getTableColumnStatisticsDesc(MTableColumnStatistics mStatsObj) {
+ ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
+ statsDesc.setIsTblLevel(true);
+ statsDesc.setDbName(mStatsObj.getDbName());
+ statsDesc.setTableName(mStatsObj.getTableName());
+ statsDesc.setLastAnalyzed(mStatsObj.getLastAnalyzed());
+ return statsDesc;
+ }
+
+ private ColumnStatistics convertToTableColumnStatistics(MTableColumnStatistics mStatsObj)
+ throws MetaException
+ {
+ if (mStatsObj == null) {
+ return null;
+ }
+
+ ColumnStatisticsDesc statsDesc = getTableColumnStatisticsDesc(mStatsObj);
+ ColumnStatisticsObj statsObj = getTableColumnStatisticsObj(mStatsObj);
+ List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
+ statsObjs.add(statsObj);
+
+ ColumnStatistics colStats = new ColumnStatistics();
+ colStats.setStatsDesc(statsDesc);
+ colStats.setStatsObj(statsObjs);
+ return colStats;
+ }
+
+ private MPartitionColumnStatistics convertToMPartitionColumnStatistics(ColumnStatisticsDesc
+ statsDesc, ColumnStatisticsObj statsObj, List<String> partVal)
+ throws MetaException, NoSuchObjectException
+ {
+ if (statsDesc == null || statsObj == null || partVal == null) {
+ return null;
+ }
+
+ MPartition partition = getMPartition(statsDesc.getDbName(), statsDesc.getTableName(), partVal);
+
+ if (partition == null) {
+ throw new NoSuchObjectException("Partition for which stats is gathered doesn't exist.");
+ }
+
+ MPartitionColumnStatistics mColStats = new MPartitionColumnStatistics();
+ mColStats.setPartition(partition);
+ mColStats.setDbName(statsDesc.getDbName());
+ mColStats.setTableName(statsDesc.getTableName());
+ mColStats.setPartitionName(statsDesc.getPartName());
+ mColStats.setLastAnalyzed(statsDesc.getLastAnalyzed());
+ mColStats.setColName(statsObj.getColName());
+ mColStats.setColType(statsObj.getColType());
+
+ if (statsObj.getStatsData().isSetBooleanStats()) {
+ BooleanColumnStatsData boolStats = statsObj.getStatsData().getBooleanStats();
+ mColStats.setBooleanStats(boolStats.getNumTrues(), boolStats.getNumFalses(),
+ boolStats.getNumNulls());
+ } else if (statsObj.getStatsData().isSetLongStats()) {
+ LongColumnStatsData longStats = statsObj.getStatsData().getLongStats();
+ mColStats.setLongStats(longStats.getNumNulls(), longStats.getNumDVs(),
+ longStats.getLowValue(), longStats.getHighValue());
+ } else if (statsObj.getStatsData().isSetDoubleStats()) {
+ DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats();
+ mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(),
+ doubleStats.getLowValue(), doubleStats.getHighValue());
+ } else if (statsObj.getStatsData().isSetStringStats()) {
+ StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats();
+ mColStats.setStringStats(stringStats.getNumNulls(), stringStats.getNumDVs(),
+ stringStats.getMaxColLen(), stringStats.getAvgColLen());
+ } else if (statsObj.getStatsData().isSetBinaryStats()) {
+ BinaryColumnStatsData binaryStats = statsObj.getStatsData().getBinaryStats();
+ mColStats.setBinaryStats(binaryStats.getNumNulls(), binaryStats.getMaxColLen(),
+ binaryStats.getAvgColLen());
+ }
+ return mColStats;
+ }
+
+ private void writeMTableColumnStatistics(MTableColumnStatistics mStatsObj)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
+ {
+ String dbName = mStatsObj.getDbName();
+ String tableName = mStatsObj.getTableName();
+ String colName = mStatsObj.getColName();
+
+ LOG.info("Updating table level column statistics for db=" + dbName + " tableName=" + tableName
+ + " colName=" + colName);
+
+ MTable mTable = getMTable(mStatsObj.getDbName(), mStatsObj.getTableName());
+ boolean foundCol = false;
+
+ if (mTable == null) {
+ throw new NoSuchObjectException("Table " + tableName +
+ " for which stats gathering is requested doesn't exist.");
+ }
+
+ MStorageDescriptor mSDS = mTable.getSd();
+ List<MFieldSchema> colList = mSDS.getCD().getCols();
+
+ for (MFieldSchema mCol : colList) {
+ if (mCol.getName().equals(mStatsObj.getColName().trim())) {
+ foundCol = true;
+ break;
+ }
+ }
+
+ if (!foundCol) {
+ throw new NoSuchObjectException("Column " + colName +
+ " for which stats gathering is requested doesn't exist.");
+ }
+
+ MTableColumnStatistics oldStatsObj = getMTableColumnStatistics(dbName, tableName, colName);
+
+ if (oldStatsObj != null) {
+ oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen());
+ oldStatsObj.setHighValue(mStatsObj.getHighValue());
+ oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed());
+ oldStatsObj.setLowValue(mStatsObj.getLowValue());
+ oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen());
+ oldStatsObj.setNumDVs(mStatsObj.getNumDVs());
+ oldStatsObj.setNumFalses(mStatsObj.getNumFalses());
+ oldStatsObj.setNumTrues(mStatsObj.getNumTrues());
+ oldStatsObj.setNumNulls(mStatsObj.getNumNulls());
+ } else {
+ pm.makePersistent(mStatsObj);
+ }
+ }
+
+ private ColumnStatisticsObj getPartitionColumnStatisticsObj(MPartitionColumnStatistics mStatsObj)
+ {
+ ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
+ statsObj.setColType(mStatsObj.getColType());
+ statsObj.setColName(mStatsObj.getColName());
+ String colType = mStatsObj.getColType();
+ ColumnStatisticsData colStatsData = new ColumnStatisticsData();
+
+ if (colType.equalsIgnoreCase("boolean")) {
+ BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
+ boolStats.setNumFalses(mStatsObj.getNumFalses());
+ boolStats.setNumTrues(mStatsObj.getNumTrues());
+ boolStats.setNumNulls(mStatsObj.getNumNulls());
+ colStatsData.setBooleanStats(boolStats);
+ } else if (colType.equalsIgnoreCase("string")) {
+ StringColumnStatsData stringStats = new StringColumnStatsData();
+ stringStats.setNumNulls(mStatsObj.getNumNulls());
+ stringStats.setAvgColLen(mStatsObj.getAvgColLen());
+ stringStats.setMaxColLen(mStatsObj.getMaxColLen());
+ stringStats.setNumDVs(mStatsObj.getNumDVs());
+ colStatsData.setStringStats(stringStats);
+ } else if (colType.equalsIgnoreCase("binary")) {
+ BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
+ binaryStats.setNumNulls(mStatsObj.getNumNulls());
+ binaryStats.setAvgColLen(mStatsObj.getAvgColLen());
+ binaryStats.setMaxColLen(mStatsObj.getMaxColLen());
+ colStatsData.setBinaryStats(binaryStats);
+ } else if (colType.equalsIgnoreCase("tinyint") || colType.equalsIgnoreCase("smallint") ||
+ colType.equalsIgnoreCase("int") || colType.equalsIgnoreCase("bigint") ||
+ colType.equalsIgnoreCase("timestamp")) {
+ LongColumnStatsData longStats = new LongColumnStatsData();
+ longStats.setNumNulls(mStatsObj.getNumNulls());
+ longStats.setHighValue(mStatsObj.getHighValueAsLong());
+ longStats.setLowValue(mStatsObj.getLowValueAsLong());
+ longStats.setNumDVs(mStatsObj.getNumDVs());
+ colStatsData.setLongStats(longStats);
+ } else if (colType.equalsIgnoreCase("double") || colType.equalsIgnoreCase("float")) {
+ DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
+ doubleStats.setNumNulls(mStatsObj.getNumNulls());
+ doubleStats.setHighValue(mStatsObj.getHighValueAsDouble());
+ doubleStats.setLowValue(mStatsObj.getLowValueAsDouble());
+ doubleStats.setNumDVs(mStatsObj.getNumDVs());
+ colStatsData.setDoubleStats(doubleStats);
+ }
+ statsObj.setStatsData(colStatsData);
+ return statsObj;
+ }
+
+ private ColumnStatisticsDesc getPartitionColumnStatisticsDesc(
+ MPartitionColumnStatistics mStatsObj) {
+ ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
+ statsDesc.setIsTblLevel(false);
+ statsDesc.setDbName(mStatsObj.getDbName());
+ statsDesc.setTableName(mStatsObj.getTableName());
+ statsDesc.setPartName(mStatsObj.getPartitionName());
+ statsDesc.setLastAnalyzed(mStatsObj.getLastAnalyzed());
+ return statsDesc;
+ }
+
+ private void writeMPartitionColumnStatistics(MPartitionColumnStatistics mStatsObj,
+ List<String> partVal) throws NoSuchObjectException, MetaException, InvalidObjectException,
+ InvalidInputException
+ {
+ String dbName = mStatsObj.getDbName();
+ String tableName = mStatsObj.getTableName();
+ String partName = mStatsObj.getPartitionName();
+ String colName = mStatsObj.getColName();
+
+ LOG.info("Updating partition level column statistics for db=" + dbName + " tableName=" +
+ tableName + " partName=" + partName + " colName=" + colName);
+
+ MTable mTable = getMTable(mStatsObj.getDbName(), mStatsObj.getTableName());
+ boolean foundCol = false;
+
+ if (mTable == null) {
+ throw new NoSuchObjectException("Table " + tableName +
+ " for which stats gathering is requested doesn't exist.");
+ }
+
+ MPartition mPartition =
+ getMPartition(mStatsObj.getDbName(), mStatsObj.getTableName(), partVal);
+
+ if (mPartition == null) {
+ throw new NoSuchObjectException("Partition " + partName +
+ " for which stats gathering is requested doesn't exist.");
+ }
+
+ MStorageDescriptor mSDS = mPartition.getSd();
+ List<MFieldSchema> colList = mSDS.getCD().getCols();
+
+ for (MFieldSchema mCol : colList) {
+ if (mCol.getName().equals(mStatsObj.getColName().trim())) {
+ foundCol = true;
+ break;
+ }
+ }
+
+ if (!foundCol) {
+ throw new NoSuchObjectException("Column " + colName +
+ " for which stats gathering is requested doesn't exist.");
+ }
+
+ MPartitionColumnStatistics oldStatsObj = getMPartitionColumnStatistics(dbName, tableName,
+ partName, partVal, colName);
+ if (oldStatsObj != null) {
+ oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen());
+ oldStatsObj.setHighValue(mStatsObj.getHighValue());
+ oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed());
+ oldStatsObj.setLowValue(mStatsObj.getLowValue());
+ oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen());
+ oldStatsObj.setNumDVs(mStatsObj.getNumDVs());
+ oldStatsObj.setNumFalses(mStatsObj.getNumFalses());
+ oldStatsObj.setNumTrues(mStatsObj.getNumTrues());
+ oldStatsObj.setNumNulls(mStatsObj.getNumNulls());
+ } else {
+ pm.makePersistent(mStatsObj);
+ }
+ }
+
+ public boolean updateTableColumnStatistics(ColumnStatistics colStats)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
+ {
+ boolean committed = false;
+
+ try {
+ openTransaction();
+ List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
+ ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
+
+ for (ColumnStatisticsObj statsObj : statsObjs) {
+ MTableColumnStatistics mStatsObj = convertToMTableColumnStatistics(statsDesc, statsObj);
+ writeMTableColumnStatistics(mStatsObj);
+ }
+ committed = commitTransaction();
+ return committed;
+ } finally {
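+ // Roll back if commitTransaction() was never reached or returned false.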
+ if (!committed) {
+ rollbackTransaction();
+ }
+ }
+ }
+
+ public boolean updatePartitionColumnStatistics(ColumnStatistics colStats, List<String> partVals)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
+ {
+ boolean committed = false;
+
+ try {
+ openTransaction();
+ List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
+ ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
+
+ for (ColumnStatisticsObj statsObj : statsObjs) {
+ MPartitionColumnStatistics mStatsObj =
+ convertToMPartitionColumnStatistics(statsDesc, statsObj, partVals);
+ writeMPartitionColumnStatistics(mStatsObj, partVals);
+ }
+ committed = commitTransaction();
+ return committed;
+ } finally {
+ if (!committed) {
+ rollbackTransaction();
+ }
+ }
+ }
+
+ private MTableColumnStatistics getMTableColumnStatistics(String dbName, String tableName,
+ String colName) throws NoSuchObjectException, InvalidInputException, MetaException
+ {
+ boolean committed = false;
+
+ if (dbName == null) {
+ dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
+ }
+
+ if (tableName == null || colName == null) {
+ throw new InvalidInputException("TableName/ColName passed to get_table_column_statistics " +
+ "is null");
+ }
+
+ try {
+ openTransaction();
+ MTableColumnStatistics mStatsObj = null;
+ MTable mTable = getMTable(dbName.trim(), tableName.trim());
+ boolean foundCol = false;
+
+ if (mTable == null) {
+ throw new NoSuchObjectException("Table " + tableName +
+ " for which stats is requested doesn't exist.");
+ }
+
+ MStorageDescriptor mSDS = mTable.getSd();
+ List<MFieldSchema> colList = mSDS.getCD().getCols();
+
+ for (MFieldSchema mCol : colList) {
+ if (mCol.getName().equals(colName.trim())) {
+ foundCol = true;
+ break;
+ }
+ }
+
+ if (!foundCol) {
+ throw new NoSuchObjectException("Column " + colName +
+ " for which stats is requested doesn't exist.");
+ }
+
+ Query query = pm.newQuery(MTableColumnStatistics.class);
+ query.setFilter("table.tableName == t1 && " +
+ "dbName == t2 && " + "colName == t3");
+ query
+ .declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3");
+ query.setUnique(true);
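+ // setUnique(true) makes execute() return the single matching row, or null, rather than a List.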
+
+ mStatsObj = (MTableColumnStatistics) query.execute(tableName.trim(),
+ dbName.trim(), colName.trim());
+ pm.retrieve(mStatsObj);
+ committed = commitTransaction();
+ return mStatsObj;
+ } finally {
+ if (!committed) {
+ rollbackTransaction();
+ }
+ }
+ }
+
+ public ColumnStatistics getTableColumnStatistics(String dbName, String tableName, String colName)
+ throws MetaException, NoSuchObjectException, InvalidInputException
+ {
+ ColumnStatistics statsObj;
+ MTableColumnStatistics mStatsObj = getMTableColumnStatistics(dbName, tableName, colName);
+
+ if (mStatsObj == null) {
+ throw new NoSuchObjectException("Statistics for dbName=" + dbName + " tableName=" + tableName
+ + " columnName=" + colName + " doesn't exist.");
+ }
+
+ statsObj = convertToTableColumnStatistics(mStatsObj);
+ return statsObj;
+ }
+
+ public ColumnStatistics getPartitionColumnStatistics(String dbName, String tableName,
+ String partName, List<String> partVal, String colName)
+ throws MetaException, NoSuchObjectException, InvalidInputException
+ {
+ ColumnStatistics statsObj;
+ MPartitionColumnStatistics mStatsObj =
+ getMPartitionColumnStatistics(dbName, tableName, partName, partVal, colName);
+
+ if (mStatsObj == null) {
+ throw new NoSuchObjectException("Statistics for dbName=" + dbName + " tableName=" + tableName
+ + " partName= " + partName + " columnName=" + colName + " doesn't exist.");
+ }
+ statsObj = convertToPartColumnStatistics(mStatsObj);
+ return statsObj;
+ }
+
+ private ColumnStatistics convertToPartColumnStatistics(MPartitionColumnStatistics mStatsObj)
+ {
+ if (mStatsObj == null) {
+ return null;
+ }
+
+ ColumnStatisticsDesc statsDesc = getPartitionColumnStatisticsDesc(mStatsObj);
+ ColumnStatisticsObj statsObj = getPartitionColumnStatisticsObj(mStatsObj);
+ List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
+ statsObjs.add(statsObj);
+
+ ColumnStatistics colStats = new ColumnStatistics();
+ colStats.setStatsDesc(statsDesc);
+ colStats.setStatsObj(statsObjs);
+ return colStats;
+ }
+
+ private MPartitionColumnStatistics getMPartitionColumnStatistics(String dbName, String tableName,
+ String partName, List<String> partVal, String colName) throws NoSuchObjectException,
+ InvalidInputException, MetaException
+ {
+ boolean committed = false;
+ MPartitionColumnStatistics mStatsObj = null;
+
+ if (dbName == null) {
+ dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
+ }
+
+ if (tableName == null || partVal == null || colName == null) {
+ throw new InvalidInputException("TableName/PartName/ColName passed to " +
+ " get_partition_column_statistics is null");
+ }
+
+ try {
+ openTransaction();
+ MTable mTable = getMTable(dbName.trim(), tableName.trim());
+ boolean foundCol = false;
+
+ if (mTable == null) {
+ throw new NoSuchObjectException("Table " + tableName +
+ " for which stats is requested doesn't exist.");
+ }
+
+ MPartition mPartition = getMPartition(dbName, tableName, partVal);
+
+ if (mPartition == null) {
+ throw new NoSuchObjectException("Partition " + partName +
+ " for which stats are requested doesn't exist");
+ }
+
+ MStorageDescriptor mSDS = mPartition.getSd();
+ List<MFieldSchema> colList = mSDS.getCD().getCols();
+
+ for (MFieldSchema mCol : colList) {
+ if (mCol.getName().equals(colName.trim())) {
+ foundCol = true;
+ break;
+ }
+ }
+
+ if (!foundCol) {
+ throw new NoSuchObjectException("Column " + colName +
+ " for which stats is requested doesn't exist.");
+ }
+
+ Query query = pm.newQuery(MPartitionColumnStatistics.class);
+ query.setFilter("partition.partitionName == t1 && " +
+ "dbName == t2 && " + "tableName == t3 && " + "colName == t4");
+ query
+ .declareParameters("java.lang.String t1, java.lang.String t2, " +
+ "java.lang.String t3, java.lang.String t4");
+ query.setUnique(true);
+
+ mStatsObj = (MPartitionColumnStatistics) query.executeWithArray(partName.trim(),
+ dbName.trim(), tableName.trim(),
+ colName.trim());
+ pm.retrieve(mStatsObj);
+ committed = commitTransaction();
+ return mStatsObj;
+
+ } finally {
+ if (!committed) {
+ rollbackTransaction();
+ }
+ }
+ }
+
+ public boolean deletePartitionColumnStatistics(String dbName, String tableName,
+ String partName, List<String> partVals, String colName)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
+ {
+ boolean ret = false;
+
+ if (dbName == null) {
+ dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
+ }
+
+ if (tableName == null) {
+ throw new InvalidInputException("Table name is null.");
+ }
+
+ try {
+ openTransaction();
+ MTable mTable = getMTable(dbName, tableName);
+ MPartitionColumnStatistics mStatsObj;
+ List<MPartitionColumnStatistics> mStatsObjColl;
+
+ if (mTable == null) {
+ throw new NoSuchObjectException("Table " + tableName +
+ " for which stats deletion is requested doesn't exist");
+ }
+
+ MPartition mPartition = getMPartition(dbName, tableName, partVals);
+
+ if (mPartition == null) {
+ throw new NoSuchObjectException("Partition " + partName +
+ " for which stats deletion is requested doesn't exist");
+ }
+
+ Query query = pm.newQuery(MPartitionColumnStatistics.class);
+ String filter;
+ String parameters;
+
+ if (colName != null) {
+ filter = "partition.partitionName == t1 && dbName == t2 && tableName == t3 && " +
+ "colName == t4";
+ parameters = "java.lang.String t1, java.lang.String t2, " +
+ "java.lang.String t3, java.lang.String t4";
+ } else {
+ filter = "partition.partitionName == t1 && dbName == t2 && tableName == t3";
+ parameters = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
+ }
+
+ query.setFilter(filter);
+ query.declareParameters(parameters);
+
+ if (colName != null) {
+ query.setUnique(true);
+ mStatsObj = (MPartitionColumnStatistics)query.executeWithArray(partName.trim(),
+ dbName.trim(), tableName.trim(), colName.trim());
+ pm.retrieve(mStatsObj);
+
+ if (mStatsObj != null) {
+ pm.deletePersistent(mStatsObj);
+ } else {
+ throw new NoSuchObjectException("Column stats doesn't exist for db=" +dbName + " table="
+ + tableName + " partition=" + partName + " col=" + colName);
+ }
+ } else {
+ mStatsObjColl = (List<MPartitionColumnStatistics>) query.execute(partName.trim(),
+ dbName.trim(), tableName.trim());
+ pm.retrieveAll(mStatsObjColl);
+
+ if (mStatsObjColl != null) {
+ pm.deletePersistentAll(mStatsObjColl);
+ } else {
+ throw new NoSuchObjectException("Column stats doesn't exist for db=" + dbName +
+ " table=" + tableName + " partition" + partName);
+ }
+ }
+ ret = commitTransaction();
+ } catch(NoSuchObjectException e) {
+ rollbackTransaction();
+ throw e;
+ } finally {
+ if (!ret) {
+ rollbackTransaction();
+ }
+ }
+ return ret;
+ }
+
+ public boolean deleteTableColumnStatistics(String dbName, String tableName, String colName)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
+ {
+ boolean ret = false;
+
+ if (dbName == null) {
+ dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
+ }
+
+ if (tableName == null) {
+ throw new InvalidInputException("Table name is null.");
+ }
+
+ try {
+ openTransaction();
+ MTable mTable = getMTable(dbName, tableName);
+ MTableColumnStatistics mStatsObj;
+ List<MTableColumnStatistics> mStatsObjColl;
+
+ if (mTable == null) {
+ throw new NoSuchObjectException("Table " + tableName +
+ " for which stats deletion is requested doesn't exist");
+ }
+
+ Query query = pm.newQuery(MTableColumnStatistics.class);
+ String filter;
+ String parameters;
+
+ if (colName != null) {
+ filter = "table.tableName == t1 && dbName == t2 && colName == t3";
+ parameters = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
+ } else {
+ filter = "table.tableName == t1 && dbName == t2";
+ parameters = "java.lang.String t1, java.lang.String t2";
+ }
+
+ query.setFilter(filter);
+ query.declareParameters(parameters);
+
+ if (colName != null) {
+ query.setUnique(true);
+ mStatsObj = (MTableColumnStatistics)query.execute(tableName.trim(),
+ dbName.trim(), colName.trim());
+ pm.retrieve(mStatsObj);
+
+ if (mStatsObj != null) {
+ pm.deletePersistent(mStatsObj);
+ } else {
+ throw new NoSuchObjectException("Column stats doesn't exist for db=" +dbName + " table="
+ + tableName + " col=" + colName);
+ }
+ } else {
+ mStatsObjColl = (List<MTableColumnStatistics>) query.execute(tableName.trim(), dbName.trim());
+ pm.retrieveAll(mStatsObjColl);
+
+ if (mStatsObjColl != null) {
+ pm.deletePersistentAll(mStatsObjColl);
+ } else {
+ throw new NoSuchObjectException("Column stats doesn't exist for db=" + dbName +
+ " table=" + tableName);
+ }
+ }
+ ret = commitTransaction();
+ } catch(NoSuchObjectException e) {
+ rollbackTransaction();
+ throw e;
+ } finally {
+ if (!ret) {
+ rollbackTransaction();
+ }
+ }
+ return ret;
+ }
+
@Override
public long cleanupEvents() {
boolean commited = false;
@@ -4468,4 +5242,5 @@ public class ObjectStore implements RawStore, Configurable {
}
return delCnt;
}
+
}
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
index bf5ae3a..233fb46 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
@@ -22,8 +22,10 @@ import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -97,7 +99,7 @@ public interface RawStore extends Configurable {
MetaException;
public abstract boolean dropTable(String dbName, String tableName)
- throws MetaException;
+ throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException;
public abstract Table getTable(String dbName, String tableName)
throws MetaException;
@@ -109,7 +111,8 @@ public interface RawStore extends Configurable {
List<String> part_vals) throws MetaException, NoSuchObjectException;
public abstract boolean dropPartition(String dbName, String tableName,
- List<String> part_vals) throws MetaException;
+ List<String> part_vals) throws MetaException, NoSuchObjectException, InvalidObjectException,
+ InvalidInputException;
public abstract List<Partition> getPartitions(String dbName,
String tableName, int max) throws MetaException;
@@ -306,5 +309,116 @@ public interface RawStore extends Configurable {
List<String> part_vals, short max_parts, String userName, List<String> groupNames)
throws MetaException, InvalidObjectException, NoSuchObjectException;
+ /**
+ * Persists the given table-level column statistics object to the metastore.
+ *
+ * @param colStats the column statistics object to persist
+ * @return boolean indicating the outcome of the operation
+ * @throws NoSuchObjectException
+ * @throws MetaException
+ * @throws InvalidObjectException
+ * @throws InvalidInputException
+ */
+ public abstract boolean updateTableColumnStatistics(ColumnStatistics colStats)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException;
+
+ /**
+ * Persists the given partition-level column statistics object to the metastore.
+ *
+ * @param statsObj the column statistics object to persist
+ * @param partVals the partition values identifying the partition
+ * @return boolean indicating the outcome of the operation
+ * @throws NoSuchObjectException
+ * @throws MetaException
+ * @throws InvalidObjectException
+ * @throws InvalidInputException
+ */
+ public abstract boolean updatePartitionColumnStatistics(ColumnStatistics statsObj,
+ List<String> partVals)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException;
+
+ /**
+ * Returns the relevant column statistics for a given column in a given table in a given
+ * database, if such statistics exist.
+ *
+ * @param dbName the name of the database; defaults to the current database when null
+ * @param tableName the name of the table
+ * @param colName the name of the column for which statistics are requested
+ * @return the column statistics for the given column in the given table
+ * @throws NoSuchObjectException
+ * @throws MetaException
+ * @throws InvalidInputException
+ */
+ public abstract ColumnStatistics getTableColumnStatistics(String dbName, String tableName,
+ String colName) throws MetaException, NoSuchObjectException, InvalidInputException,
+ InvalidObjectException;
+
+ /**
+ * Returns the relevant column statistics for a given column in a given partition of a given
+ * table in a given database, if such statistics exist.
+ *
+ * @param dbName the name of the database; defaults to the current database when null
+ * @param tableName the name of the table
+ * @param partName the name of the partition
+ * @param partVals the partition values identifying the partition
+ * @param colName the name of the column for which statistics are requested
+ * @return the column statistics for the given column in the given partition
+ * @throws NoSuchObjectException
+ * @throws MetaException
+ * @throws InvalidInputException
+ * @throws InvalidObjectException
+ */
+ public abstract ColumnStatistics getPartitionColumnStatistics(String dbName, String tableName,
+ String partName, List<String> partVals, String colName)
+ throws MetaException, NoSuchObjectException, InvalidInputException, InvalidObjectException;
+
+ /**
+ * Deletes the column statistics, if present, associated with a given db, table, partition
+ * and column. If null is passed for colName, stats for all columns associated with the
+ * given db, table and partition are deleted.
+ *
+ * @param dbName
+ * @param tableName
+ * @param partName
+ * @param partVals
+ * @param colName
+ * @return boolean indicating the outcome of the operation
+ * @throws NoSuchObjectException
+ * @throws MetaException
+ * @throws InvalidObjectException
+ * @throws InvalidInputException
+ */
+ public abstract boolean deletePartitionColumnStatistics(String dbName, String tableName,
+ String partName, List<String> partVals, String colName)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException;
+
+ /**
+ * Deletes the column statistics, if present, associated with a given db, table and column.
+ * If null is passed for colName, stats for all columns associated with the given db and
+ * table are deleted.
+ *
+ * @param dbName
+ * @param tableName
+ * @param colName
+ * @return boolean indicating the outcome of the operation
+ * @throws NoSuchObjectException
+ * @throws MetaException
+ * @throws InvalidObjectException
+ * @throws InvalidInputException
+ */
+ public abstract boolean deleteTableColumnStatistics(String dbName, String tableName,
+ String colName)
+ throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException;
+
public abstract long cleanupEvents();
}
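Taken together, the methods above give RawStore a small create/read/delete surface for column
statistics. The following sketch (not part of the patch) shows one plausible round trip against
an implementing store such as ObjectStore; the `store` instance and a pre-existing table
`db1.tab1` with a double column `income` are assumed purely for illustration:

    import java.util.Arrays;
    import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
    import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;

    // describe which object the stats belong to
    ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
    desc.setDbName("db1");
    desc.setTableName("tab1");
    desc.setIsTblLevel(true);

    // per-column stats payload for a double column
    DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
    doubleStats.setLowValue(1000.0);
    doubleStats.setHighValue(90000.0);
    doubleStats.setNumNulls(3);
    doubleStats.setNumDVs(22);
    ColumnStatisticsData data = new ColumnStatisticsData();
    data.setDoubleStats(doubleStats);

    ColumnStatisticsObj obj = new ColumnStatisticsObj();
    obj.setColName("income");
    obj.setColType("double");
    obj.setStatsData(data);

    ColumnStatistics colStats = new ColumnStatistics();
    colStats.setStatsDesc(desc);
    colStats.setStatsObj(Arrays.asList(obj));

    store.updateTableColumnStatistics(colStats);                                  // persist
    ColumnStatistics read = store.getTableColumnStatistics("db1", "tab1", "income"); // read back
    store.deleteTableColumnStatistics("db1", "tab1", null);  // null colName: drop stats for all columns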
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
index 77d1caa..9f3b9ff 100755
--- metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
@@ -376,6 +376,38 @@ public class Warehouse {
}
}
+ public static Map<String, String> makeEscSpecFromName(String name) throws MetaException {
+
+ if (name == null || name.isEmpty()) {
+ throw new MetaException("Partition name is invalid. " + name);
+ }
+ LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>();
+
+ Path currPath = new Path(name);
+
+ List<String[]> kvs = new ArrayList<String[]>();
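+ // Walk from the leaf directory up to the table's base directory, collecting key=value components.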
+ do {
+ String component = currPath.getName();
+ Matcher m = pat.matcher(component);
+ if (m.matches()) {
+ String k = m.group(1);
+ String v = m.group(2);
+ kvs.add(new String[] { k, v });
+ }
+ currPath = currPath.getParent();
+ } while (currPath != null && !currPath.getName().isEmpty());
+
+ // insert in reverse order, since the loop above walked from the leaf dir up to the table's base dir
+ for (int i = kvs.size(); i > 0; i--) {
+ partSpec.put(kvs.get(i - 1)[0], kvs.get(i - 1)[1]);
+ }
+
+ return partSpec;
+ }
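A brief usage sketch for the helper above; the two-level partition name is hypothetical. Note
that the values are kept in their escaped on-disk form (no unescaping happens in the loop),
which is presumably the reason for the "Esc" in the method name, in contrast to the existing
makeSpecFromName:

    // "ds=2008-04-08/hr=11" yields the ordered map {ds=2008-04-08, hr=11}
    Map<String, String> spec = Warehouse.makeEscSpecFromName("ds=2008-04-08/hr=11");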
+
public Path getPartitionPath(Database db, String tableName,
LinkedHashMap<String, String> pm) throws MetaException {
return new Path(getTablePath(db, tableName), makePartPath(pm));
diff --git metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
new file mode 100644
index 0000000..067cb49
--- /dev/null
+++ metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ */
+package org.apache.hadoop.hive.metastore.model;
+
+import java.nio.ByteBuffer;
+
+
+/**
+ *
+ * MPartitionColumnStatistics - Represents Hive's partition-level column statistics description.
+ * All fields in this class except partition are persisted in the metastore; for partition,
+ * the part_id is persisted in its place.
+ *
+ */
+public class MPartitionColumnStatistics {
+
+ private MPartition partition;
+
+ private String dbName;
+ private String tableName;
+ private String partitionName;
+ private String colName;
+ private String colType;
+
+ private byte[] lowValue;
+ private byte[] highValue;
+ private long numNulls;
+ private long numDVs;
+ private double avgColLen;
+ private long maxColLen;
+ private long numTrues;
+ private long numFalses;
+ private long lastAnalyzed;
+
+ public MPartitionColumnStatistics() {}
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public void setTableName(String tableName) {
+ this.tableName = tableName;
+ }
+
+ public String getColName() {
+ return colName;
+ }
+
+ public void setColName(String colName) {
+ this.colName = colName;
+ }
+
+ public byte[] getLowValue() {
+ return lowValue;
+ }
+
+ public long getLowValueAsLong() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(lowValue);
+ return byteBuf.getLong();
+ }
+
+ public double getLowValueAsDouble() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(lowValue);
+ return byteBuf.getDouble();
+ }
+
+ public byte[] getHighValue() {
+ return highValue;
+ }
+
+ public long getHighValueAsLong() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(highValue);
+ return byteBuf.getLong();
+ }
+
+ public double getHighValueAsDouble() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(highValue);
+ return byteBuf.getDouble();
+ }
+
+ public void setHighValue(byte[] b) {
+ this.highValue = b;
+ }
+
+ public void setLowValue(byte[] b) {
+ this.lowValue = b;
+ }
+
+ public long getNumNulls() {
+ return numNulls;
+ }
+
+ public void setNumNulls(long numNulls) {
+ this.numNulls = numNulls;
+ }
+
+ public long getNumDVs() {
+ return numDVs;
+ }
+
+ public void setNumDVs(long numDVs) {
+ this.numDVs = numDVs;
+ }
+
+ public double getAvgColLen() {
+ return avgColLen;
+ }
+
+ public void setAvgColLen(double avgColLen) {
+ this.avgColLen = avgColLen;
+ }
+
+ public long getMaxColLen() {
+ return maxColLen;
+ }
+
+ public void setMaxColLen(long maxColLen) {
+ this.maxColLen = maxColLen;
+ }
+
+ public long getNumTrues() {
+ return numTrues;
+ }
+
+ public void setNumTrues(long numTrues) {
+ this.numTrues = numTrues;
+ }
+
+ public long getNumFalses() {
+ return numFalses;
+ }
+
+ public void setNumFalses(long numFalses) {
+ this.numFalses = numFalses;
+ }
+
+ public long getLastAnalyzed() {
+ return lastAnalyzed;
+ }
+
+ public void setLastAnalyzed(long lastAnalyzed) {
+ this.lastAnalyzed = lastAnalyzed;
+ }
+
+ public String getDbName() {
+ return dbName;
+ }
+
+ public void setDbName(String dbName) {
+ this.dbName = dbName;
+ }
+
+ public MPartition getPartition() {
+ return partition;
+ }
+
+ public void setPartition(MPartition partition) {
+ this.partition = partition;
+ }
+
+ public String getPartitionName() {
+ return partitionName;
+ }
+
+ public void setPartitionName(String partitionName) {
+ this.partitionName = partitionName;
+ }
+
+ public String getColType() {
+ return colType;
+ }
+
+ public void setColType(String colType) {
+ this.colType = colType;
+ }
+
+ public void setBooleanStats(long numTrues, long numFalses, long numNulls) {
+ this.numTrues = numTrues;
+ this.numFalses = numFalses;
+ this.numNulls = numNulls;
+ }
+
+ public void setLongStats(long numNulls, long numNDVs, long lowValue, long highValue) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ byte[] bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(lowValue).array();
+ this.lowValue = bytes;
+ bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(highValue).array();
+ this.highValue = bytes;
+ }
+
+ public void setDoubleStats(long numNulls, long numNDVs, double lowValue, double highValue) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ byte[] bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(lowValue).array();
+ this.lowValue = bytes;
+ bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(highValue).array();
+ this.highValue = bytes;
+ }
+
+ public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ this.maxColLen = maxColLen;
+ this.avgColLen = avgColLen;
+ }
+
+ public void setBinaryStats(long numNulls, long maxColLen, double avgColLen) {
+ this.numNulls = numNulls;
+ this.maxColLen = maxColLen;
+ this.avgColLen = avgColLen;
+ }
+}
\ No newline at end of file
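Since the low/high extremes are persisted as byte arrays, the setters and getters above rely on
ByteBuffer's default big-endian byte order being used symmetrically on the encode and decode
sides. A minimal, self-contained round-trip check of that encoding:

    import java.nio.ByteBuffer;

    public class StatsByteBufferRoundTrip {
      public static void main(String[] args) {
        long low = 42L;
        // encode, as in setLongStats() above
        byte[] bytes = ByteBuffer.allocate(Long.SIZE / 8).putLong(low).array();
        // decode, as in getLowValueAsLong() above
        long decoded = ByteBuffer.wrap(bytes).getLong();
        System.out.println(decoded == low); // true: both sides default to big-endian
      }
    }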
diff --git metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
new file mode 100644
index 0000000..19c2a36
--- /dev/null
+++ metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
@@ -0,0 +1,223 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ */
+package org.apache.hadoop.hive.metastore.model;
+
+import java.nio.ByteBuffer;
+
+
+/**
+ *
+ * MTableColumnStatistics - Represents Hive's table-level column statistics description. All
+ * fields in this class except table are persisted in the metastore; for table, the tbl_id is
+ * persisted in its place.
+ *
+ */
+public class MTableColumnStatistics {
+
+ private MTable table;
+ private String dbName;
+ private String tableName;
+ private String colName;
+ private String colType;
+
+ private byte[] lowValue;
+ private byte[] highValue;
+ private long numNulls;
+ private long numDVs;
+ private double avgColLen;
+ private long maxColLen;
+ private long numTrues;
+ private long numFalses;
+ private long lastAnalyzed;
+
+ public MTableColumnStatistics() {}
+
+ public MTable getTable() {
+ return table;
+ }
+
+ public void setTable(MTable table) {
+ this.table = table;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public void setTableName(String tableName) {
+ this.tableName = tableName;
+ }
+
+ public String getColName() {
+ return colName;
+ }
+
+ public void setColName(String colName) {
+ this.colName = colName;
+ }
+
+ public String getColType() {
+ return colType;
+ }
+
+ public void setColType(String colType) {
+ this.colType = colType;
+ }
+
+ public byte[] getLowValue() {
+ return lowValue;
+ }
+
+ public long getLowValueAsLong() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(lowValue);
+ return byteBuf.getLong();
+ }
+
+ public double getLowValueAsDouble() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(lowValue);
+ return byteBuf.getDouble();
+ }
+
+ public byte[] getHighValue() {
+ return highValue;
+ }
+
+ public long getHighValueAsLong() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(highValue);
+ return byteBuf.getLong();
+ }
+
+ public double getHighValueAsDouble() {
+ ByteBuffer byteBuf = ByteBuffer.wrap(highValue);
+ return byteBuf.getDouble();
+ }
+
+ public void setHighValue(byte[] b) {
+ this.highValue = b;
+ }
+
+ public void setLowValue(byte[] b) {
+ this.lowValue = b;
+ }
+
+ public long getNumNulls() {
+ return numNulls;
+ }
+
+
+ public void setNumNulls(long numNulls) {
+ this.numNulls = numNulls;
+ }
+
+ public long getNumDVs() {
+ return numDVs;
+ }
+
+ public void setNumDVs(long numDVs) {
+ this.numDVs = numDVs;
+ }
+
+ public double getAvgColLen() {
+ return avgColLen;
+ }
+
+ public void setAvgColLen(double avgColLen) {
+ this.avgColLen = avgColLen;
+ }
+
+ public long getMaxColLen() {
+ return maxColLen;
+ }
+
+ public void setMaxColLen(long maxColLen) {
+ this.maxColLen = maxColLen;
+ }
+
+ public long getNumTrues() {
+ return numTrues;
+ }
+
+ public void setNumTrues(long numTrues) {
+ this.numTrues = numTrues;
+ }
+
+ public long getNumFalses() {
+ return numFalses;
+ }
+
+ public void setNumFalses(long numFalses) {
+ this.numFalses = numFalses;
+ }
+
+ public long getLastAnalyzed() {
+ return lastAnalyzed;
+ }
+
+ public void setLastAnalyzed(long lastAnalyzed) {
+ this.lastAnalyzed = lastAnalyzed;
+ }
+
+ public String getDbName() {
+ return dbName;
+ }
+
+ public void setDbName(String dbName) {
+ this.dbName = dbName;
+ }
+
+ public void setBooleanStats(long numTrues, long numFalses, long numNulls) {
+ this.numTrues = numTrues;
+ this.numFalses = numFalses;
+ this.numNulls = numNulls;
+ }
+
+ public void setLongStats(long numNulls, long numNDVs, long lowValue, long highValue) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ byte[] bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(lowValue).array();
+ this.lowValue = bytes;
+ bytes = ByteBuffer.allocate(Long.SIZE/8).putLong(highValue).array();
+ this.highValue = bytes;
+ }
+
+ public void setDoubleStats(long numNulls, long numNDVs, double lowValue, double highValue) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ byte[] bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(lowValue).array();
+ this.lowValue = bytes;
+ bytes = ByteBuffer.allocate(Double.SIZE/8).putDouble(highValue).array();
+ this.highValue = bytes;
+ }
+
+ public void setStringStats(long numNulls, long numNDVs, long maxColLen, double avgColLen) {
+ this.numNulls = numNulls;
+ this.numDVs = numNDVs;
+ this.maxColLen = maxColLen;
+ this.avgColLen = avgColLen;
+ }
+
+ public void setBinaryStats(long numNulls, long maxColLen, double avgColLen) {
+ this.numNulls = numNulls;
+ this.maxColLen = maxColLen;
+ this.avgColLen = avgColLen;
+ }
+}
\ No newline at end of file
diff --git metastore/src/model/package.jdo metastore/src/model/package.jdo
index 38ce6d5..703b110 100644
--- metastore/src/model/package.jdo
+++ metastore/src/model/package.jdo
@@ -751,5 +751,104 @@
+ <!-- JDO mappings for the new MTableColumnStatistics and MPartitionColumnStatistics model
+ classes; the XML elements themselves were lost in this rendering of the patch. -->
diff --git metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
index 528a100..f7bc7d7 100644
--- metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
+++ metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
@@ -25,8 +25,10 @@ import junit.framework.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -512,4 +514,46 @@ public class DummyRawStoreForJdoConnection implements RawStore {
return 0;
}
+ @Override
+ public ColumnStatistics getTableColumnStatistics(String dbName, String tableName, String colName)
+ throws MetaException, NoSuchObjectException {
+ return null;
+ }
+
+
+ @Override
+ public boolean deleteTableColumnStatistics(String dbName, String tableName,
+ String colName)
+ throws NoSuchObjectException, MetaException, InvalidObjectException {
+ return false;
+ }
+
+
+ @Override
+ public boolean deletePartitionColumnStatistics(String dbName, String tableName,
+ String partName, List<String> partVals, String colName)
+ throws NoSuchObjectException, MetaException, InvalidObjectException,
+ InvalidInputException {
+ return false;
+ }
+
+ @Override
+ public ColumnStatistics getPartitionColumnStatistics(String dbName, String tableName,
+ String partName, List<String> partVal, String colName) throws MetaException,
+ NoSuchObjectException, InvalidInputException, InvalidObjectException {
+ return null;
+ }
+
+ @Override
+ public boolean updateTableColumnStatistics(ColumnStatistics statsObj)
+ throws NoSuchObjectException, MetaException, InvalidObjectException {
+ return false;
+ }
+
+ @Override
+ public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj, List<String> partVals)
+ throws NoSuchObjectException, MetaException, InvalidObjectException {
+ return false;
+ }
}
diff --git metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
index 925938d..e857002 100644
--- metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
+++ metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
@@ -37,8 +37,13 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
@@ -49,6 +54,7 @@ import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.Type;
import org.apache.hadoop.hive.metastore.api.UnknownDBException;
@@ -1228,6 +1234,187 @@ public abstract class TestHiveMetaStore extends TestCase {
}
}
+ public void testColumnStatistics() throws Throwable {
+
+ String dbName = "columnstatstestdb";
+ String tblName = "tbl";
+ String typeName = "Person";
+ String tblOwner = "testowner";
+ int lastAccessed = 6796;
+
+ try {
+ cleanUp(dbName, tblName, typeName);
+ Database db = new Database();
+ db.setName(dbName);
+ client.createDatabase(db);
+ createTableForTestFilter(dbName,tblName, tblOwner, lastAccessed, true);
+
+ // Create a ColumnStatistics Obj
+ String[] colName = new String[]{"income", "name"};
+ double lowValue = 50000.21;
+ double highValue = 1200000.4525;
+ long numNulls = 3;
+ long numDVs = 22;
+ double avgColLen = 50.30;
+ long maxColLen = 102;
+ String[] colType = new String[] {"double", "string"};
+ boolean isTblLevel = true;
+ String partName = null;
+ List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
+
+ ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
+ statsDesc.setDbName(dbName);
+ statsDesc.setTableName(tblName);
+ statsDesc.setIsTblLevel(isTblLevel);
+ statsDesc.setPartName(partName);
+
+ ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
+ statsObj.setColName(colName[0]);
+ statsObj.setColType(colType[0]);
+
+ ColumnStatisticsData statsData = new ColumnStatisticsData();
+ DoubleColumnStatsData numericStats = new DoubleColumnStatsData();
+ statsData.setDoubleStats(numericStats);
+
+ statsData.getDoubleStats().setHighValue(highValue);
+ statsData.getDoubleStats().setLowValue(lowValue);
+ statsData.getDoubleStats().setNumDVs(numDVs);
+ statsData.getDoubleStats().setNumNulls(numNulls);
+
+ statsObj.setStatsData(statsData);
+ statsObjs.add(statsObj);
+
+ statsObj = new ColumnStatisticsObj();
+ statsObj.setColName(colName[1]);
+ statsObj.setColType(colType[1]);
+
+ statsData = new ColumnStatisticsData();
+ StringColumnStatsData stringStats = new StringColumnStatsData();
+ statsData.setStringStats(stringStats);
+ statsData.getStringStats().setAvgColLen(avgColLen);
+ statsData.getStringStats().setMaxColLen(maxColLen);
+ statsData.getStringStats().setNumDVs(numDVs);
+ statsData.getStringStats().setNumNulls(numNulls);
+
+ statsObj.setStatsData(statsData);
+ statsObjs.add(statsObj);
+
+ ColumnStatistics colStats = new ColumnStatistics();
+ colStats.setStatsDesc(statsDesc);
+ colStats.setStatsObj(statsObjs);
+
+ // write stats objs persistently
+ client.updateTableColumnStatistics(colStats);
+
+ // retrieve the stats obj that was just written
+ ColumnStatistics colStats2 = client.getTableColumnStatistics(dbName, tblName, colName[0]);
+
+ // compare stats obj to ensure what we get is what we wrote
+ assertNotNull(colStats2);
+ assertEquals(colStats2.getStatsDesc().getDbName(), dbName);
+ assertEquals(colStats2.getStatsDesc().getTableName(), tblName);
+ assertEquals(colStats2.getStatsObj().get(0).getColName(), colName[0]);
+ assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getLowValue(),
+ lowValue);
+ assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getHighValue(),
+ highValue);
+ assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getNumNulls(),
+ numNulls);
+ assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getNumDVs(),
+ numDVs);
+ assertEquals(colStats2.getStatsDesc().isIsTblLevel(), isTblLevel);
+
+ // test delete column stats; if no col name is passed, all column stats associated with the
+ // table are deleted
+ boolean status = client.deleteTableColumnStatistics(dbName, tblName, null);
+ assertTrue(status);
+ // try to query stats for a column whose stats were just deleted
+ try {
+ colStats2 = client.getTableColumnStatistics(dbName, tblName, colName[1]);
+ fail("getTableColumnStatistics should have thrown NoSuchObjectException");
+ } catch (NoSuchObjectException e) {
+ System.out.println("Statistics for column=" + colName[1] + " not found");
+ }
+
+ colStats.setStatsDesc(statsDesc);
+ colStats.setStatsObj(statsObjs);
+
+ // update table level column stats
+ client.updateTableColumnStatistics(colStats);
+
+ // query column stats for column whose stats were updated in the previous call
+ colStats2 = client.getTableColumnStatistics(dbName, tblName, colName[0]);
+
+ // partition level column statistics test
+ // create a table with multiple partitions
+ cleanUp(dbName, tblName, typeName);
+
+ List<List<String>> values = new ArrayList<List<String>>();
+ values.add(makeVals("2008-07-01 14:13:12", "14"));
+ values.add(makeVals("2008-07-01 14:13:12", "15"));
+ values.add(makeVals("2008-07-02 14:13:12", "15"));
+ values.add(makeVals("2008-07-03 14:13:12", "151"));
+
+ createMultiPartitionTableSchema(dbName, tblName, typeName, values);
+
+ List<String> partitions = client.listPartitionNames(dbName, tblName, (short)-1);
+
+ partName = partitions.get(0);
+ isTblLevel = false;
+
+ // create a new columnstatistics desc to represent partition level column stats
+ statsDesc = new ColumnStatisticsDesc();
+ statsDesc.setDbName(dbName);
+ statsDesc.setTableName(tblName);
+ statsDesc.setPartName(partName);
+ statsDesc.setIsTblLevel(isTblLevel);
+
+ colStats = new ColumnStatistics();
+ colStats.setStatsDesc(statsDesc);
+ colStats.setStatsObj(statsObjs);
+
+ client.updatePartitionColumnStatistics(colStats);
+
+ colStats2 = client.getPartitionColumnStatistics(dbName, tblName, partName, colName[1]);
+
+ // compare stats obj to ensure what we get is what we wrote
+ assertNotNull(colStats2);
+ assertEquals(colStats2.getStatsDesc().getDbName(), dbName);
+ assertEquals(colStats2.getStatsDesc().getTableName(), tblName);
+ assertEquals(colStats2.getStatsDesc().getPartName(), partName);
+ assertEquals(colStats2.getStatsObj().get(0).getColName(), colName[1]);
+ assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getMaxColLen(),
+ maxColLen);
+ assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getAvgColLen(),
+ avgColLen);
+ assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getNumNulls(),
+ numNulls);
+ assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getNumDVs(),
+ numDVs);
+ assertEquals(colStats2.getStatsDesc().isIsTblLevel(), isTblLevel);
+
+ // test stats deletion at partition level
+ client.deletePartitionColumnStatistics(dbName, tblName, partName, colName[1]);
+
+ colStats2 = client.getPartitionColumnStatistics(dbName, tblName, partName, colName[0]);
+
+ // test get stats on a column whose stats were just deleted
+ try {
+ colStats2 = client.getPartitionColumnStatistics(dbName, tblName, partName, colName[1]);
+ fail("getPartitionColumnStatistics should have thrown NoSuchObjectException");
+ } catch (NoSuchObjectException e) {
+ System.out.println("Statistics for column=" + colName[1] + " not found");
+ }
+
+ } catch (Exception e) {
+ System.err.println(StringUtils.stringifyException(e));
+ System.err.println("testColumnStatistics() failed.");
+ throw e;
+ } finally {
+ cleanUp(dbName, tblName, typeName);
+ }
+ }
+
public void testAlterTable() throws Exception {
String dbName = "alterdb";
String invTblName = "alter-tbl";
diff --git ql/build.xml ql/build.xml
index 80b7f79..207cb5b 100644
--- ql/build.xml
+++ ql/build.xml
@@ -214,6 +214,7 @@
+
diff --git ql/if/queryplan.thrift ql/if/queryplan.thrift
index 05fbf58..4427929 100644
--- ql/if/queryplan.thrift
+++ ql/if/queryplan.thrift
@@ -91,6 +91,7 @@ enum StageType {
MOVE,
STATS,
DEPENDENCY_COLLECTION,
+ COLUMNSTATS,
}
struct Stage {
diff --git ql/ivy.xml ql/ivy.xml
index 2c4410a..d5a318b 100644
--- ql/ivy.xml
+++ ql/ivy.xml
@@ -74,7 +74,7 @@
-
+
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
new file mode 100644
index 0000000..d2dc853
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -0,0 +1,384 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.ql.CommandNeedRetryException;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.QueryPlan;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
+import org.apache.hadoop.hive.ql.plan.FetchWork;
+import org.apache.hadoop.hive.ql.plan.api.StageType;
+import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * ColumnStatsTask implementation.
+ **/
+
+public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializable {
+ private static final long serialVersionUID = 1L;
+ private FetchOperator ftOp;
+ private static transient final Log LOG = LogFactory.getLog(ColumnStatsTask.class);
+
+ public ColumnStatsTask() {
+ super();
+ }
+
+ @Override
+ public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) {
+ super.initialize(conf, queryPlan, ctx);
+ try {
+ JobConf job = new JobConf(conf, ExecDriver.class);
+ ftOp = new FetchOperator(work.getfWork(), job);
+ } catch (Exception e) {
+ LOG.error(StringUtils.stringifyException(e));
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void unpackBooleanStats(ObjectInspector oi, Object o, String fName,
+ ColumnStatisticsObj statsObj) {
+ long v = ((LongObjectInspector) oi).get(o);
+ if (fName.equals("counttrues")) {
+ statsObj.getStatsData().getBooleanStats().setNumTrues(v);
+ } else if (fName.equals("countfalses")) {
+ statsObj.getStatsData().getBooleanStats().setNumFalses(v);
+ } else if (fName.equals("countnulls")) {
+ statsObj.getStatsData().getBooleanStats().setNumNulls(v);
+ }
+ }
+
+ private void unpackDoubleStats(ObjectInspector oi, Object o, String fName,
+ ColumnStatisticsObj statsObj) {
+ if (fName.equals("countnulls")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getDoubleStats().setNumNulls(v);
+ } else if (fName.equals("numdistinctvalues")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getDoubleStats().setNumDVs(v);
+ } else if (fName.equals("max")) {
+ double d = ((DoubleObjectInspector) oi).get(o);
+ statsObj.getStatsData().getDoubleStats().setHighValue(d);
+ } else if (fName.equals("min")) {
+ double d = ((DoubleObjectInspector) oi).get(o);
+ statsObj.getStatsData().getDoubleStats().setLowValue(d);
+ }
+ }
+
+ private void unpackLongStats(ObjectInspector oi, Object o, String fName,
+ ColumnStatisticsObj statsObj) {
+ if (fName.equals("countnulls")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getLongStats().setNumNulls(v);
+ } else if (fName.equals("numdistinctvalues")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getLongStats().setNumDVs(v);
+ } else if (fName.equals("max")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getLongStats().setHighValue(v);
+ } else if (fName.equals("min")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getLongStats().setLowValue(v);
+ }
+ }
+
+ private void unpackStringStats(ObjectInspector oi, Object o, String fName,
+ ColumnStatisticsObj statsObj) {
+ if (fName.equals("countnulls")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getStringStats().setNumNulls(v);
+ } else if (fName.equals("numdistinctvalues")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getStringStats().setNumDVs(v);
+ } else if (fName.equals("avglength")) {
+ double d = ((DoubleObjectInspector) oi).get(o);
+ statsObj.getStatsData().getStringStats().setAvgColLen(d);
+ } else if (fName.equals("maxlength")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getStringStats().setMaxColLen(v);
+ }
+ }
+
+ private void unpackBinaryStats(ObjectInspector oi, Object o, String fName,
+ ColumnStatisticsObj statsObj) {
+ if (fName.equals("countnulls")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getBinaryStats().setNumNulls(v);
+ } else if (fName.equals("avglength")) {
+ double d = ((DoubleObjectInspector) oi).get(o);
+ statsObj.getStatsData().getBinaryStats().setAvgColLen(d);
+ } else if (fName.equals("maxlength")) {
+ long v = ((LongObjectInspector) oi).get(o);
+ statsObj.getStatsData().getBinaryStats().setMaxColLen(v);
+ }
+ }
+
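+ // Note: the field names consumed by the unpack methods above ("columntype", "min", "max",
+ // "countnulls", "numdistinctvalues", "avglength", "maxlength", "counttrues", "countfalses")
+ // must match the schema of the rows this task fetches; unrecognized field names are
+ // silently ignored.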
+ private void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName,
+ ColumnStatisticsObj statsObj) {
+ // First infer the type of object
+ if (fieldName.equals("columntype")) {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+ String s = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+ ColumnStatisticsData statsData = new ColumnStatisticsData();
+
+ if (s.equalsIgnoreCase("long")) {
+ LongColumnStatsData longStats = new LongColumnStatsData();
+ statsData.setLongStats(longStats);
+ statsObj.setStatsData(statsData);
+ } else if (s.equalsIgnoreCase("double")) {
+ DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
+ statsData.setDoubleStats(doubleStats);
+ statsObj.setStatsData(statsData);
+ } else if (s.equalsIgnoreCase("string")) {
+ StringColumnStatsData stringStats = new StringColumnStatsData();
+ statsData.setStringStats(stringStats);
+ statsObj.setStatsData(statsData);
+ } else if (s.equalsIgnoreCase("boolean")) {
+ BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
+ statsData.setBooleanStats(booleanStats);
+ statsObj.setStatsData(statsData);
+ } else if (s.equalsIgnoreCase("binary")) {
+ BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
+ statsData.setBinaryStats(binaryStats);
+ statsObj.setStatsData(statsData);
+ }
+ } else {
+ // invoke the right unpack method depending on data type of the column
+ if (statsObj.getStatsData().isSetBooleanStats()) {
+ unpackBooleanStats(oi, o, fieldName, statsObj);
+ } else if (statsObj.getStatsData().isSetLongStats()) {
+ unpackLongStats(oi, o, fieldName, statsObj);
+ } else if (statsObj.getStatsData().isSetDoubleStats()) {
+ unpackDoubleStats(oi, o, fieldName, statsObj);
+ } else if (statsObj.getStatsData().isSetStringStats()) {
+ unpackStringStats(oi, o, fieldName, statsObj);
+ } else if (statsObj.getStatsData().isSetBinaryStats()) {
+ unpackBinaryStats(oi, o, fieldName, statsObj);
+ }
+ }
+ }
+
+ private void unpackStructObject(ObjectInspector oi, Object o, String fName,
+ ColumnStatisticsObj cStatsObj) {
+ if (oi.getCategory() != ObjectInspector.Category.STRUCT) {
+ throw new RuntimeException("Invalid object datatype : " + oi.getCategory().toString());
+ }
+
+ StructObjectInspector soi = (StructObjectInspector) oi;
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+ List