From 47ec51405e32f5aac63a4daae38f3e30bb310462 Mon Sep 17 00:00:00 2001 From: Alexander Kolbasov Date: Fri, 10 Aug 2018 17:26:53 -0700 Subject: [PATCH 1/1] HIVE-20195: Split MetastoreUtils into common and server-specific parts --- .../hcatalog/listener/DummyRawStoreFailEvent.java | 2 +- .../AbstractTestAuthorizationApiAuthorizer.java | 2 - .../org/apache/hadoop/hive/ql/metadata/Hive.java | 5 +- .../org/apache/hadoop/hive/ql/metadata/Table.java | 3 +- .../hadoop/hive/ql/stats/BasicStatsTask.java | 5 +- .../hadoop/hive/metastore/HiveAlterHandler.java | 25 +- .../hadoop/hive/metastore/HiveMetaStore.java | 53 +- .../hadoop/hive/metastore/MetaStoreDirectSql.java | 13 +- .../apache/hadoop/hive/metastore/ObjectStore.java | 19 +- .../org/apache/hadoop/hive/metastore/RawStore.java | 2 +- .../hadoop/hive/metastore/cache/CachedStore.java | 5 +- .../hadoop/hive/metastore/cache/SharedCache.java | 8 +- .../aggr/BinaryColumnStatsAggregator.java | 2 +- .../aggr/BooleanColumnStatsAggregator.java | 2 +- .../columnstats/aggr/ColumnStatsAggregator.java | 2 +- .../aggr/DateColumnStatsAggregator.java | 2 +- .../aggr/DecimalColumnStatsAggregator.java | 31 +- .../aggr/DoubleColumnStatsAggregator.java | 2 +- .../aggr/LongColumnStatsAggregator.java | 3 +- .../aggr/StringColumnStatsAggregator.java | 3 +- .../hadoop/hive/metastore/txn/TxnHandler.java | 4 +- .../metastore/utils/HiveStrictManagedUtils.java | 6 +- .../hive/metastore/utils/MetaStoreServerUtils.java | 939 +++++++++++++++++++++ .../hive/metastore/utils/MetaStoreUtils.java | 904 +------------------- .../metastore/DummyRawStoreControlledCommit.java | 2 +- .../metastore/DummyRawStoreForJdoConnection.java | 4 +- .../hadoop/hive/metastore/TestHiveMetaStore.java | 5 +- .../metastore/TestHiveMetaStoreGetMetaConf.java | 4 +- .../hive/metastore/utils/TestMetaStoreUtils.java | 20 +- 29 files changed, 1057 insertions(+), 1020 deletions(-) create mode 100644 standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java index be40395cc3..0ad2a2469e 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java @@ -91,7 +91,7 @@ import org.apache.hadoop.hive.metastore.api.WMNullablePool; import org.apache.hadoop.hive.metastore.api.WriteEventInfo; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.apache.thrift.TException; /** diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/AbstractTestAuthorizationApiAuthorizer.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/AbstractTestAuthorizationApiAuthorizer.java index 69692d0c19..c10060f817 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/AbstractTestAuthorizationApiAuthorizer.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/AbstractTestAuthorizationApiAuthorizer.java @@ -31,8 +31,6 @@ import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.PrivilegeBag; import 
org.apache.hadoop.hive.metastore.api.Role; -import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.security.authorization.MetaStoreAuthzAPIAuthorizerEmbedOnly; import org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener; import org.apache.thrift.TException; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 45610e37ab..f846e93ce5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -104,6 +104,7 @@ import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.ReplChangeManager; import org.apache.hadoop.hive.metastore.api.*; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.AbstractFileMergeOperator; @@ -1957,10 +1958,10 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par newPartPath, -1, newPartPath.getFileSystem(conf)); } if (filesForStats != null) { - MetaStoreUtils.populateQuickStats(filesForStats, newTPart.getParameters()); + MetaStoreServerUtils.populateQuickStats(filesForStats, newTPart.getParameters()); } else { // The ACID state is probably absent. Warning is logged in the get method. - MetaStoreUtils.clearQuickStats(newTPart.getParameters()); + MetaStoreServerUtils.clearQuickStats(newTPart.getParameters()); } try { LOG.debug("Adding new partition " + newTPart.getSpec()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 9e0cea7af6..26f21cf8c4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; @@ -1073,7 +1074,7 @@ public static void validateColumns(List columns, List } private static String normalize(String colName) throws HiveException { - if (!MetaStoreUtils.validateColumnName(colName)) { + if (!MetaStoreServerUtils.validateColumnName(colName)) { throw new HiveException("Invalid column name '" + colName + "' in the table definition"); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java index b9b4a442b7..6eb1ca2645 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -145,7 
+146,7 @@ public Object process(StatsAggregator statsAggregator) throws HiveException, Met : partish.getPartition().getSpec().toString(); LOG.warn("Partition/partfiles is null for: " + spec); if (isMissingAcidState) { - MetaStoreUtils.clearQuickStats(parameters); + MetaStoreServerUtils.clearQuickStats(parameters); return p.getOutput(); } return null; @@ -159,7 +160,7 @@ public Object process(StatsAggregator statsAggregator) throws HiveException, Met StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); } - MetaStoreUtils.populateQuickStats(partfileStatus, parameters); + MetaStoreServerUtils.populateQuickStats(partfileStatus, parameters); if (statsAggregator != null) { // Update stats for transactional tables (MM, or full ACID with overwrite), even diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index 69f6ed570e..0441a33cd7 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.metastore.events.AlterTableEvent; import org.apache.hadoop.hive.metastore.messaging.EventMessage; import org.apache.hadoop.hive.metastore.utils.FileUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -110,7 +111,7 @@ public void alterTable(RawStore msdb, Warehouse wh, String catName, String dbnam if (!MetaStoreUtils.validateName(newTblName, handler.getConf())) { throw new InvalidOperationException(newTblName + " is not a valid object name"); } - String validate = MetaStoreUtils.validateTblColumns(newt.getSd().getCols()); + String validate = MetaStoreServerUtils.validateTblColumns(newt.getSd().getCols()); if (validate != null) { throw new InvalidOperationException("Invalid column " + validate); } @@ -318,12 +319,12 @@ public void alterTable(RawStore msdb, Warehouse wh, String catName, String dbnam !isPartitionedTable) { Database db = msdb.getDatabase(catName, newDbName); // Update table stats. 
For partitioned table, we update stats in alterPartition() - MetaStoreUtils.updateTableStatsSlow(db, newt, wh, false, true, environmentContext); + MetaStoreServerUtils.updateTableStatsSlow(db, newt, wh, false, true, environmentContext); } if (isPartitionedTable) { //Currently only column related changes can be cascaded in alter table - if(!MetaStoreUtils.areSameColumns(oldt.getSd().getCols(), newt.getSd().getCols())) { + if(!MetaStoreServerUtils.areSameColumns(oldt.getSd().getCols(), newt.getSd().getCols())) { parts = msdb.getPartitions(catName, dbname, name, -1); for (Partition part : parts) { Partition oldPart = new Partition(part); @@ -467,10 +468,10 @@ public Partition alterPartition(RawStore msdb, Warehouse wh, String catName, Str oldPart = msdb.getPartition(catName, dbname, name, new_part.getValues()); if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext)) { // if stats are same, no need to update - if (MetaStoreUtils.isFastStatsSame(oldPart, new_part)) { - MetaStoreUtils.updateBasicState(environmentContext, new_part.getParameters()); + if (MetaStoreServerUtils.isFastStatsSame(oldPart, new_part)) { + MetaStoreServerUtils.updateBasicState(environmentContext, new_part.getParameters()); } else { - MetaStoreUtils.updatePartitionStatsFast( + MetaStoreServerUtils.updatePartitionStatsFast( new_part, tbl, wh, false, true, environmentContext, false); } } @@ -610,7 +611,7 @@ public Partition alterPartition(RawStore msdb, Warehouse wh, String catName, Str } if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext)) { - MetaStoreUtils.updatePartitionStatsFast( + MetaStoreServerUtils.updatePartitionStatsFast( new_part, tbl, wh, false, true, environmentContext, false); } @@ -711,10 +712,10 @@ public Partition alterPartition(RawStore msdb, Warehouse wh, String catName, Str if (MetaStoreUtils.requireCalStats(oldTmpPart, tmpPart, tbl, environmentContext)) { // Check if stats are same, no need to update - if (MetaStoreUtils.isFastStatsSame(oldTmpPart, tmpPart)) { - MetaStoreUtils.updateBasicState(environmentContext, tmpPart.getParameters()); + if (MetaStoreServerUtils.isFastStatsSame(oldTmpPart, tmpPart)) { + MetaStoreServerUtils.updateBasicState(environmentContext, tmpPart.getParameters()); } else { - MetaStoreUtils.updatePartitionStatsFast( + MetaStoreServerUtils.updatePartitionStatsFast( tmpPart, tbl, wh, false, true, environmentContext, false); } } @@ -810,7 +811,7 @@ void alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTa List newStatsObjs = new ArrayList<>(); ColumnStatistics colStats = null; boolean updateColumnStats = !newDbName.equals(dbName) || !newTableName.equals(tableName) - || !MetaStoreUtils.columnsIncludedByNameType(oldCols, newCols); + || !MetaStoreServerUtils.columnsIncludedByNameType(oldCols, newCols); if (updateColumnStats) { List oldColNames = new ArrayList<>(oldCols.size()); for (FieldSchema oldCol : oldCols) { @@ -886,7 +887,7 @@ private ColumnStatistics updateOrGetPartitionColumnStats( || !oldPartName.equals(newPartName); // do not need to update column stats if alter partition is not for rename or changing existing columns - if (!rename && MetaStoreUtils.columnsIncludedByNameType(oldCols, newCols)) { + if (!rename && MetaStoreServerUtils.columnsIncludedByNameType(oldCols, newCols)) { return newPartsColStats; } List oldColNames = new ArrayList<>(oldCols.size()); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index a53d4be03d..8d6a11ec96 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -152,6 +152,7 @@ import org.apache.hadoop.hive.metastore.security.TUGIContainingTransport; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.metastore.txn.TxnUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.hive.metastore.utils.CommonCliOptions; import org.apache.hadoop.hive.metastore.utils.FileUtils; @@ -521,7 +522,7 @@ public Configuration getHiveConf() { @Override public void init() throws MetaException { - initListeners = MetaStoreUtils.getMetaStoreListeners( + initListeners = MetaStoreServerUtils.getMetaStoreListeners( MetaStoreInitListener.class, conf, MetastoreConf.getVar(conf, ConfVars.INIT_HOOKS)); for (MetaStoreInitListener singleInitListener: initListeners) { MetaStoreInitContext context = new MetaStoreInitContext(); @@ -553,20 +554,20 @@ public void init() throws MetaException { } - preListeners = MetaStoreUtils.getMetaStoreListeners(MetaStorePreEventListener.class, + preListeners = MetaStoreServerUtils.getMetaStoreListeners(MetaStorePreEventListener.class, conf, MetastoreConf.getVar(conf, ConfVars.PRE_EVENT_LISTENERS)); preListeners.add(0, new TransactionalValidationListener(conf)); - listeners = MetaStoreUtils.getMetaStoreListeners(MetaStoreEventListener.class, conf, + listeners = MetaStoreServerUtils.getMetaStoreListeners(MetaStoreEventListener.class, conf, MetastoreConf.getVar(conf, ConfVars.EVENT_LISTENERS)); listeners.add(new SessionPropertiesListener(conf)); listeners.add(new AcidEventListener(conf)); - transactionalListeners = MetaStoreUtils.getMetaStoreListeners(TransactionalMetaStoreEventListener.class, + transactionalListeners = MetaStoreServerUtils.getMetaStoreListeners(TransactionalMetaStoreEventListener.class, conf, MetastoreConf.getVar(conf, ConfVars.TRANSACTIONAL_EVENT_LISTENERS)); if (Metrics.getRegistry() != null) { listeners.add(new HMSMetricsListener(conf)); } - endFunctionListeners = MetaStoreUtils.getMetaStoreListeners( + endFunctionListeners = MetaStoreServerUtils.getMetaStoreListeners( MetaStoreEndFunctionListener.class, conf, MetastoreConf.getVar(conf, ConfVars.END_FUNCTION_LISTENERS)); String partitionValidationRegex = @@ -1794,23 +1795,23 @@ private void create_table_core(final RawStore ms, final Table tbl, throw new InvalidObjectException(tbl.getTableName() + " is not a valid object name"); } - String validate = MetaStoreUtils.validateTblColumns(tbl.getSd().getCols()); + String validate = MetaStoreServerUtils.validateTblColumns(tbl.getSd().getCols()); if (validate != null) { throw new InvalidObjectException("Invalid column " + validate); } if (tbl.getPartitionKeys() != null) { - validate = MetaStoreUtils.validateTblColumns(tbl.getPartitionKeys()); + validate = MetaStoreServerUtils.validateTblColumns(tbl.getPartitionKeys()); if (validate != null) { throw new InvalidObjectException("Invalid partition column " + validate); } } SkewedInfo skew = tbl.getSd().getSkewedInfo(); if (skew != null) { - validate = MetaStoreUtils.validateSkewedColNames(skew.getSkewedColNames()); + validate = MetaStoreServerUtils.validateSkewedColNames(skew.getSkewedColNames()); if (validate 
!= null) { throw new InvalidObjectException("Invalid skew column " + validate); } - validate = MetaStoreUtils.validateSkewedColNamesSubsetCol( + validate = MetaStoreServerUtils.validateSkewedColNamesSubsetCol( skew.getSkewedColNames(), tbl.getSd().getCols()); if (validate != null) { throw new InvalidObjectException("Invalid skew column " + validate); @@ -1861,8 +1862,8 @@ private void create_table_core(final RawStore ms, final Table tbl, } } if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - !MetaStoreUtils.isView(tbl)) { - MetaStoreUtils.updateTableStatsSlow(db, tbl, wh, madeDir, false, envContext); + !MetaStoreServerUtils.isView(tbl)) { + MetaStoreServerUtils.updateTableStatsSlow(db, tbl, wh, madeDir, false, envContext); } // set create time @@ -3127,7 +3128,7 @@ private Partition append_partition_common(RawStore ms, String catName, String db part.setTableName(tableName); part.setValues(part_vals); - MetaStoreUtils.validatePartitionNameCharacters(part_vals, partitionValidationPattern); + MetaStoreServerUtils.validatePartitionNameCharacters(part_vals, partitionValidationPattern); tbl = ms.getTable(part.getCatName(), part.getDbName(), part.getTableName(), null); if (tbl == null) { @@ -3174,8 +3175,8 @@ private Partition append_partition_common(RawStore ms, String catName, String db part.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(time)); if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - !MetaStoreUtils.isView(tbl)) { - MetaStoreUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, envContext, true); + !MetaStoreServerUtils.isView(tbl)) { + MetaStoreServerUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, envContext, true); } if (ms.addPartition(part)) { @@ -3734,7 +3735,7 @@ private int add_partitions_pspec_core(RawStore ms, String catName, String dbName private boolean startAddPartition( RawStore ms, Partition part, List partitionKeys, boolean ifNotExists) throws TException { - MetaStoreUtils.validatePartitionNameCharacters(part.getValues(), + MetaStoreServerUtils.validatePartitionNameCharacters(part.getValues(), partitionValidationPattern); boolean doesExist = ms.doesPartitionExist(part.getCatName(), part.getDbName(), part.getTableName(), partitionKeys, part.getValues()); @@ -3798,8 +3799,8 @@ private void initializeAddedPartition( private void initializeAddedPartition( final Table tbl, final PartitionSpecProxy.PartitionIterator part, boolean madeDir) throws MetaException { if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - !MetaStoreUtils.isView(tbl)) { - MetaStoreUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, null, true); + !MetaStoreServerUtils.isView(tbl)) { + MetaStoreServerUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, null, true); } // set create time @@ -4883,7 +4884,7 @@ private void rename_partition(String catName, String db_name, String tbl_name, try { firePreEvent(new PreAlterPartitionEvent(db_name, tbl_name, part_vals, new_part, this)); if (part_vals != null && !part_vals.isEmpty()) { - MetaStoreUtils.validatePartitionNameCharacters(new_part.getValues(), + MetaStoreServerUtils.validatePartitionNameCharacters(new_part.getValues(), partitionValidationPattern); } @@ -7011,10 +7012,10 @@ public boolean partition_name_has_valid_characters(List part_vals, Exception ex = null; try { if (throw_exception) { - MetaStoreUtils.validatePartitionNameCharacters(part_vals, partitionValidationPattern); + MetaStoreServerUtils.validatePartitionNameCharacters(part_vals, 
partitionValidationPattern); ret = true; } else { - ret = MetaStoreUtils.partitionNameHasValidCharacters(part_vals, + ret = MetaStoreServerUtils.partitionNameHasValidCharacters(part_vals, partitionValidationPattern); } } catch (MetaException e) { @@ -7610,7 +7611,7 @@ public boolean set_aggr_stats_for(SetPartitionsStatsRequest request) throws TExc for (ColumnStatistics csNew : csNews) { String partName = csNew.getStatsDesc().getPartName(); if (newStatsMap.containsKey(partName)) { - MetaStoreUtils.mergeColStats(csNew, newStatsMap.get(partName)); + MetaStoreServerUtils.mergeColStats(csNew, newStatsMap.get(partName)); } newStatsMap.put(partName, csNew); } @@ -7670,10 +7671,10 @@ private boolean updatePartColumnStatsWithMerge(String catName, String dbName, St csNew.setStatsObj(Lists.newArrayList()); } else { // we first use getParameters() to prune the stats - MetaStoreUtils.getMergableCols(csNew, part.getParameters()); + MetaStoreServerUtils.getMergableCols(csNew, part.getParameters()); // we merge those that can be merged if (csOld != null && csOld.getStatsObjSize() != 0 && !csNew.getStatsObj().isEmpty()) { - MetaStoreUtils.mergeColStats(csNew, csOld); + MetaStoreServerUtils.mergeColStats(csNew, csOld); } } @@ -7726,11 +7727,11 @@ private boolean updateTableColumnStatsWithMerge(String catName, String dbName, S firstColStats.setStatsObj(Lists.newArrayList()); } else { Table t = getTable(catName, dbName, tableName); - MetaStoreUtils.getMergableCols(firstColStats, t.getParameters()); + MetaStoreServerUtils.getMergableCols(firstColStats, t.getParameters()); // we merge those that can be merged if (csOld != null && csOld.getStatsObjSize() != 0 && !firstColStats.getStatsObj().isEmpty()) { - MetaStoreUtils.mergeColStats(firstColStats, csOld); + MetaStoreServerUtils.mergeColStats(firstColStats, csOld); } } @@ -7835,7 +7836,7 @@ private void authorizeProxyPrivilege() throws Exception { LOG.error("Cannot obtain username", ex); throw ex; } - if (!MetaStoreUtils.checkUserHasHostProxyPrivileges(user, conf, getIPAddress())) { + if (!MetaStoreServerUtils.checkUserHasHostProxyPrivileges(user, conf, getIPAddress())) { throw new MetaException("User " + user + " is not allowed to perform this API call"); } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index 95d9fe21bd..571c789edd 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -89,8 +89,9 @@ import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeVisitor; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.apache.hive.common.util.BloomFilter; import org.datanucleus.store.rdbms.query.ForwardQueryResult; import org.slf4j.Logger; @@ -384,7 +385,7 @@ public Database getDatabase(String catName, String dbName) throws MetaException{ db.setOwnerType( (null == type || 
type.trim().isEmpty()) ? null : PrincipalType.valueOf(type)); db.setCatalogName(extractSqlString(dbline[6])); - db.setParameters(MetaStoreUtils.trimMapNulls(dbParams,convertMapNullsToEmptyStrings)); + db.setParameters(MetaStoreServerUtils.trimMapNulls(dbParams,convertMapNullsToEmptyStrings)); if (LOG.isDebugEnabled()){ LOG.debug("getDatabase: directsql returning db " + db.getName() + " locn["+db.getLocationUri() +"] desc [" +db.getDescription() @@ -772,7 +773,7 @@ public void apply(Partition t, Object[] fields) { }}); // Perform conversion of null map values for (Partition t : partitions.values()) { - t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); + t.setParameters(MetaStoreServerUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); } queryText = "select \"PART_ID\", \"PART_KEY_VAL\" from " + PARTITION_KEY_VALS + "" @@ -805,7 +806,7 @@ public void apply(StorageDescriptor t, Object[] fields) { }}); // Perform conversion of null map values for (StorageDescriptor t : sds.values()) { - t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); + t.setParameters(MetaStoreServerUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); } queryText = "select \"SD_ID\", \"COLUMN_NAME\", " + SORT_COLS + ".\"ORDER\"" @@ -947,7 +948,7 @@ public void apply(SerDeInfo t, Object[] fields) { }}); // Perform conversion of null map values for (SerDeInfo t : serdes.values()) { - t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); + t.setParameters(MetaStoreServerUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); } return orderedResult; @@ -1615,7 +1616,7 @@ private long partsFoundForPartitions( List partStats = getPartitionStats(catName, dbName, tableName, partNames, colNames, true); // 2. 
use util function to aggr stats - return MetaStoreUtils.aggrPartitionStats(partStats, catName, dbName, tableName, partNames, colNames, + return MetaStoreServerUtils.aggrPartitionStats(partStats, catName, dbName, tableName, partNames, colNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 8af164efc9..8e2f94eb69 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -96,13 +96,12 @@ import org.apache.hadoop.hive.metastore.parser.ExpressionTree.FilterBuilder; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.metastore.tools.SQLGenerator; -import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.metastore.utils.FileUtils; import org.apache.hadoop.hive.metastore.utils.JavaUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.metastore.utils.ObjectPair; -import org.apache.hive.common.util.TxnIdUtils; import org.apache.thrift.TException; import org.datanucleus.AbstractNucleusContext; import org.datanucleus.ClassLoaderResolver; @@ -1899,7 +1898,7 @@ private MTable getMTable(String catName, String db, String table) { /** Makes shallow copy of a map to avoid DataNucleus mucking with our objects. */ private Map convertMap(Map dnMap) { - return MetaStoreUtils.trimMapNulls(dnMap, + return MetaStoreServerUtils.trimMapNulls(dnMap, MetastoreConf.getBoolVar(getConf(), ConfVars.ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS)); } @@ -2296,7 +2295,7 @@ public boolean addPartitions(String catName, String dbName, String tblName, List private boolean isValidPartition( Partition part, List partitionKeys, boolean ifNotExists) throws MetaException { - MetaStoreUtils.validatePartitionNameCharacters(part.getValues(), + MetaStoreServerUtils.validatePartitionNameCharacters(part.getValues(), partitionValidationPattern); boolean doesExist = doesPartitionExist(part.getCatName(), part.getDbName(), part.getTableName(), partitionKeys, part.getValues()); @@ -8889,21 +8888,21 @@ protected String describeResult() { } @Override - public List getPartitionColStatsForDatabase(String catName, String dbName) + public List getPartitionColStatsForDatabase(String catName, String dbName) throws MetaException, NoSuchObjectException { final boolean enableBitVector = MetastoreConf.getBoolVar(getConf(), ConfVars.STATS_FETCH_BITVECTOR); - return new GetHelper>( + return new GetHelper>( catName, dbName, null, true, false) { @Override - protected List getSqlResult( - GetHelper> ctx) throws MetaException { + protected List getSqlResult( + GetHelper> ctx) throws MetaException { return directSql.getColStatsForAllTablePartitions(catName, dbName, enableBitVector); } @Override - protected List getJdoResult( - GetHelper> ctx) + protected List getJdoResult( + GetHelper> ctx) throws MetaException, NoSuchObjectException { // This is fast path for query optimizations, if we can find this info // quickly using directSql, do it. 
No point in failing back to slow path diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java index 8d647a0f6a..b61ee81533 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -32,7 +32,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.apache.thrift.TException; public interface RawStore extends Configurable { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 1d53244484..0445cbf909 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -59,8 +59,9 @@ import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.metastore.utils.FileUtils; import org.apache.hadoop.hive.metastore.utils.JavaUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.apache.hadoop.hive.metastore.utils.StringUtils; import org.apache.thrift.TException; import org.slf4j.Logger; @@ -1902,7 +1903,7 @@ private MergedColumnStatsForPartitions mergeColStatsForPartitions( } // Note that enableBitVector does not apply here because ColumnStatisticsObj // itself will tell whether bitvector is null or not and aggr logic can automatically apply. 
- return new MergedColumnStatsForPartitions(MetaStoreUtils.aggrPartitionStats(colStatsMap, + return new MergedColumnStatsForPartitions(MetaStoreServerUtils.aggrPartitionStats(colStatsMap, partNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner), partsFound); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java index 24f940ccbf..37c300e882 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java @@ -47,7 +47,7 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.TableMeta; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.StringUtils; import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator; import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator.ObjectEstimator; @@ -709,7 +709,7 @@ private void updateTableObj(Table newTable, SharedCache sharedCache) { } setTable(tblCopy); if (tblCopy.getSd() != null) { - sdHash = MetaStoreUtils.hashStorageDescriptor(tblCopy.getSd(), md); + sdHash = MetaStoreServerUtils.hashStorageDescriptor(tblCopy.getSd(), md); StorageDescriptor sd = tblCopy.getSd(); sharedCache.increSd(sd, sdHash); tblCopy.setSd(null); @@ -727,7 +727,7 @@ private PartitionWrapper makePartitionWrapper(Partition part, SharedCache shared Partition partCopy = part.deepCopy(); PartitionWrapper wrapper; if (part.getSd() != null) { - byte[] sdHash = MetaStoreUtils.hashStorageDescriptor(part.getSd(), md); + byte[] sdHash = MetaStoreServerUtils.hashStorageDescriptor(part.getSd(), md); StorageDescriptor sd = part.getSd(); sharedCache.increSd(sd, sdHash); partCopy.setSd(null); @@ -1146,7 +1146,7 @@ private TableWrapper createTableWrapper(String catName, String dbName, String tb } } if (tbl.getSd() != null) { - byte[] sdHash = MetaStoreUtils.hashStorageDescriptor(tbl.getSd(), md); + byte[] sdHash = MetaStoreServerUtils.hashStorageDescriptor(tbl.getSd(), md); StorageDescriptor sd = tbl.getSd(); increSd(sd, sdHash); tblCopy.setSd(null); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java index c18b4c79bf..c885cf2d44 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java @@ -25,7 +25,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; public class BinaryColumnStatsAggregator extends 
ColumnStatsAggregator { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java index 7630183180..6fafab53e0 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java @@ -25,7 +25,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; public class BooleanColumnStatsAggregator extends ColumnStatsAggregator { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregator.java index 0beaf60230..c4325763be 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregator.java @@ -23,7 +23,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; public abstract class ColumnStatsAggregator { public boolean useDensityFunctionForNDVEstimation; diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java index e8ff513f50..0968f0f42d 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java @@ -34,7 +34,7 @@ import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java index ac7e8e35f9..643bee5d6a 100644 --- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java @@ -28,15 +28,14 @@ import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils; import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -102,7 +101,7 @@ public ColumnStatisticsObj aggregate(List colStatsWit (DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats(); lowerBound = Math.max(lowerBound, newData.getNumDVs()); higherBound += newData.getNumDVs(); - densityAvgSum += (MetaStoreUtils.decimalToDouble(newData.getHighValue()) - MetaStoreUtils + densityAvgSum += (MetaStoreServerUtils.decimalToDouble(newData.getHighValue()) - MetaStoreServerUtils .decimalToDouble(newData.getLowValue())) / newData.getNumDVs(); if (ndvEstimator != null) { ndvEstimator.mergeEstimators(newData.getNdvEstimator()); @@ -110,13 +109,13 @@ public ColumnStatisticsObj aggregate(List colStatsWit if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { - if (MetaStoreUtils.decimalToDouble(aggregateData.getLowValue()) < MetaStoreUtils + if (MetaStoreServerUtils.decimalToDouble(aggregateData.getLowValue()) < MetaStoreServerUtils .decimalToDouble(newData.getLowValue())) { aggregateData.setLowValue(aggregateData.getLowValue()); } else { aggregateData.setLowValue(newData.getLowValue()); } - if (MetaStoreUtils.decimalToDouble(aggregateData.getHighValue()) > MetaStoreUtils + if (MetaStoreServerUtils.decimalToDouble(aggregateData.getHighValue()) > MetaStoreServerUtils .decimalToDouble(newData.getHighValue())) { aggregateData.setHighValue(aggregateData.getHighValue()); } else { @@ -137,7 +136,7 @@ public ColumnStatisticsObj aggregate(List colStatsWit // We have estimation, lowerbound and higherbound. We use estimation // if it is between lowerbound and higherbound. 
double densityAvg = densityAvgSum / partNames.size(); - estimation = (long) ((MetaStoreUtils.decimalToDouble(aggregateData.getHighValue()) - MetaStoreUtils + estimation = (long) ((MetaStoreServerUtils.decimalToDouble(aggregateData.getHighValue()) - MetaStoreServerUtils .decimalToDouble(aggregateData.getLowValue())) / densityAvg); if (estimation < lowerBound) { estimation = lowerBound; @@ -170,7 +169,7 @@ public ColumnStatisticsObj aggregate(List colStatsWit String partName = csp.getPartName(); DecimalColumnStatsData newData = cso.getStatsData().getDecimalStats(); if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (MetaStoreUtils.decimalToDouble(newData.getHighValue()) - MetaStoreUtils + densityAvgSum += (MetaStoreServerUtils.decimalToDouble(newData.getHighValue()) - MetaStoreServerUtils .decimalToDouble(newData.getLowValue())) / newData.getNumDVs(); } adjustedIndexMap.put(partName, (double) indexMap.get(partName)); @@ -201,7 +200,7 @@ public ColumnStatisticsObj aggregate(List colStatsWit csd.setDecimalStats(aggregateData); adjustedStatsMap.put(pseudoPartName.toString(), csd); if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (MetaStoreUtils.decimalToDouble(aggregateData.getHighValue()) - MetaStoreUtils + densityAvgSum += (MetaStoreServerUtils.decimalToDouble(aggregateData.getHighValue()) - MetaStoreServerUtils .decimalToDouble(aggregateData.getLowValue())) / aggregateData.getNumDVs(); } // reset everything @@ -220,13 +219,13 @@ public ColumnStatisticsObj aggregate(List colStatsWit if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { - if (MetaStoreUtils.decimalToDouble(aggregateData.getLowValue()) < MetaStoreUtils + if (MetaStoreServerUtils.decimalToDouble(aggregateData.getLowValue()) < MetaStoreServerUtils .decimalToDouble(newData.getLowValue())) { aggregateData.setLowValue(aggregateData.getLowValue()); } else { aggregateData.setLowValue(newData.getLowValue()); } - if (MetaStoreUtils.decimalToDouble(aggregateData.getHighValue()) > MetaStoreUtils + if (MetaStoreServerUtils.decimalToDouble(aggregateData.getHighValue()) > MetaStoreServerUtils .decimalToDouble(newData.getHighValue())) { aggregateData.setHighValue(aggregateData.getHighValue()); } else { @@ -244,7 +243,7 @@ public ColumnStatisticsObj aggregate(List colStatsWit csd.setDecimalStats(aggregateData); adjustedStatsMap.put(pseudoPartName.toString(), csd); if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (MetaStoreUtils.decimalToDouble(aggregateData.getHighValue()) - MetaStoreUtils + densityAvgSum += (MetaStoreServerUtils.decimalToDouble(aggregateData.getHighValue()) - MetaStoreServerUtils .decimalToDouble(aggregateData.getLowValue())) / aggregateData.getNumDVs(); } } @@ -283,8 +282,8 @@ public int compare(Map.Entry o1, double minInd = adjustedIndexMap.get(list.get(0).getKey()); double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); double lowValue = 0; - double min = MetaStoreUtils.decimalToDouble(list.get(0).getValue().getLowValue()); - double max = MetaStoreUtils.decimalToDouble(list.get(list.size() - 1).getValue().getLowValue()); + double min = MetaStoreServerUtils.decimalToDouble(list.get(0).getValue().getLowValue()); + double max = MetaStoreServerUtils.decimalToDouble(list.get(list.size() - 1).getValue().getLowValue()); if (minInd == maxInd) { lowValue = min; } else if (minInd < maxInd) { @@ -306,8 +305,8 @@ public int compare(Map.Entry o1, minInd = adjustedIndexMap.get(list.get(0).getKey()); maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); 
double highValue = 0; - min = MetaStoreUtils.decimalToDouble(list.get(0).getValue().getHighValue()); - max = MetaStoreUtils.decimalToDouble(list.get(list.size() - 1).getValue().getHighValue()); + min = MetaStoreServerUtils.decimalToDouble(list.get(0).getValue().getHighValue()); + max = MetaStoreServerUtils.decimalToDouble(list.get(list.size() - 1).getValue().getHighValue()); if (minInd == maxInd) { highValue = min; } else if (minInd < maxInd) { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java index ece77dd51b..bcb5860f58 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java @@ -33,7 +33,7 @@ import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java index e6823d342a..70a35a8395 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java @@ -28,13 +28,12 @@ import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java index 9537647503..d411e02c74 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java @@ -28,13 +28,12 @@ 
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java index 8edc387cf7..015dfd9ec9 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java @@ -80,7 +80,7 @@ import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; import org.apache.hadoop.hive.metastore.tools.SQLGenerator; import org.apache.hadoop.hive.metastore.utils.JavaUtils; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.StringableMap; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; @@ -329,7 +329,7 @@ public void setConf(Configuration conf) { maxOpenTxns = MetastoreConf.getIntVar(conf, ConfVars.MAX_OPEN_TXNS); try { - transactionalListeners = MetaStoreUtils.getMetaStoreListeners( + transactionalListeners = MetaStoreServerUtils.getMetaStoreListeners( TransactionalMetaStoreEventListener.class, conf, MetastoreConf.getVar(conf, ConfVars.TRANSACTIONAL_EVENT_LISTENERS)); } catch(MetaException e) { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/HiveStrictManagedUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/HiveStrictManagedUtils.java index fac3c22631..7213f8e268 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/HiveStrictManagedUtils.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/HiveStrictManagedUtils.java @@ -18,10 +18,6 @@ package org.apache.hadoop.hive.metastore.utils; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -54,7 +50,7 @@ public static String validateStrictManagedTable(Configuration conf, TableType tableType = TableType.valueOf(table.getTableType()); if (tableType == TableType.MANAGED_TABLE) { - if (!MetaStoreUtils.isTransactionalTable(table.getParameters())) { + if (!MetaStoreServerUtils.isTransactionalTable(table.getParameters())) { return createValidationError(table, "Table is marked as a managed table but is not transactional."); } if (MetaStoreUtils.isNonNativeTable(table)) { diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java new file mode 100644 index 0000000000..69846b77d1 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java @@ -0,0 +1,939 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.utils; + +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.net.InetSocketAddress; +import java.net.ServerSocket; +import java.net.Socket; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.regex.Pattern; + +import com.google.common.base.Predicates; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections.ListUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.TableName; +import org.apache.hadoop.hive.metastore.ColumnType; +import org.apache.hadoop.hive.metastore.HiveMetaStore; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.SkewedInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator; +import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory; +import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger; +import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; +import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; +import org.apache.hadoop.security.authorize.DefaultImpersonationProvider; +import org.apache.hadoop.security.authorize.ProxyUsers; +import 
org.apache.hadoop.util.MachineList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +/** + * Utility methods used by Hive standalone metastore server. + */ +public class MetaStoreServerUtils { + private static final Charset ENCODING = StandardCharsets.UTF_8; + private static final Logger LOG = LoggerFactory.getLogger(MetaStoreServerUtils.class); + + /** + * Helper function to transform Nulls to empty strings. + */ + private static final com.google.common.base.Function<String, String> transFormNullsToEmptyString + = new com.google.common.base.Function<String, String>() { + @Override + public String apply(@Nullable String string) { + return org.apache.commons.lang.StringUtils.defaultString(string); + } + }; + + /** + * We have a need to sanity-check the map before conversion from persisted objects to + * metadata thrift objects because null values in maps will cause a NPE if we send + * across thrift. Pruning is appropriate for most cases except for databases such as + * Oracle where Empty strings are stored as nulls, in which case we need to handle that. + * See HIVE-8485 for motivations for this. + */ + public static Map<String, String> trimMapNulls( + Map<String, String> dnMap, boolean retrieveMapNullsAsEmptyStrings){ + if (dnMap == null){ + return null; + } + // Must be deterministic order map - see HIVE-8707 + // => we use Maps.newLinkedHashMap instead of Maps.newHashMap + if (retrieveMapNullsAsEmptyStrings) { + // convert any nulls present in map values to empty strings - this is done in the case + // of backing dbs like oracle which persist empty strings as nulls. + return Maps.newLinkedHashMap(Maps.transformValues(dnMap, transFormNullsToEmptyString)); + } else { + // prune any nulls present in map values - this is the typical case. + return Maps.newLinkedHashMap(Maps.filterValues(dnMap, Predicates.notNull())); + } + } + + // Given a list of partStats, this function will give you an aggr stats + public static List<ColumnStatisticsObj> aggrPartitionStats(List<ColumnStatistics> partStats, + String catName, String dbName, String tableName, List<String> partNames, List<String> colNames, + boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner) + throws MetaException { + Map<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> colStatsMap = + new HashMap<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>>(); + // Group stats by colName for each partition + Map<String, ColumnStatsAggregator> aliasToAggregator = + new HashMap<String, ColumnStatsAggregator>(); + for (ColumnStatistics css : partStats) { + List<ColumnStatisticsObj> objs = css.getStatsObj(); + for (ColumnStatisticsObj obj : objs) { + String partName = css.getStatsDesc().getPartName(); + if (aliasToAggregator.get(obj.getColName()) == null) { + aliasToAggregator.put(obj.getColName(), + ColumnStatsAggregatorFactory.getColumnStatsAggregator( + obj.getStatsData().getSetField(), useDensityFunctionForNDVEstimation, ndvTuner)); + colStatsMap.put(aliasToAggregator.get(obj.getColName()), + new ArrayList<ColStatsObjWithSourceInfo>()); + } + colStatsMap.get(aliasToAggregator.get(obj.getColName())) + .add(new ColStatsObjWithSourceInfo(obj, catName, dbName, tableName, partName)); + } + } + if (colStatsMap.size() < 1) { + LOG.debug("No stats data found for: tblName= {}, partNames= {}, colNames= {}", + TableName.getQualified(catName, dbName, tableName), partNames, colNames); + return new ArrayList<ColumnStatisticsObj>(); + } + return aggrPartitionStats(colStatsMap, partNames, areAllPartsFound, + useDensityFunctionForNDVEstimation, ndvTuner); + } + + public static List<ColumnStatisticsObj> aggrPartitionStats( + Map<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> colStatsMap, + final List<String> partNames, final boolean areAllPartsFound, + final boolean useDensityFunctionForNDVEstimation, final double ndvTuner) + throws MetaException { + List<ColumnStatisticsObj> aggrColStatObjs = new ArrayList<ColumnStatisticsObj>();
+ int numProcessors = Runtime.getRuntime().availableProcessors(); + final ExecutorService pool = + Executors.newFixedThreadPool(Math.min(colStatsMap.size(), numProcessors), + new ThreadFactoryBuilder().setDaemon(true).setNameFormat("aggr-col-stats-%d").build()); + final List<Future<ColumnStatisticsObj>> futures = Lists.newLinkedList(); + LOG.debug("Aggregating column stats. Threads used: {}", + Math.min(colStatsMap.size(), numProcessors)); + long start = System.currentTimeMillis(); + for (final Map.Entry<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> entry : colStatsMap + .entrySet()) { + futures.add(pool.submit(new Callable<ColumnStatisticsObj>() { + @Override + public ColumnStatisticsObj call() throws MetaException { + List<ColStatsObjWithSourceInfo> colStatWithSourceInfo = entry.getValue(); + ColumnStatsAggregator aggregator = entry.getKey(); + try { + ColumnStatisticsObj statsObj = + aggregator.aggregate(colStatWithSourceInfo, partNames, areAllPartsFound); + return statsObj; + } catch (MetaException e) { + LOG.debug(e.getMessage()); + throw e; + } + } + })); + } + pool.shutdown(); + if (!futures.isEmpty()) { + for (Future<ColumnStatisticsObj> future : futures) { + try { + if (future.get() != null) { + aggrColStatObjs.add(future.get()); + } + } catch (InterruptedException | ExecutionException e) { + LOG.debug(e.getMessage()); + pool.shutdownNow(); + throw new MetaException(e.toString()); + } + + } + } + LOG.debug("Time for aggr col stats in seconds: {} Threads used: {}", + ((System.currentTimeMillis() - (double) start)) / 1000, + Math.min(colStatsMap.size(), numProcessors)); + return aggrColStatObjs; + } + + public static double decimalToDouble(Decimal decimal) { + return new BigDecimal(new BigInteger(decimal.getUnscaled()), decimal.getScale()).doubleValue(); + } + + public static void validatePartitionNameCharacters(List<String> partVals, + Pattern partitionValidationPattern) throws MetaException { + + String invalidPartitionVal = getPartitionValWithInvalidCharacter(partVals, partitionValidationPattern); + if (invalidPartitionVal != null) { + throw new MetaException("Partition value '" + invalidPartitionVal + + "' contains a character " + "not matched by whitelist pattern '" + + partitionValidationPattern.toString() + "'. " + "(configure with " + + MetastoreConf.ConfVars.PARTITION_NAME_WHITELIST_PATTERN.getVarname() + ")"); + } + } + + private static String getPartitionValWithInvalidCharacter(List<String> partVals, + Pattern partitionValidationPattern) { + if (partitionValidationPattern == null) { + return null; + } + + for (String partVal : partVals) { + if (!partitionValidationPattern.matcher(partVal).matches()) { + return partVal; + } + } + + return null; + } + + /** + * Produce a hash for the storage descriptor + * @param sd storage descriptor to hash + * @param md message descriptor to use to generate the hash + * @return the hash as a byte array + */ + public static synchronized byte[] hashStorageDescriptor(StorageDescriptor sd, MessageDigest md) { + // Note all maps and lists have to be absolutely sorted. Otherwise we'll produce different + // results for hashes based on the OS or JVM being used. + md.reset(); + // In case cols are null + if (sd.getCols() != null) { + for (FieldSchema fs : sd.getCols()) { + md.update(fs.getName().getBytes(ENCODING)); + md.update(fs.getType().getBytes(ENCODING)); + if (fs.getComment() != null) { + md.update(fs.getComment().getBytes(ENCODING)); + } + } + } + if (sd.getInputFormat() != null) { + md.update(sd.getInputFormat().getBytes(ENCODING)); + } + if (sd.getOutputFormat() != null) { + md.update(sd.getOutputFormat().getBytes(ENCODING)); + } + md.update(sd.isCompressed() ? "true".getBytes(ENCODING) : "false".getBytes(ENCODING));
+ md.update(Integer.toString(sd.getNumBuckets()).getBytes(ENCODING)); + if (sd.getSerdeInfo() != null) { + SerDeInfo serde = sd.getSerdeInfo(); + if (serde.getName() != null) { + md.update(serde.getName().getBytes(ENCODING)); + } + if (serde.getSerializationLib() != null) { + md.update(serde.getSerializationLib().getBytes(ENCODING)); + } + if (serde.getParameters() != null) { + SortedMap<String, String> params = new TreeMap<>(serde.getParameters()); + for (Map.Entry<String, String> param : params.entrySet()) { + md.update(param.getKey().getBytes(ENCODING)); + md.update(param.getValue().getBytes(ENCODING)); + } + } + } + if (sd.getBucketCols() != null) { + List<String> bucketCols = new ArrayList<>(sd.getBucketCols()); + for (String bucket : bucketCols) { + md.update(bucket.getBytes(ENCODING)); + } + } + if (sd.getSortCols() != null) { + SortedSet<Order> orders = new TreeSet<>(sd.getSortCols()); + for (Order order : orders) { + md.update(order.getCol().getBytes(ENCODING)); + md.update(Integer.toString(order.getOrder()).getBytes(ENCODING)); + } + } + if (sd.getSkewedInfo() != null) { + SkewedInfo skewed = sd.getSkewedInfo(); + if (skewed.getSkewedColNames() != null) { + SortedSet<String> colnames = new TreeSet<>(skewed.getSkewedColNames()); + for (String colname : colnames) { + md.update(colname.getBytes(ENCODING)); + } + } + if (skewed.getSkewedColValues() != null) { + SortedSet<String> sortedOuterList = new TreeSet<>(); + for (List<String> innerList : skewed.getSkewedColValues()) { + SortedSet<String> sortedInnerList = new TreeSet<>(innerList); + sortedOuterList.add(org.apache.commons.lang.StringUtils.join(sortedInnerList, ".")); + } + for (String colval : sortedOuterList) { + md.update(colval.getBytes(ENCODING)); + } + } + if (skewed.getSkewedColValueLocationMaps() != null) { + SortedMap<String, String> sortedMap = new TreeMap<>(); + for (Map.Entry<List<String>, String> smap : skewed.getSkewedColValueLocationMaps().entrySet()) { + SortedSet<String> sortedKey = new TreeSet<>(smap.getKey()); + sortedMap.put(org.apache.commons.lang.StringUtils.join(sortedKey, "."), smap.getValue()); + } + for (Map.Entry<String, String> e : sortedMap.entrySet()) { + md.update(e.getKey().getBytes(ENCODING)); + md.update(e.getValue().getBytes(ENCODING)); + } + } + md.update(sd.isStoredAsSubDirectories() ? "true".getBytes(ENCODING) : "false".getBytes(ENCODING)); + } + + return md.digest(); + } + + /* + * At the Metadata level there are no restrictions on Column Names. + */ + public static boolean validateColumnName(String name) { + return true; + } + + public static boolean isView(Table table) { + if (table == null) { + return false; + } + return TableType.VIRTUAL_VIEW.toString().equals(table.getTableType()); + } + + /** + * @param partParams + * @return True if the passed Parameters Map contains values for all "Fast Stats".
+ */ + static boolean containsAllFastStats(Map partParams) { + for (String stat : StatsSetupConst.FAST_STATS) { + if (!partParams.containsKey(stat)) { + return false; + } + } + return true; + } + + public static boolean isFastStatsSame(Partition oldPart, Partition newPart) { + // requires to calculate stats if new and old have different fast stats + if ((oldPart != null) && (oldPart.getParameters() != null)) { + for (String stat : StatsSetupConst.FAST_STATS) { + if (oldPart.getParameters().containsKey(stat)) { + Long oldStat = Long.parseLong(oldPart.getParameters().get(stat)); + Long newStat = Long.parseLong(newPart.getParameters().get(stat)); + if (!oldStat.equals(newStat)) { + return false; + } + } else { + return false; + } + } + return true; + } + return false; + } + + /** + * Updates the numFiles and totalSize parameters for the passed Table by querying + * the warehouse if the passed Table does not already have values for these parameters. + * NOTE: This function is rather expensive since it needs to traverse the file system to get all + * the information. + * + * @param newDir if true, the directory was just created and can be assumed to be empty + * @param forceRecompute Recompute stats even if the passed Table already has + * these parameters set + */ + public static void updateTableStatsSlow(Database db, Table tbl, Warehouse wh, + boolean newDir, boolean forceRecompute, + EnvironmentContext environmentContext) throws MetaException { + // DO_NOT_UPDATE_STATS is supposed to be a transient parameter that is only passed via RPC + // We want to avoid this property from being persistent. + // + // NOTE: If this property *is* set as table property we will remove it which is incorrect but + // we can't distinguish between these two cases + // + // This problem was introduced by HIVE-10228. A better approach would be to pass the property + // via the environment context. + Map params = tbl.getParameters(); + boolean updateStats = true; + if ((params != null) && params.containsKey(StatsSetupConst.DO_NOT_UPDATE_STATS)) { + updateStats = !Boolean.valueOf(params.get(StatsSetupConst.DO_NOT_UPDATE_STATS)); + params.remove(StatsSetupConst.DO_NOT_UPDATE_STATS); + } + + if (!updateStats || newDir || tbl.getPartitionKeysSize() != 0) { + return; + } + + // If stats are already present and forceRecompute isn't set, nothing to do + if (!forceRecompute && params != null && containsAllFastStats(params)) { + return; + } + + // NOTE: wh.getFileStatusesForUnpartitionedTable() can be REALLY slow + List fileStatus = wh.getFileStatusesForUnpartitionedTable(db, tbl); + if (params == null) { + params = new HashMap<>(); + tbl.setParameters(params); + } + // The table location already exists and may contain data. + // Let's try to populate those stats that don't require full scan. + LOG.info("Updating table stats for {}", tbl.getTableName()); + populateQuickStats(fileStatus, params); + LOG.info("Updated size of table {} to {}", + tbl.getTableName(), params.get(StatsSetupConst.TOTAL_SIZE)); + if (environmentContext != null + && environmentContext.isSetProperties() + && StatsSetupConst.TASK.equals(environmentContext.getProperties().get( + StatsSetupConst.STATS_GENERATED))) { + StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); + } else { + StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); + } + } + + /** This method is invalid for MM and ACID tables unless fileStatus comes from AcidUtils. 
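+   * A rough illustration (example values only): given FileStatus entries for two plain data
+   * files of 10 and 20 bytes, with directories in the listing skipped, params would end up
+   * with StatsSetupConst.NUM_FILES = "2", StatsSetupConst.TOTAL_SIZE = "30", and
+   * StatsSetupConst.NUM_ERASURE_CODED_FILES counting however many of those files are
+   * erasure coded.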
*/ + public static void populateQuickStats(List fileStatus, Map params) { + // Why is this even in metastore? + LOG.trace("Populating quick stats based on {} files", fileStatus.size()); + int numFiles = 0; + long tableSize = 0L; + int numErasureCodedFiles = 0; + for (FileStatus status : fileStatus) { + // don't take directories into account for quick stats TODO: wtf? + if (!status.isDir()) { + tableSize += status.getLen(); + numFiles += 1; + if (status.isErasureCoded()) { + numErasureCodedFiles++; + } + } + } + params.put(StatsSetupConst.NUM_FILES, Integer.toString(numFiles)); + params.put(StatsSetupConst.TOTAL_SIZE, Long.toString(tableSize)); + params.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, Integer.toString(numErasureCodedFiles)); + } + + public static void clearQuickStats(Map params) { + params.remove(StatsSetupConst.NUM_FILES); + params.remove(StatsSetupConst.TOTAL_SIZE); + params.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES); + } + + public static boolean areSameColumns(List oldCols, List newCols) { + return ListUtils.isEqualList(oldCols, newCols); + } + + public static void updateBasicState(EnvironmentContext environmentContext, Map + params) { + if (params == null) { + return; + } + if (environmentContext != null + && environmentContext.isSetProperties() + && StatsSetupConst.TASK.equals(environmentContext.getProperties().get( + StatsSetupConst.STATS_GENERATED))) { + StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); + } else { + StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); + } + } + + /** + * Updates the numFiles and totalSize parameters for the passed Partition by querying + * the warehouse if the passed Partition does not already have values for these parameters. + * @param part + * @param wh + * @param madeDir if true, the directory was just created and can be assumed to be empty + * @param forceRecompute Recompute stats even if the passed Partition already has + * these parameters set + * @return true if the stats were updated, false otherwise + */ + public static boolean updatePartitionStatsFast(Partition part, Table tbl, Warehouse wh, + boolean madeDir, boolean forceRecompute, EnvironmentContext environmentContext, + boolean isCreate) throws MetaException { + return updatePartitionStatsFast(new PartitionSpecProxy.SimplePartitionWrapperIterator(part), + tbl, wh, madeDir, forceRecompute, environmentContext, isCreate); + } + + /** + * Updates the numFiles and totalSize parameters for the passed Partition by querying + * the warehouse if the passed Partition does not already have values for these parameters. + * @param part + * @param wh + * @param madeDir if true, the directory was just created and can be assumed to be empty + * @param forceRecompute Recompute stats even if the passed Partition already has + * these parameters set + * @return true if the stats were updated, false otherwise + */ + public static boolean updatePartitionStatsFast(PartitionSpecProxy.PartitionIterator part, + Table table, Warehouse wh, boolean madeDir, boolean forceRecompute, + EnvironmentContext environmentContext, boolean isCreate) throws MetaException { + Map params = part.getParameters(); + if (!forceRecompute && params != null && containsAllFastStats(params)) return false; + if (params == null) { + params = new HashMap<>(); + } + if (!isCreate && isTransactionalTable(table.getParameters())) { + // TODO: implement? 
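+      // Fast stats are intentionally skipped here: computing them correctly for a
+      // transactional table would require the AcidUtils base/delta file layout, which is
+      // not accessible from this utility, so no quick stats are populated for it.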
+ LOG.warn("Not updating fast stats for a transactional table " + table.getTableName()); + part.setParameters(params); + return true; + } + if (!madeDir) { + // The partition location already existed and may contain data. Lets try to + // populate those statistics that don't require a full scan of the data. + LOG.warn("Updating partition stats fast for: " + part.getTableName()); + List fileStatus = wh.getFileStatusesForLocation(part.getLocation()); + // TODO: this is invalid for ACID tables, and we cannot access AcidUtils here. + populateQuickStats(fileStatus, params); + LOG.warn("Updated size to " + params.get(StatsSetupConst.TOTAL_SIZE)); + updateBasicState(environmentContext, params); + } + part.setParameters(params); + return true; + } + + /* + * This method is to check if the new column list includes all the old columns with same name and + * type. The column comment does not count. + */ + public static boolean columnsIncludedByNameType(List oldCols, + List newCols) { + if (oldCols.size() > newCols.size()) { + return false; + } + + Map columnNameTypePairMap = new HashMap<>(newCols.size()); + for (FieldSchema newCol : newCols) { + columnNameTypePairMap.put(newCol.getName().toLowerCase(), newCol.getType()); + } + for (final FieldSchema oldCol : oldCols) { + if (!columnNameTypePairMap.containsKey(oldCol.getName()) + || !columnNameTypePairMap.get(oldCol.getName()).equalsIgnoreCase(oldCol.getType())) { + return false; + } + } + + return true; + } + + /** Duplicates AcidUtils; used in a couple places in metastore. */ + public static boolean isTransactionalTable(Map params) { + String transactionalProp = params.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL); + return (transactionalProp != null && "true".equalsIgnoreCase(transactionalProp)); + } + + /** + * create listener instances as per the configuration. 
+ * + * @param clazz Class of the listener + * @param conf configuration object + * @param listenerImplList Implementation class name + * @return instance of the listener + * @throws MetaException if there is any failure instantiating the class + */ + public static List getMetaStoreListeners(Class clazz, + Configuration conf, String listenerImplList) throws MetaException { + List listeners = new ArrayList(); + + if (StringUtils.isBlank(listenerImplList)) { + return listeners; + } + + String[] listenerImpls = listenerImplList.split(","); + for (String listenerImpl : listenerImpls) { + try { + T listener = (T) Class.forName( + listenerImpl.trim(), true, JavaUtils.getClassLoader()).getConstructor( + Configuration.class).newInstance(conf); + listeners.add(listener); + } catch (InvocationTargetException ie) { + LOG.error("Got InvocationTargetException", ie); + throw new MetaException("Failed to instantiate listener named: "+ + listenerImpl + ", reason: " + ie.getCause()); + } catch (Exception e) { + LOG.error("Got Exception", e); + throw new MetaException("Failed to instantiate listener named: "+ + listenerImpl + ", reason: " + e); + } + } + + return listeners; + } + + public static String validateSkewedColNames(List cols) { + if (CollectionUtils.isEmpty(cols)) { + return null; + } + for (String col : cols) { + if (!validateColumnName(col)) { + return col; + } + } + return null; + } + + public static String validateSkewedColNamesSubsetCol(List skewedColNames, + List cols) { + if (CollectionUtils.isEmpty(skewedColNames)) { + return null; + } + List colNames = new ArrayList<>(cols.size()); + for (FieldSchema fieldSchema : cols) { + colNames.add(fieldSchema.getName()); + } + // make a copy + List copySkewedColNames = new ArrayList<>(skewedColNames); + // remove valid columns + copySkewedColNames.removeAll(colNames); + if (copySkewedColNames.isEmpty()) { + return null; + } + return copySkewedColNames.toString(); + } + + public static boolean partitionNameHasValidCharacters(List partVals, + Pattern partitionValidationPattern) { + return getPartitionValWithInvalidCharacter(partVals, partitionValidationPattern) == null; + } + + public static void getMergableCols(ColumnStatistics csNew, Map parameters) { + List list = new ArrayList<>(); + for (int index = 0; index < csNew.getStatsObj().size(); index++) { + ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index); + // canColumnStatsMerge guarantees that it is accurate before we do merge + if (StatsSetupConst.canColumnStatsMerge(parameters, statsObjNew.getColName())) { + list.add(statsObjNew); + } + // in all the other cases, we can not merge + } + csNew.setStatsObj(list); + } + + // this function will merge csOld into csNew. + public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld) + throws InvalidObjectException { + List list = new ArrayList<>(); + if (csNew.getStatsObj().size() != csOld.getStatsObjSize()) { + // Some of the columns' stats are missing + // This implies partition schema has changed. We will merge columns + // present in both, overwrite stats for columns absent in metastore and + // leave alone columns stats missing from stats task. This last case may + // leave stats in stale state. This will be addressed later. + LOG.debug("New ColumnStats size is {}, but old ColumnStats size is {}", + csNew.getStatsObj().size(), csOld.getStatsObjSize()); + } + // In this case, we have to find out which columns can be merged. 
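+    // Example: if csOld carries stats for columns {a, b} and csNew for {a, c}, the result
+    // keeps a merged "a" (old merged into new), keeps "c" from csNew unchanged, and drops
+    // "b", because only columns present in csNew are carried forward.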
+ Map map = new HashMap<>(); + // We build a hash map from colName to object for old ColumnStats. + for (ColumnStatisticsObj obj : csOld.getStatsObj()) { + map.put(obj.getColName(), obj); + } + for (int index = 0; index < csNew.getStatsObj().size(); index++) { + ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index); + ColumnStatisticsObj statsObjOld = map.get(statsObjNew.getColName()); + if (statsObjOld != null) { + // because we already confirm that the stats is accurate + // it is impossible that the column types have been changed while the + // column stats is still accurate. + assert (statsObjNew.getStatsData().getSetField() == statsObjOld.getStatsData() + .getSetField()); + // If statsObjOld is found, we can merge. + ColumnStatsMerger merger = ColumnStatsMergerFactory.getColumnStatsMerger(statsObjNew, + statsObjOld); + merger.merge(statsObjNew, statsObjOld); + } + // If statsObjOld is not found, we just use statsObjNew as it is accurate. + list.add(statsObjNew); + } + // in all the other cases, we can not merge + csNew.setStatsObj(list); + } + + /** + * Verify if the user is allowed to make DB notification related calls. + * Only the superusers defined in the Hadoop proxy user settings have the permission. + * + * @param user the short user name + * @param conf that contains the proxy user settings + * @return if the user has the permission + */ + public static boolean checkUserHasHostProxyPrivileges(String user, Configuration conf, String ipAddress) { + DefaultImpersonationProvider sip = ProxyUsers.getDefaultImpersonationProvider(); + // Just need to initialize the ProxyUsers for the first time, given that the conf will not change on the fly + if (sip == null) { + ProxyUsers.refreshSuperUserGroupsConfiguration(conf); + sip = ProxyUsers.getDefaultImpersonationProvider(); + } + Map> proxyHosts = sip.getProxyHosts(); + Collection hostEntries = proxyHosts.get(sip.getProxySuperuserIpConfKey(user)); + MachineList machineList = new MachineList(hostEntries); + ipAddress = (ipAddress == null) ? 
StringUtils.EMPTY : ipAddress; + return machineList.includes(ipAddress); + } + + public static int startMetaStore() throws Exception { + return startMetaStore(HadoopThriftAuthBridge.getBridge(), null); + } + + public static int startMetaStore(final HadoopThriftAuthBridge bridge, Configuration conf) throws + Exception { + int port = findFreePort(); + startMetaStore(port, bridge, conf); + return port; + } + + public static int startMetaStore(Configuration conf) throws Exception { + return startMetaStore(HadoopThriftAuthBridge.getBridge(), conf); + } + + public static void startMetaStore(final int port, final HadoopThriftAuthBridge bridge) throws Exception { + startMetaStore(port, bridge, null); + } + + public static void startMetaStore(final int port, + final HadoopThriftAuthBridge bridge, Configuration hiveConf) + throws Exception{ + if (hiveConf == null) { + hiveConf = MetastoreConf.newMetastoreConf(); + } + final Configuration finalHiveConf = hiveConf; + Thread thread = new Thread(new Runnable() { + @Override + public void run() { + try { + HiveMetaStore.startMetaStore(port, bridge, finalHiveConf); + } catch (Throwable e) { + LOG.error("Metastore Thrift Server threw an exception...",e); + } + } + }); + thread.setDaemon(true); + thread.start(); + loopUntilHMSReady(port); + } + + /** + * A simple connect test to make sure that the metastore is up + * @throws Exception + */ + private static void loopUntilHMSReady(int port) throws Exception { + int retries = 0; + Exception exc; + while (true) { + try { + Socket socket = new Socket(); + socket.connect(new InetSocketAddress(port), 5000); + socket.close(); + return; + } catch (Exception e) { + if (retries++ > 60) { //give up + exc = e; + break; + } + Thread.sleep(1000); + } + } + // something is preventing metastore from starting + // print the stack from all threads for debugging purposes + LOG.error("Unable to connect to metastore server: " + exc.getMessage()); + LOG.info("Printing all thread stack traces for debugging before throwing exception."); + LOG.info(getAllThreadStacksAsString()); + throw exc; + } + + private static String getAllThreadStacksAsString() { + Map threadStacks = Thread.getAllStackTraces(); + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : threadStacks.entrySet()) { + Thread t = entry.getKey(); + sb.append(System.lineSeparator()); + sb.append("Name: ").append(t.getName()).append(" State: ").append(t.getState()); + addStackString(entry.getValue(), sb); + } + return sb.toString(); + } + + private static void addStackString(StackTraceElement[] stackElems, StringBuilder sb) { + sb.append(System.lineSeparator()); + for (StackTraceElement stackElem : stackElems) { + sb.append(stackElem).append(System.lineSeparator()); + } + } + + /** + * Finds a free port on the machine. + * + * @return + * @throws IOException + */ + public static int findFreePort() throws IOException { + ServerSocket socket= new ServerSocket(0); + int port = socket.getLocalPort(); + socket.close(); + return port; + } + + /** + * Finds a free port on the machine, but allow the + * ability to specify a port number to not use, no matter what. 
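+   * Two server sockets are opened at the same time, so if the first happens to be bound to
+   * the excluded port, the second is guaranteed to be bound to a different, also free, port.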
+ */ + public static int findFreePortExcepting(int portToExclude) throws IOException { + ServerSocket socket1 = null; + ServerSocket socket2 = null; + try { + socket1 = new ServerSocket(0); + socket2 = new ServerSocket(0); + if (socket1.getLocalPort() != portToExclude) { + return socket1.getLocalPort(); + } + // If we're here, then socket1.getLocalPort was the port to exclude + // Since both sockets were open together at a point in time, we're + // guaranteed that socket2.getLocalPort() is not the same. + return socket2.getLocalPort(); + } finally { + if (socket1 != null){ + socket1.close(); + } + if (socket2 != null){ + socket2.close(); + } + } + } + + public static String getIndexTableName(String dbName, String baseTblName, String indexName) { + return dbName + "__" + baseTblName + "_" + indexName + "__"; + } + + static public String validateTblColumns(List cols) { + for (FieldSchema fieldSchema : cols) { + // skip this, as validateColumnName always returns true + /* + if (!validateColumnName(fieldSchema.getName())) { + return "name: " + fieldSchema.getName(); + } + */ + String typeError = validateColumnType(fieldSchema.getType()); + if (typeError != null) { + return typeError; + } + } + return null; + } + + private static String validateColumnType(String type) { + if (type.equals(MetaStoreUtils.TYPE_FROM_DESERIALIZER)) { + return null; + } + int last = 0; + boolean lastAlphaDigit = isValidTypeChar(type.charAt(last)); + for (int i = 1; i <= type.length(); i++) { + if (i == type.length() + || isValidTypeChar(type.charAt(i)) != lastAlphaDigit) { + String token = type.substring(last, i); + last = i; + if (!ColumnType.AllTypes.contains(token)) { + return "type: " + type; + } + break; + } + } + return null; + } + + private static boolean isValidTypeChar(char c) { + return Character.isLetterOrDigit(c) || c == '_'; + } + + // ColumnStatisticsObj with info about its db, table, partition (if table is partitioned) + public static class ColStatsObjWithSourceInfo { + private final ColumnStatisticsObj colStatsObj; + private final String catName; + private final String dbName; + private final String tblName; + private final String partName; + + public ColStatsObjWithSourceInfo(ColumnStatisticsObj colStatsObj, String catName, String dbName, String tblName, + String partName) { + this.colStatsObj = colStatsObj; + this.catName = catName; + this.dbName = dbName; + this.tblName = tblName; + this.partName = partName; + } + + public ColumnStatisticsObj getColStatsObj() { + return colStatsObj; + } + + public String getCatName() { + return catName; + } + + public String getDbName() { + return dbName; + } + + public String getTblName() { + return tblName; + } + + public String getPartName() { + return partName; + } + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index 7cdcd626a7..5233bee592 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -17,92 +17,45 @@ */ package org.apache.hadoop.hive.metastore.utils; -import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.metastore.api.WMPoolSchedulingPolicy; import com.google.common.base.Joiner; -import com.google.common.base.Predicates; -import 
com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.apache.commons.collections.ListUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.metastore.ColumnType; -import org.apache.hadoop.hive.metastore.HiveMetaStore; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator; -import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory; -import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger; -import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; -import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; import org.apache.hadoop.security.SaslRpcServer; -import org.apache.hadoop.security.authorize.DefaultImpersonationProvider; -import org.apache.hadoop.security.authorize.ProxyUsers; -import org.apache.hadoop.util.MachineList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nullable; import java.io.File; -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.net.InetSocketAddress; -import java.net.ServerSocket; -import java.net.Socket; import java.net.URL; import java.net.URLClassLoader; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.security.MessageDigest; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Properties; -import java.util.Map.Entry; -import java.util.SortedMap; -import java.util.SortedSet; import java.util.TimeZone; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; 
import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -121,7 +74,6 @@ protected DateFormat initialValue() { // Indicates a type was derived from the deserializer rather than Hive's metadata. public static final String TYPE_FROM_DESERIALIZER = ""; - private static final Charset ENCODING = StandardCharsets.UTF_8; private static final Logger LOG = LoggerFactory.getLogger(MetaStoreUtils.class); // The following two are public for any external users who wish to use them. @@ -209,251 +161,6 @@ public static MetaException newMetaException(String errorMessage, Exception e) { return metaException; } - /** - * Helper function to transform Nulls to empty strings. - */ - private static final com.google.common.base.Function transFormNullsToEmptyString - = new com.google.common.base.Function() { - @Override - public java.lang.String apply(@Nullable java.lang.String string) { - return org.apache.commons.lang.StringUtils.defaultString(string); - } - }; - - /** - * We have a need to sanity-check the map before conversion from persisted objects to - * metadata thrift objects because null values in maps will cause a NPE if we send - * across thrift. Pruning is appropriate for most cases except for databases such as - * Oracle where Empty strings are stored as nulls, in which case we need to handle that. - * See HIVE-8485 for motivations for this. - */ - public static Map trimMapNulls( - Map dnMap, boolean retrieveMapNullsAsEmptyStrings){ - if (dnMap == null){ - return null; - } - // Must be deterministic order map - see HIVE-8707 - // => we use Maps.newLinkedHashMap instead of Maps.newHashMap - if (retrieveMapNullsAsEmptyStrings) { - // convert any nulls present in map values to empty strings - this is done in the case - // of backing dbs like oracle which persist empty strings as nulls. - return Maps.newLinkedHashMap(Maps.transformValues(dnMap, transFormNullsToEmptyString)); - } else { - // prune any nulls present in map values - this is the typical case. 
- return Maps.newLinkedHashMap(Maps.filterValues(dnMap, Predicates.notNull())); - } - } - - - // Given a list of partStats, this function will give you an aggr stats - public static List aggrPartitionStats(List partStats, - String catName, String dbName, String tableName, List partNames, List colNames, - boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner) - throws MetaException { - Map> colStatsMap = - new HashMap>(); - // Group stats by colName for each partition - Map aliasToAggregator = - new HashMap(); - for (ColumnStatistics css : partStats) { - List objs = css.getStatsObj(); - for (ColumnStatisticsObj obj : objs) { - String partName = css.getStatsDesc().getPartName(); - if (aliasToAggregator.get(obj.getColName()) == null) { - aliasToAggregator.put(obj.getColName(), - ColumnStatsAggregatorFactory.getColumnStatsAggregator( - obj.getStatsData().getSetField(), useDensityFunctionForNDVEstimation, ndvTuner)); - colStatsMap.put(aliasToAggregator.get(obj.getColName()), - new ArrayList()); - } - colStatsMap.get(aliasToAggregator.get(obj.getColName())) - .add(new ColStatsObjWithSourceInfo(obj, catName, dbName, tableName, partName)); - } - } - if (colStatsMap.size() < 1) { - LOG.debug("No stats data found for: tblName= {}, partNames= {}, colNames= {}", - TableName.getQualified(catName, dbName, tableName), partNames, colNames); - return new ArrayList(); - } - return aggrPartitionStats(colStatsMap, partNames, areAllPartsFound, - useDensityFunctionForNDVEstimation, ndvTuner); - } - - public static List aggrPartitionStats( - Map> colStatsMap, - final List partNames, final boolean areAllPartsFound, - final boolean useDensityFunctionForNDVEstimation, final double ndvTuner) - throws MetaException { - List aggrColStatObjs = new ArrayList(); - int numProcessors = Runtime.getRuntime().availableProcessors(); - final ExecutorService pool = - Executors.newFixedThreadPool(Math.min(colStatsMap.size(), numProcessors), - new ThreadFactoryBuilder().setDaemon(true).setNameFormat("aggr-col-stats-%d").build()); - final List> futures = Lists.newLinkedList(); - LOG.debug("Aggregating column stats. 
Threads used: {}", - Math.min(colStatsMap.size(), numProcessors)); - long start = System.currentTimeMillis(); - for (final Entry> entry : colStatsMap - .entrySet()) { - futures.add(pool.submit(new Callable() { - @Override - public ColumnStatisticsObj call() throws MetaException { - List colStatWithSourceInfo = entry.getValue(); - ColumnStatsAggregator aggregator = entry.getKey(); - try { - ColumnStatisticsObj statsObj = - aggregator.aggregate(colStatWithSourceInfo, partNames, areAllPartsFound); - return statsObj; - } catch (MetaException e) { - LOG.debug(e.getMessage()); - throw e; - } - } - })); - } - pool.shutdown(); - if (!futures.isEmpty()) { - for (Future future : futures) { - try { - if (future.get() != null) { - aggrColStatObjs.add(future.get()); - } - } catch (InterruptedException | ExecutionException e) { - LOG.debug(e.getMessage()); - pool.shutdownNow(); - throw new MetaException(e.toString()); - } - - } - } - LOG.debug("Time for aggr col stats in seconds: {} Threads used: {}", - ((System.currentTimeMillis() - (double) start)) / 1000, - Math.min(colStatsMap.size(), numProcessors)); - return aggrColStatObjs; - } - - public static double decimalToDouble(Decimal decimal) { - return new BigDecimal(new BigInteger(decimal.getUnscaled()), decimal.getScale()).doubleValue(); - } - - public static void validatePartitionNameCharacters(List partVals, - Pattern partitionValidationPattern) throws MetaException { - - String invalidPartitionVal = getPartitionValWithInvalidCharacter(partVals, partitionValidationPattern); - if (invalidPartitionVal != null) { - throw new MetaException("Partition value '" + invalidPartitionVal + - "' contains a character " + "not matched by whitelist pattern '" + - partitionValidationPattern.toString() + "'. " + "(configure with " + - MetastoreConf.ConfVars.PARTITION_NAME_WHITELIST_PATTERN.getVarname() + ")"); - } - } - - private static String getPartitionValWithInvalidCharacter(List partVals, - Pattern partitionValidationPattern) { - if (partitionValidationPattern == null) { - return null; - } - - for (String partVal : partVals) { - if (!partitionValidationPattern.matcher(partVal).matches()) { - return partVal; - } - } - - return null; - } - - /** - * Produce a hash for the storage descriptor - * @param sd storage descriptor to hash - * @param md message descriptor to use to generate the hash - * @return the hash as a byte array - */ - public static synchronized byte[] hashStorageDescriptor(StorageDescriptor sd, MessageDigest md) { - // Note all maps and lists have to be absolutely sorted. Otherwise we'll produce different - // results for hashes based on the OS or JVM being used. - md.reset(); - // In case cols are null - if (sd.getCols() != null) { - for (FieldSchema fs : sd.getCols()) { - md.update(fs.getName().getBytes(ENCODING)); - md.update(fs.getType().getBytes(ENCODING)); - if (fs.getComment() != null) { - md.update(fs.getComment().getBytes(ENCODING)); - } - } - } - if (sd.getInputFormat() != null) { - md.update(sd.getInputFormat().getBytes(ENCODING)); - } - if (sd.getOutputFormat() != null) { - md.update(sd.getOutputFormat().getBytes(ENCODING)); - } - md.update(sd.isCompressed() ? 
"true".getBytes(ENCODING) : "false".getBytes(ENCODING)); - md.update(Integer.toString(sd.getNumBuckets()).getBytes(ENCODING)); - if (sd.getSerdeInfo() != null) { - SerDeInfo serde = sd.getSerdeInfo(); - if (serde.getName() != null) { - md.update(serde.getName().getBytes(ENCODING)); - } - if (serde.getSerializationLib() != null) { - md.update(serde.getSerializationLib().getBytes(ENCODING)); - } - if (serde.getParameters() != null) { - SortedMap params = new TreeMap<>(serde.getParameters()); - for (Map.Entry param : params.entrySet()) { - md.update(param.getKey().getBytes(ENCODING)); - md.update(param.getValue().getBytes(ENCODING)); - } - } - } - if (sd.getBucketCols() != null) { - List bucketCols = new ArrayList<>(sd.getBucketCols()); - for (String bucket : bucketCols) { - md.update(bucket.getBytes(ENCODING)); - } - } - if (sd.getSortCols() != null) { - SortedSet orders = new TreeSet<>(sd.getSortCols()); - for (Order order : orders) { - md.update(order.getCol().getBytes(ENCODING)); - md.update(Integer.toString(order.getOrder()).getBytes(ENCODING)); - } - } - if (sd.getSkewedInfo() != null) { - SkewedInfo skewed = sd.getSkewedInfo(); - if (skewed.getSkewedColNames() != null) { - SortedSet colnames = new TreeSet<>(skewed.getSkewedColNames()); - for (String colname : colnames) { - md.update(colname.getBytes(ENCODING)); - } - } - if (skewed.getSkewedColValues() != null) { - SortedSet sortedOuterList = new TreeSet<>(); - for (List innerList : skewed.getSkewedColValues()) { - SortedSet sortedInnerList = new TreeSet<>(innerList); - sortedOuterList.add(org.apache.commons.lang.StringUtils.join(sortedInnerList, ".")); - } - for (String colval : sortedOuterList) { - md.update(colval.getBytes(ENCODING)); - } - } - if (skewed.getSkewedColValueLocationMaps() != null) { - SortedMap sortedMap = new TreeMap<>(); - for (Map.Entry, String> smap : skewed.getSkewedColValueLocationMaps().entrySet()) { - SortedSet sortedKey = new TreeSet<>(smap.getKey()); - sortedMap.put(org.apache.commons.lang.StringUtils.join(sortedKey, "."), smap.getValue()); - } - for (Map.Entry e : sortedMap.entrySet()) { - md.update(e.getKey().getBytes(ENCODING)); - md.update(e.getValue().getBytes(ENCODING)); - } - } - md.update(sd.isStoredAsSubDirectories() ? "true".getBytes(ENCODING) : "false".getBytes(ENCODING)); - } - - return md.digest(); - } public static List getColumnNamesForTable(Table table) { List colNames = new ArrayList<>(); @@ -464,15 +171,6 @@ private static String getPartitionValWithInvalidCharacter(List partVals, return colNames; } - public static List getColumnNamesForPartition(Partition partition) { - List colNames = new ArrayList<>(); - Iterator colsIterator = partition.getSd().getColsIterator(); - while (colsIterator.hasNext()) { - colNames.add(colsIterator.next().getName()); - } - return colNames; - } - /** * validateName * @@ -501,53 +199,6 @@ public static boolean validateName(String name, Configuration conf) { return m.matches(); } - /* - * At the Metadata level there are no restrictions on Column Names. 
- */ - public static boolean validateColumnName(String name) { - return true; - } - - static public String validateTblColumns(List cols) { - for (FieldSchema fieldSchema : cols) { - // skip this, as validateColumnName always returns true - /* - if (!validateColumnName(fieldSchema.getName())) { - return "name: " + fieldSchema.getName(); - } - */ - String typeError = validateColumnType(fieldSchema.getType()); - if (typeError != null) { - return typeError; - } - } - return null; - } - - private static String validateColumnType(String type) { - if (type.equals(TYPE_FROM_DESERIALIZER)) { - return null; - } - int last = 0; - boolean lastAlphaDigit = isValidTypeChar(type.charAt(last)); - for (int i = 1; i <= type.length(); i++) { - if (i == type.length() - || isValidTypeChar(type.charAt(i)) != lastAlphaDigit) { - String token = type.substring(last, i); - last = i; - if (!ColumnType.AllTypes.contains(token)) { - return "type: " + type; - } - break; - } - } - return null; - } - - private static boolean isValidTypeChar(char c) { - return Character.isLetterOrDigit(c) || c == '_'; - } - /** * Determines whether a table is an external table. * @@ -606,7 +257,7 @@ public static boolean requireCalStats(Partition oldPart, return false; } - if (isView(tbl)) { + if (MetaStoreServerUtils.isView(tbl)) { return false; } @@ -616,7 +267,7 @@ public static boolean requireCalStats(Partition oldPart, // requires to calculate stats if new partition doesn't have it if ((newPart == null) || (newPart.getParameters() == null) - || !containsAllFastStats(newPart.getParameters())) { + || !MetaStoreServerUtils.containsAllFastStats(newPart.getParameters())) { return true; } @@ -631,238 +282,9 @@ public static boolean requireCalStats(Partition oldPart, } // requires to calculate stats if new and old have different fast stats - return !isFastStatsSame(oldPart, newPart); - } - - public static boolean isView(Table table) { - if (table == null) { - return false; - } - return TableType.VIRTUAL_VIEW.toString().equals(table.getTableType()); - } - - /** - * @param partParams - * @return True if the passed Parameters Map contains values for all "Fast Stats". - */ - private static boolean containsAllFastStats(Map partParams) { - for (String stat : StatsSetupConst.FAST_STATS) { - if (!partParams.containsKey(stat)) { - return false; - } - } - return true; - } - - public static boolean isFastStatsSame(Partition oldPart, Partition newPart) { - // requires to calculate stats if new and old have different fast stats - if ((oldPart != null) && (oldPart.getParameters() != null)) { - for (String stat : StatsSetupConst.FAST_STATS) { - if (oldPart.getParameters().containsKey(stat)) { - Long oldStat = Long.parseLong(oldPart.getParameters().get(stat)); - Long newStat = Long.parseLong(newPart.getParameters().get(stat)); - if (!oldStat.equals(newStat)) { - return false; - } - } else { - return false; - } - } - return true; - } - return false; - } - - /** - * Updates the numFiles and totalSize parameters for the passed Table by querying - * the warehouse if the passed Table does not already have values for these parameters. - * NOTE: This function is rather expensive since it needs to traverse the file system to get all - * the information. 
- * - * @param newDir if true, the directory was just created and can be assumed to be empty - * @param forceRecompute Recompute stats even if the passed Table already has - * these parameters set - */ - public static void updateTableStatsSlow(Database db, Table tbl, Warehouse wh, - boolean newDir, boolean forceRecompute, - EnvironmentContext environmentContext) throws MetaException { - // DO_NOT_UPDATE_STATS is supposed to be a transient parameter that is only passed via RPC - // We want to avoid this property from being persistent. - // - // NOTE: If this property *is* set as table property we will remove it which is incorrect but - // we can't distinguish between these two cases - // - // This problem was introduced by HIVE-10228. A better approach would be to pass the property - // via the environment context. - Map params = tbl.getParameters(); - boolean updateStats = true; - if ((params != null) && params.containsKey(StatsSetupConst.DO_NOT_UPDATE_STATS)) { - updateStats = !Boolean.valueOf(params.get(StatsSetupConst.DO_NOT_UPDATE_STATS)); - params.remove(StatsSetupConst.DO_NOT_UPDATE_STATS); - } - - if (!updateStats || newDir || tbl.getPartitionKeysSize() != 0) { - return; - } - - // If stats are already present and forceRecompute isn't set, nothing to do - if (!forceRecompute && params != null && containsAllFastStats(params)) { - return; - } - - // NOTE: wh.getFileStatusesForUnpartitionedTable() can be REALLY slow - List fileStatus = wh.getFileStatusesForUnpartitionedTable(db, tbl); - if (params == null) { - params = new HashMap<>(); - tbl.setParameters(params); - } - // The table location already exists and may contain data. - // Let's try to populate those stats that don't require full scan. - LOG.info("Updating table stats for {}", tbl.getTableName()); - populateQuickStats(fileStatus, params); - LOG.info("Updated size of table {} to {}", - tbl.getTableName(), params.get(StatsSetupConst.TOTAL_SIZE)); - if (environmentContext != null - && environmentContext.isSetProperties() - && StatsSetupConst.TASK.equals(environmentContext.getProperties().get( - StatsSetupConst.STATS_GENERATED))) { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); - } else { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); - } + return !MetaStoreServerUtils.isFastStatsSame(oldPart, newPart); } - /** This method is invalid for MM and ACID tables unless fileStatus comes from AcidUtils. */ - public static void populateQuickStats(List fileStatus, Map params) { - // Why is this even in metastore? - LOG.trace("Populating quick stats based on {} files", fileStatus.size()); - int numFiles = 0; - long tableSize = 0L; - int numErasureCodedFiles = 0; - for (FileStatus status : fileStatus) { - // don't take directories into account for quick stats TODO: wtf? 
- if (!status.isDir()) { - tableSize += status.getLen(); - numFiles += 1; - if (status.isErasureCoded()) { - numErasureCodedFiles++; - } - } - } - params.put(StatsSetupConst.NUM_FILES, Integer.toString(numFiles)); - params.put(StatsSetupConst.TOTAL_SIZE, Long.toString(tableSize)); - params.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, Integer.toString(numErasureCodedFiles)); - } - - public static void clearQuickStats(Map params) { - params.remove(StatsSetupConst.NUM_FILES); - params.remove(StatsSetupConst.TOTAL_SIZE); - params.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES); - } - - - public static boolean areSameColumns(List oldCols, List newCols) { - return ListUtils.isEqualList(oldCols, newCols); - } - - public static void updateBasicState(EnvironmentContext environmentContext, Map - params) { - if (params == null) { - return; - } - if (environmentContext != null - && environmentContext.isSetProperties() - && StatsSetupConst.TASK.equals(environmentContext.getProperties().get( - StatsSetupConst.STATS_GENERATED))) { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); - } else { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); - } - } - - /** - * Updates the numFiles and totalSize parameters for the passed Partition by querying - * the warehouse if the passed Partition does not already have values for these parameters. - * @param part - * @param wh - * @param madeDir if true, the directory was just created and can be assumed to be empty - * @param forceRecompute Recompute stats even if the passed Partition already has - * these parameters set - * @return true if the stats were updated, false otherwise - */ - public static boolean updatePartitionStatsFast(Partition part, Table tbl, Warehouse wh, - boolean madeDir, boolean forceRecompute, EnvironmentContext environmentContext, - boolean isCreate) throws MetaException { - return updatePartitionStatsFast(new PartitionSpecProxy.SimplePartitionWrapperIterator(part), - tbl, wh, madeDir, forceRecompute, environmentContext, isCreate); - } - /** - * Updates the numFiles and totalSize parameters for the passed Partition by querying - * the warehouse if the passed Partition does not already have values for these parameters. - * @param part - * @param wh - * @param madeDir if true, the directory was just created and can be assumed to be empty - * @param forceRecompute Recompute stats even if the passed Partition already has - * these parameters set - * @return true if the stats were updated, false otherwise - */ - public static boolean updatePartitionStatsFast(PartitionSpecProxy.PartitionIterator part, - Table table, Warehouse wh, boolean madeDir, boolean forceRecompute, - EnvironmentContext environmentContext, boolean isCreate) throws MetaException { - Map params = part.getParameters(); - if (!forceRecompute && params != null && containsAllFastStats(params)) return false; - if (params == null) { - params = new HashMap<>(); - } - if (!isCreate && MetaStoreUtils.isTransactionalTable(table.getParameters())) { - // TODO: implement? - LOG.warn("Not updating fast stats for a transactional table " + table.getTableName()); - part.setParameters(params); - return true; - } - if (!madeDir) { - // The partition location already existed and may contain data. Lets try to - // populate those statistics that don't require a full scan of the data. 
- LOG.warn("Updating partition stats fast for: " + part.getTableName()); - List fileStatus = wh.getFileStatusesForLocation(part.getLocation()); - // TODO: this is invalid for ACID tables, and we cannot access AcidUtils here. - populateQuickStats(fileStatus, params); - LOG.warn("Updated size to " + params.get(StatsSetupConst.TOTAL_SIZE)); - updateBasicState(environmentContext, params); - } - part.setParameters(params); - return true; - } - - /* - * This method is to check if the new column list includes all the old columns with same name and - * type. The column comment does not count. - */ - public static boolean columnsIncludedByNameType(List oldCols, - List newCols) { - if (oldCols.size() > newCols.size()) { - return false; - } - - Map columnNameTypePairMap = new HashMap<>(newCols.size()); - for (FieldSchema newCol : newCols) { - columnNameTypePairMap.put(newCol.getName().toLowerCase(), newCol.getType()); - } - for (final FieldSchema oldCol : oldCols) { - if (!columnNameTypePairMap.containsKey(oldCol.getName()) - || !columnNameTypePairMap.get(oldCol.getName()).equalsIgnoreCase(oldCol.getType())) { - return false; - } - } - - return true; - } - - /** Duplicates AcidUtils; used in a couple places in metastore. */ - public static boolean isTransactionalTable(Map params) { - String transactionalProp = params.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL); - return (transactionalProp != null && "true".equalsIgnoreCase(transactionalProp)); - } /** Duplicates AcidUtils; used in a couple places in metastore. */ public static boolean isInsertOnlyTableParam(Map params) { @@ -870,75 +292,6 @@ public static boolean isInsertOnlyTableParam(Map params) { return (transactionalProp != null && "insert_only".equalsIgnoreCase(transactionalProp)); } - /** - * create listener instances as per the configuration. 
- * - * @param clazz Class of the listener - * @param conf configuration object - * @param listenerImplList Implementation class name - * @return instance of the listener - * @throws MetaException if there is any failure instantiating the class - */ - public static List getMetaStoreListeners(Class clazz, - Configuration conf, String listenerImplList) throws MetaException { - List listeners = new ArrayList(); - - if (StringUtils.isBlank(listenerImplList)) { - return listeners; - } - - String[] listenerImpls = listenerImplList.split(","); - for (String listenerImpl : listenerImpls) { - try { - T listener = (T) Class.forName( - listenerImpl.trim(), true, JavaUtils.getClassLoader()).getConstructor( - Configuration.class).newInstance(conf); - listeners.add(listener); - } catch (InvocationTargetException ie) { - LOG.error("Got InvocationTargetException", ie); - throw new MetaException("Failed to instantiate listener named: "+ - listenerImpl + ", reason: " + ie.getCause()); - } catch (Exception e) { - LOG.error("Got Exception", e); - throw new MetaException("Failed to instantiate listener named: "+ - listenerImpl + ", reason: " + e); - } - } - - return listeners; - } - - public static String validateSkewedColNames(List cols) { - if (CollectionUtils.isEmpty(cols)) { - return null; - } - for (String col : cols) { - if (!validateColumnName(col)) { - return col; - } - } - return null; - } - - public static String validateSkewedColNamesSubsetCol(List skewedColNames, - List cols) { - if (CollectionUtils.isEmpty(skewedColNames)) { - return null; - } - List colNames = new ArrayList<>(cols.size()); - for (FieldSchema fieldSchema : cols) { - colNames.add(fieldSchema.getName()); - } - // make a copy - List copySkewedColNames = new ArrayList<>(skewedColNames); - // remove valid columns - copySkewedColNames.removeAll(colNames); - if (copySkewedColNames.isEmpty()) { - return null; - } - return copySkewedColNames.toString(); - } - public static boolean isNonNativeTable(Table table) { if (table == null || table.getParameters() == null) { return false; @@ -1038,64 +391,6 @@ public static int getArchivingLevel(Partition part) throws MetaException { return part.getValues().size(); } - public static boolean partitionNameHasValidCharacters(List partVals, - Pattern partitionValidationPattern) { - return getPartitionValWithInvalidCharacter(partVals, partitionValidationPattern) == null; - } - - public static void getMergableCols(ColumnStatistics csNew, Map parameters) { - List list = new ArrayList<>(); - for (int index = 0; index < csNew.getStatsObj().size(); index++) { - ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index); - // canColumnStatsMerge guarantees that it is accurate before we do merge - if (StatsSetupConst.canColumnStatsMerge(parameters, statsObjNew.getColName())) { - list.add(statsObjNew); - } - // in all the other cases, we can not merge - } - csNew.setStatsObj(list); - } - - // this function will merge csOld into csNew. - public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld) - throws InvalidObjectException { - List list = new ArrayList<>(); - if (csNew.getStatsObj().size() != csOld.getStatsObjSize()) { - // Some of the columns' stats are missing - // This implies partition schema has changed. We will merge columns - // present in both, overwrite stats for columns absent in metastore and - // leave alone columns stats missing from stats task. This last case may - // leave stats in stale state. This will be addressed later. 
- LOG.debug("New ColumnStats size is {}, but old ColumnStats size is {}", - csNew.getStatsObj().size(), csOld.getStatsObjSize()); - } - // In this case, we have to find out which columns can be merged. - Map map = new HashMap<>(); - // We build a hash map from colName to object for old ColumnStats. - for (ColumnStatisticsObj obj : csOld.getStatsObj()) { - map.put(obj.getColName(), obj); - } - for (int index = 0; index < csNew.getStatsObj().size(); index++) { - ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index); - ColumnStatisticsObj statsObjOld = map.get(statsObjNew.getColName()); - if (statsObjOld != null) { - // because we already confirm that the stats is accurate - // it is impossible that the column types have been changed while the - // column stats is still accurate. - assert (statsObjNew.getStatsData().getSetField() == statsObjOld.getStatsData() - .getSetField()); - // If statsObjOld is found, we can merge. - ColumnStatsMerger merger = ColumnStatsMergerFactory.getColumnStatsMerger(statsObjNew, - statsObjOld); - merger.merge(statsObjNew, statsObjOld); - } - // If statsObjOld is not found, we just use statsObjNew as it is accurate. - list.add(statsObjNew); - } - // in all the other cases, we can not merge - csNew.setStatsObj(list); - } - /** * Read and return the meta store Sasl configuration. Currently it uses the default * Hadoop SASL configuration and can be configured using "hadoop.rpc.protection" @@ -1167,28 +462,6 @@ private static URL urlFromPathString(String onestr) { return oneurl; } - /** - * Verify if the user is allowed to make DB notification related calls. - * Only the superusers defined in the Hadoop proxy user settings have the permission. - * - * @param user the short user name - * @param conf that contains the proxy user settings - * @return if the user has the permission - */ - public static boolean checkUserHasHostProxyPrivileges(String user, Configuration conf, String ipAddress) { - DefaultImpersonationProvider sip = ProxyUsers.getDefaultImpersonationProvider(); - // Just need to initialize the ProxyUsers for the first time, given that the conf will not change on the fly - if (sip == null) { - ProxyUsers.refreshSuperUserGroupsConfiguration(conf); - sip = ProxyUsers.getDefaultImpersonationProvider(); - } - Map> proxyHosts = sip.getProxyHosts(); - Collection hostEntries = proxyHosts.get(sip.getProxySuperuserIpConfKey(user)); - MachineList machineList = new MachineList(hostEntries); - ipAddress = (ipAddress == null) ? StringUtils.EMPTY : ipAddress; - return machineList.includes(ipAddress); - } - /** * Convert FieldSchemas to Thrift DDL. 
*/ @@ -1526,139 +799,6 @@ public static String getColumnCommentsFromFieldSchema(List fieldSch return sb.toString(); } - public static int startMetaStore() throws Exception { - return startMetaStore(HadoopThriftAuthBridge.getBridge(), null); - } - - public static int startMetaStore(final HadoopThriftAuthBridge bridge, Configuration conf) throws - Exception { - int port = findFreePort(); - startMetaStore(port, bridge, conf); - return port; - } - - public static int startMetaStore(Configuration conf) throws Exception { - return startMetaStore(HadoopThriftAuthBridge.getBridge(), conf); - } - - public static void startMetaStore(final int port, final HadoopThriftAuthBridge bridge) throws Exception { - startMetaStore(port, bridge, null); - } - - public static void startMetaStore(final int port, - final HadoopThriftAuthBridge bridge, Configuration hiveConf) - throws Exception{ - if (hiveConf == null) { - hiveConf = MetastoreConf.newMetastoreConf(); - } - final Configuration finalHiveConf = hiveConf; - Thread thread = new Thread(new Runnable() { - @Override - public void run() { - try { - HiveMetaStore.startMetaStore(port, bridge, finalHiveConf); - } catch (Throwable e) { - LOG.error("Metastore Thrift Server threw an exception...",e); - } - } - }); - thread.setDaemon(true); - thread.start(); - loopUntilHMSReady(port); - } - - /** - * A simple connect test to make sure that the metastore is up - * @throws Exception - */ - private static void loopUntilHMSReady(int port) throws Exception { - int retries = 0; - Exception exc; - while (true) { - try { - Socket socket = new Socket(); - socket.connect(new InetSocketAddress(port), 5000); - socket.close(); - return; - } catch (Exception e) { - if (retries++ > 60) { //give up - exc = e; - break; - } - Thread.sleep(1000); - } - } - // something is preventing metastore from starting - // print the stack from all threads for debugging purposes - LOG.error("Unable to connect to metastore server: " + exc.getMessage()); - LOG.info("Printing all thread stack traces for debugging before throwing exception."); - LOG.info(getAllThreadStacksAsString()); - throw exc; - } - - private static String getAllThreadStacksAsString() { - Map threadStacks = Thread.getAllStackTraces(); - StringBuilder sb = new StringBuilder(); - for (Map.Entry entry : threadStacks.entrySet()) { - Thread t = entry.getKey(); - sb.append(System.lineSeparator()); - sb.append("Name: ").append(t.getName()).append(" State: ").append(t.getState()); - addStackString(entry.getValue(), sb); - } - return sb.toString(); - } - - private static void addStackString(StackTraceElement[] stackElems, StringBuilder sb) { - sb.append(System.lineSeparator()); - for (StackTraceElement stackElem : stackElems) { - sb.append(stackElem).append(System.lineSeparator()); - } - } - - /** - * Finds a free port on the machine. - * - * @return - * @throws IOException - */ - public static int findFreePort() throws IOException { - ServerSocket socket= new ServerSocket(0); - int port = socket.getLocalPort(); - socket.close(); - return port; - } - - /** - * Finds a free port on the machine, but allow the - * ability to specify a port number to not use, no matter what. 
- */ - public static int findFreePortExcepting(int portToExclude) throws IOException { - ServerSocket socket1 = null; - ServerSocket socket2 = null; - try { - socket1 = new ServerSocket(0); - socket2 = new ServerSocket(0); - if (socket1.getLocalPort() != portToExclude) { - return socket1.getLocalPort(); - } - // If we're here, then socket1.getLocalPort was the port to exclude - // Since both sockets were open together at a point in time, we're - // guaranteed that socket2.getLocalPort() is not the same. - return socket2.getLocalPort(); - } finally { - if (socket1 != null){ - socket1.close(); - } - if (socket2 != null){ - socket2.close(); - } - } - } - - public static String getIndexTableName(String dbName, String baseTblName, String indexName) { - return dbName + "__" + baseTblName + "_" + indexName + "__"; - } - public static boolean isMaterializedViewTable(Table table) { if (table == null) { return false; @@ -1694,44 +834,6 @@ public static WMPoolSchedulingPolicy parseSchedulingPolicy(String schedulingPoli return Enum.valueOf(WMPoolSchedulingPolicy.class, schedulingPolicy); } - // ColumnStatisticsObj with info about its db, table, partition (if table is partitioned) - public static class ColStatsObjWithSourceInfo { - private final ColumnStatisticsObj colStatsObj; - private final String catName; - private final String dbName; - private final String tblName; - private final String partName; - - public ColStatsObjWithSourceInfo(ColumnStatisticsObj colStatsObj, String catName, String dbName, String tblName, - String partName) { - this.colStatsObj = colStatsObj; - this.catName = catName; - this.dbName = dbName; - this.tblName = tblName; - this.partName = partName; - } - - public ColumnStatisticsObj getColStatsObj() { - return colStatsObj; - } - - public String getCatName() { - return catName; - } - - public String getDbName() { - return dbName; - } - - public String getTblName() { - return tblName; - } - - public String getPartName() { - return partName; - } - } - private static boolean hasCatalogName(String dbName) { return dbName != null && dbName.length() > 0 && dbName.charAt(0) == CATALOG_DB_THRIFT_NAME_MARKER; diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java index 09c2509b3d..0934aeb3a7 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java @@ -87,7 +87,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.api.WriteEventInfo; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; import org.apache.thrift.TException; /** diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java index 3aebaf3419..70a17f51b9 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java +++ 
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java @@ -86,7 +86,7 @@ import org.apache.hadoop.hive.metastore.api.WriteEventInfo; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.thrift.TException; import org.junit.Assert; @@ -1132,7 +1132,7 @@ public void dropWMTriggerToPoolMapping(String resourcePlanName, String triggerNa } @Override - public List getPartitionColStatsForDatabase(String catName, String dbName) + public List getPartitionColStatsForDatabase(String catName, String dbName) throws MetaException, NoSuchObjectException { // TODO Auto-generated method stub return null; diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java index 75ab80b439..60beab6350 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java @@ -40,12 +40,12 @@ import java.util.concurrent.TimeUnit; import com.google.common.collect.Sets; -import org.apache.hadoop.hive.metastore.api.CreationMetadata; import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder; import org.apache.hadoop.hive.metastore.client.builder.TableBuilder; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; import org.apache.hadoop.hive.metastore.utils.FileUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.SecurityUtils; import org.datanucleus.api.jdo.JDOPersistenceManager; import org.datanucleus.api.jdo.JDOPersistenceManagerFactory; @@ -83,7 +83,6 @@ import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.Type; import org.apache.hadoop.hive.metastore.api.UnknownDBException; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.util.StringUtils; import org.apache.thrift.TException; import org.junit.Test; @@ -481,7 +480,7 @@ private static Partition makePartitionObject(String dbName, String tblName, part4.setSd(tbl.getSd().deepCopy()); part4.getSd().setSerdeInfo(tbl.getSd().getSerdeInfo().deepCopy()); part4.getSd().setLocation(tbl.getSd().getLocation() + ptnLocationSuffix); - MetaStoreUtils.updatePartitionStatsFast(part4, tbl, warehouse, false, false, null, true); + MetaStoreServerUtils.updatePartitionStatsFast(part4, tbl, warehouse, false, false, null, true); return part4; } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreGetMetaConf.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreGetMetaConf.java index 515b6144da..5963cb020a 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreGetMetaConf.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreGetMetaConf.java @@ -23,7 +23,7 @@ import 
org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.thrift.TException; import org.junit.After; import org.junit.AfterClass; @@ -62,7 +62,7 @@ public static void startMetaStoreServer() throws Exception { MockPartitionExpressionForMetastore.class, PartitionExpressionProxy.class); MetastoreConf.setBoolVar(metastoreConf, ConfVars.TRY_DIRECT_SQL_DDL, false); MetaStoreTestUtils.setConfForStandloneMode(metastoreConf); - int msPort = MetaStoreUtils.startMetaStore(metastoreConf); + int msPort = MetaStoreServerUtils.startMetaStore(metastoreConf); conf = MetastoreConf.newMetastoreConf(); MetastoreConf.setVar(conf, ConfVars.THRIFT_URIS, "thrift://localhost:" + msPort); MetastoreConf.setBoolVar(conf, ConfVars.HIVE_SUPPORT_CONCURRENCY, false); diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java index d5ae5d1c0d..d09ac8ced9 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java @@ -47,7 +47,7 @@ import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_ERASURE_CODED_FILES; import static org.apache.hadoop.hive.common.StatsSetupConst.STATS_GENERATED; import static org.apache.hadoop.hive.common.StatsSetupConst.TOTAL_SIZE; -import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.updateTableStatsSlow; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.updateTableStatsSlow; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; @@ -88,7 +88,7 @@ public void testTrimMapNullsXform() throws Exception { Map expected = ImmutableMap.of("akey", "aval", "blank", "", "null", ""); - Map xformed = MetaStoreUtils.trimMapNulls(m,true); + Map xformed = MetaStoreServerUtils.trimMapNulls(m,true); assertThat(xformed, is(expected)); } @@ -100,7 +100,7 @@ public void testTrimMapNullsPrune() throws Exception { m.put("null",null); Map expected = ImmutableMap.of("akey", "aval", "blank", ""); - Map pruned = MetaStoreUtils.trimMapNulls(m,false); + Map pruned = MetaStoreServerUtils.trimMapNulls(m,false); assertThat(pruned, is(expected)); } @@ -110,13 +110,13 @@ public void testcolumnsIncludedByNameType() { FieldSchema col1a = new FieldSchema("col1", "string", "col1 but with a different comment"); FieldSchema col2 = new FieldSchema("col2", "string", "col2 comment"); FieldSchema col3 = new FieldSchema("col3", "string", "col3 comment"); - Assert.assertTrue(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.columnsIncludedByNameType(Arrays.asList(col1), Arrays.asList(col1))); - Assert.assertTrue(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.columnsIncludedByNameType(Arrays.asList(col1), Arrays.asList(col1a))); - Assert.assertTrue(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.columnsIncludedByNameType(Arrays.asList(col1, col2), Arrays.asList(col1, col2))); - 
Assert.assertTrue(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.columnsIncludedByNameType(Arrays.asList(col1, col2), Arrays.asList(col2, col1))); - Assert.assertTrue(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.columnsIncludedByNameType(Arrays.asList(col1, col2), Arrays.asList(col1, col2, col3))); - Assert.assertTrue(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.columnsIncludedByNameType(Arrays.asList(col1, col2), Arrays.asList(col3, col2, col1))); - Assert.assertFalse(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.columnsIncludedByNameType(Arrays.asList(col1, col2), Arrays.asList(col1))); + Assert.assertTrue(MetaStoreServerUtils.columnsIncludedByNameType(Arrays.asList(col1), Arrays.asList(col1))); + Assert.assertTrue(MetaStoreServerUtils.columnsIncludedByNameType(Arrays.asList(col1), Arrays.asList(col1a))); + Assert.assertTrue(MetaStoreServerUtils.columnsIncludedByNameType(Arrays.asList(col1, col2), Arrays.asList(col1, col2))); + Assert.assertTrue(MetaStoreServerUtils.columnsIncludedByNameType(Arrays.asList(col1, col2), Arrays.asList(col2, col1))); + Assert.assertTrue(MetaStoreServerUtils.columnsIncludedByNameType(Arrays.asList(col1, col2), Arrays.asList(col1, col2, col3))); + Assert.assertTrue(MetaStoreServerUtils.columnsIncludedByNameType(Arrays.asList(col1, col2), Arrays.asList(col3, col2, col1))); + Assert.assertFalse(MetaStoreServerUtils.columnsIncludedByNameType(Arrays.asList(col1, col2), Arrays.asList(col1))); } /** -- 2.16.3
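Reviewer note (not part of the patch): the caller updates in TestHiveMetaStoreGetMetaConf above show that the embedded-server entry points now come from MetaStoreServerUtils. Below is a minimal sketch of a test bootstrap after this split, assuming the moved startMetaStore(Configuration) keeps the signature visible in the code removed from MetaStoreUtils; the HiveMetaStoreClient usage and the EmbeddedMetastoreExample class name are illustrative and not taken from the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;

public class EmbeddedMetastoreExample {
  public static HiveMetaStoreClient startAndConnect() throws Exception {
    // Server-side helper: after the split this comes from MetaStoreServerUtils,
    // not from the common MetaStoreUtils class.
    Configuration metastoreConf = MetastoreConf.newMetastoreConf();
    int msPort = MetaStoreServerUtils.startMetaStore(metastoreConf);

    // Client code keeps using the common configuration and client classes and
    // only needs the Thrift URI of the embedded server.
    Configuration clientConf = MetastoreConf.newMetastoreConf();
    MetastoreConf.setVar(clientConf, ConfVars.THRIFT_URIS, "thrift://localhost:" + msPort);
    return new HiveMetaStoreClient(clientConf);
  }
}

Tests that previously imported MetaStoreUtils only for these server-side helpers now import MetaStoreServerUtils instead, which is exactly the import rewrite shown in the test diffs above.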
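Similarly, the ColStatsObjWithSourceInfo holder removed from MetaStoreUtils above is re-homed as MetaStoreServerUtils.ColStatsObjWithSourceInfo (see the import rewrite in DummyRawStoreControlledCommit and the getPartitionColStatsForDatabase override in DummyRawStoreForJdoConnection, whose list element type changes with the move). A small sketch using only the constructor and getters shown in the removed class; the concrete catalog, database, table, and partition values are made up for illustration.

import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo;

public class ColStatsSourceInfoExample {
  // Formats "catalog.db.table/partition -> column" for a stats object, using the
  // relocated holder the same way RawStore implementations construct it.
  public static String describe(ColumnStatisticsObj statsObj) {
    ColStatsObjWithSourceInfo withSource =
        new ColStatsObjWithSourceInfo(statsObj, "hive", "default", "web_logs", "ds=2018-08-10");
    return withSource.getCatName() + "." + withSource.getDbName() + "."
        + withSource.getTblName() + "/" + withSource.getPartName()
        + " -> " + withSource.getColStatsObj().getColName();
  }
}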
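Finally, the rewritten assertions in TestMetaStoreUtils double as documentation of the moved columnsIncludedByNameType contract: the first column list is considered included in the second when every name/type pair from it appears there, regardless of ordering, extra columns, or differing comments. A short sketch restating that contract against the relocated helper; the ColumnsIncludedExample class name is illustrative.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;

public class ColumnsIncludedExample {
  public static void main(String[] args) {
    FieldSchema col1 = new FieldSchema("col1", "string", "col1 comment");
    FieldSchema col2 = new FieldSchema("col2", "string", "col2 comment");
    FieldSchema col3 = new FieldSchema("col3", "string", "col3 comment");
    List<FieldSchema> oldCols = Arrays.asList(col1, col2);

    // Extra columns and a different ordering in the second list still count as included.
    System.out.println(MetaStoreServerUtils.columnsIncludedByNameType(
        oldCols, Arrays.asList(col3, col2, col1)));   // true
    // Dropping a column from the second list does not.
    System.out.println(MetaStoreServerUtils.columnsIncludedByNameType(
        oldCols, Arrays.asList(col1)));               // false
  }
}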