diff --git itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java index cd036e6563351577999ccd3f3d9120a1819cdaa1..270aa6c010072b30f5927ac70d60c0458fc949f3 100644 --- itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java +++ itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java @@ -399,7 +399,11 @@ public void alterPartitions(String catName, String dbName, String tblName, List> partValsList, List newParts, long writeId, long queryTxnId, String queryValidWriteIds) throws InvalidObjectException, MetaException { - objectStore.alterPartitions(catName, dbName, tblName, partValsList, newParts, writeId, queryTxnId, queryValidWriteIds); + if (shouldEventSucceed) { + objectStore.alterPartitions(catName, dbName, tblName, partValsList, newParts, writeId, queryTxnId, queryValidWriteIds); + } else { + throw new RuntimeException("Event failed."); + } } @Override @@ -422,8 +426,10 @@ public int getNumPartitionsByExpr(String catName, String dbName, String tblName, @Override public List getPartitionsByNames(String catName, String dbName, String tblName, - List partNames) throws MetaException, NoSuchObjectException { - return objectStore.getPartitionsByNames(catName, dbName, tblName, partNames); + List partNames) + throws MetaException, NoSuchObjectException { + return objectStore.getPartitionsByNames( + catName, dbName, tblName, partNames); } @Override @@ -730,18 +736,16 @@ public boolean deletePartitionColumnStatistics(String catName, String dbName, St } @Override - public boolean updateTableColumnStatistics(ColumnStatistics statsObj) - throws NoSuchObjectException, MetaException, InvalidObjectException, - InvalidInputException { - return objectStore.updateTableColumnStatistics(statsObj); + public boolean updateTableColumnStatistics(ColumnStatistics statsObj, long txnId, String validWriteIds, long writeId) + throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { + return objectStore.updateTableColumnStatistics(statsObj, txnId, validWriteIds, writeId); } @Override public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj, - List partVals) - throws NoSuchObjectException, MetaException, InvalidObjectException, - InvalidInputException { - return objectStore.updatePartitionColumnStatistics(statsObj, partVals); + List partVals, long txnId, String validWriteIds, long writeId) + throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { + return objectStore.updatePartitionColumnStatistics(statsObj, partVals, txnId, validWriteIds, writeId); } @Override @@ -1305,4 +1309,6 @@ public int deleteRuntimeStats(int maxRetainSecs) throws MetaException { String dbName, String tableName) throws MetaException, NoSuchObjectException { return null; - }} + } + +} diff --git ql/src/java/org/apache/hadoop/hive/metastore/SynchronizedMetaStoreClient.java ql/src/java/org/apache/hadoop/hive/metastore/SynchronizedMetaStoreClient.java index 2ba6d0796cdf552e691f2a55cee1d7ad4ce31ee6..7eddc1645cb44b8f666e4de50c7585329d25c7f4 100644 --- ql/src/java/org/apache/hadoop/hive/metastore/SynchronizedMetaStoreClient.java +++ ql/src/java/org/apache/hadoop/hive/metastore/SynchronizedMetaStoreClient.java @@ -79,8 +79,8 @@ public synchronized Partition add_partition(Partition partition) throws TExcepti } public synchronized void 
alter_partition(String dbName, String tblName, - Partition newPart, EnvironmentContext environmentContext) throws TException { - client.alter_partition(dbName, tblName, newPart, environmentContext); + Partition newPart, EnvironmentContext environmentContext, long txnId, String writeIdList) throws TException { + client.alter_partition(dbName, tblName, newPart, environmentContext, txnId, writeIdList); } public synchronized LockResponse checkLock(long lockid) throws TException { diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java index bf4d29cba771f9fc98a21b203b79827a9ee58923..b30b4b3685c8b1f8e50fa7f250a23346694ddb27 100644 --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -1423,13 +1423,14 @@ private void recordValidTxns(HiveTxnManager txnMgr) throws LockException { // Write the current set of valid write ids for the operated acid tables into the conf file so // that it can be read by the input format. - private void recordValidWriteIds(HiveTxnManager txnMgr) throws LockException { + private ValidTxnWriteIdList recordValidWriteIds(HiveTxnManager txnMgr) throws LockException { String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); if ((txnString == null) || (txnString.isEmpty())) { throw new IllegalStateException("calling recordValidWritsIdss() without initializing ValidTxnList " + JavaUtils.txnIdToString(txnMgr.getCurrentTxnId())); } List txnTables = getTransactionalTableList(plan); + LOG.error("TODO# txnTables " + txnTables); ValidTxnWriteIdList txnWriteIds = null; if (compactionWriteIds != null) { if (txnTables.size() != 1) { @@ -1466,6 +1467,7 @@ private void recordValidWriteIds(HiveTxnManager txnMgr) throws LockException { } } LOG.debug("Encoding valid txn write ids info " + writeIdStr + " txnid:" + txnMgr.getCurrentTxnId()); + return txnWriteIds; } // Make the list of transactional tables list which are getting read or written by current txn @@ -1602,10 +1604,16 @@ private void acquireLocks() throws CommandProcessorResponse { } } - // Note: the sinks and DDL cannot coexist at this time; but if they could we would - // need to make sure we don't get two write IDs for the same table. + if (plan.getAcidAnalyzeTable() != null) { + // Allocate write ID for the table being analyzed. 
+ Table t = plan.getAcidAnalyzeTable().getTable(); + queryTxnMgr.getTableWriteId(t.getDbName(), t.getTableName()); + } + + DDLDescWithWriteId acidDdlDesc = plan.getAcidDdlDesc(); - if (acidDdlDesc != null && acidDdlDesc.mayNeedWriteId()) { + boolean hasAcidDdl = acidDdlDesc != null && acidDdlDesc.mayNeedWriteId(); + if (hasAcidDdl) { String fqTableName = acidDdlDesc.getFullTableName(); long writeId = queryTxnMgr.getTableWriteId( Utilities.getDatabaseName(fqTableName), Utilities.getTableName(fqTableName)); @@ -1620,9 +1628,11 @@ private void acquireLocks() throws CommandProcessorResponse { throw new IllegalStateException("calling recordValidTxn() more than once in the same " + JavaUtils.txnIdToString(queryTxnMgr.getCurrentTxnId())); } - if (plan.hasAcidResourcesInQuery()) { + + if (plan.hasAcidResourcesInQuery() || hasAcidDdl) { recordValidWriteIds(queryTxnMgr); } + } catch (Exception e) { errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); diff --git ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java index 79e938aebd976aa92e96cb53ef91b86784598eaa..f2201dd726aeb26791fcbdecfa30f36fb06ee10f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java +++ ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java @@ -35,6 +35,7 @@ import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; +import org.apache.curator.shaded.com.google.common.collect.Lists; import org.apache.hadoop.hive.metastore.api.Schema; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.ExplainTask; @@ -112,6 +113,7 @@ private final HiveOperation operation; private final boolean acidResourcesInQuery; private final Set acidSinks; // Note: both full-ACID and insert-only sinks. 
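
A minimal, self-contained sketch of the per-table write-ID bookkeeping that the Driver hunk above relies on: one write ID per table per transaction, shared between the ANALYZE path and any ACID DDL path that asks for the same table. TxnWriteIdModel and its fields are hypothetical stand-ins, not Hive's HiveTxnManager API.

    import java.util.HashMap;
    import java.util.Map;

    public class TxnWriteIdModel {
      private long nextWriteId = 1;                                 // next ID this "metastore" hands out
      private final Map<String, Long> allocated = new HashMap<>();  // table -> write ID within this txn

      // Allocate at most one write ID per table per transaction; later callers for the
      // same table (e.g. an ACID DDL desc after the ANALYZE allocation) see the same ID.
      public long getTableWriteId(String dbName, String tableName) {
        return allocated.computeIfAbsent(dbName + "." + tableName, k -> nextWriteId++);
      }

      public static void main(String[] args) {
        TxnWriteIdModel txn = new TxnWriteIdModel();
        long fromAnalyze = txn.getTableWriteId("default", "t");
        long fromDdl = txn.getTableWriteId("default", "t");
        System.out.println(fromAnalyze == fromDdl);   // true: one write ID per table
      }
    }
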
+ private final WriteEntity acidAnalyzeTable; private final DDLDesc.DDLDescWithWriteId acidDdlDesc; private Boolean autoCommitValue; @@ -125,6 +127,7 @@ protected QueryPlan(HiveOperation command) { this.acidResourcesInQuery = false; this.acidSinks = Collections.emptySet(); this.acidDdlDesc = null; + this.acidAnalyzeTable = null; } public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime, String queryId, @@ -151,9 +154,11 @@ public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime, S this.operation = operation; this.autoCommitValue = sem.getAutoCommitValue(); this.resultSchema = resultSchema; + // TODO: all this ACID stuff should be in some sub-object this.acidResourcesInQuery = sem.hasTransactionalInQuery(); this.acidSinks = sem.getAcidFileSinks(); this.acidDdlDesc = sem.getAcidDdlDesc(); + this.acidAnalyzeTable = sem.getAcidAnalyzeTable(); } /** @@ -162,6 +167,11 @@ public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime, S public boolean hasAcidResourcesInQuery() { return acidResourcesInQuery; } + + public WriteEntity getAcidAnalyzeTable() { + return acidAnalyzeTable; + } + /** * @return Collection of FileSinkDesc representing writes to Acid resources */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index d912d4d6b3fa118ca3d755b8b07329261cc76acc..397cee2a5f5db8874b402377f36d9a33551327c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -3933,14 +3933,13 @@ private int alterTable(Hive db, AlterTableDesc alterTbl) throws HiveException { environmentContext = new EnvironmentContext(); } environmentContext.putToProperties(HiveMetaHook.ALTER_TABLE_OPERATION_TYPE, alterTbl.getOp().name()); - // Note: in the old default overloads that I've removed, "transactional" was true for tables, - // but false for partitions. Seems to be ok here because we are not updating - // partition-stats-related stuff from this call (alterTable). if (allPartitions == null) { db.alterTable(alterTbl.getOldName(), tbl, alterTbl.getIsCascade(), environmentContext, true); } else { - db.alterPartitions( - Warehouse.getQualifiedName(tbl.getTTable()), allPartitions, environmentContext, false); + // Note: this is necessary for UPDATE_STATISTICS command, that operates via ADDPROPS (why?). + // For any other updates, we don't want to do txn check on partitions when altering table. + boolean isTxn = alterTbl.getPartSpec() != null && alterTbl.getOp() == AlterTableTypes.ADDPROPS; + db.alterPartitions(Warehouse.getQualifiedName(tbl.getTTable()), allPartitions, environmentContext, isTxn); } // Add constraints if necessary addConstraints(db, alterTbl); diff --git ql/src/java/org/apache/hadoop/hive/ql/hooks/UpdateInputAccessTimeHook.java ql/src/java/org/apache/hadoop/hive/ql/hooks/UpdateInputAccessTimeHook.java index 4cf7c25a8260e86869d35cfaf97aacfece988e1f..ea0b2c357cf8098a9f31664b1b16ec6580ae951b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/hooks/UpdateInputAccessTimeHook.java +++ ql/src/java/org/apache/hadoop/hive/ql/hooks/UpdateInputAccessTimeHook.java @@ -63,7 +63,7 @@ public void run(HookContext hookContext) throws Exception { String tblName = re.getTable().getTableName(); Table t = db.getTable(dbName, tblName); t.setLastAccessTime(lastAccessTime); - db.alterTable(dbName + "." + tblName, t, false, null, true); + db.alterTable(dbName + "." 
+ tblName, t, false, null, false); break; } case PARTITION: { @@ -73,9 +73,9 @@ public void run(HookContext hookContext) throws Exception { Table t = db.getTable(dbName, tblName); p = db.getPartition(t, p.getSpec(), false); p.setLastAccessTime(lastAccessTime); - db.alterPartition(dbName, tblName, p, null, true); + db.alterPartition(dbName, tblName, p, null, false); t.setLastAccessTime(lastAccessTime); - db.alterTable(dbName + "." + tblName, t, false, null, true); + db.alterTable(dbName + "." + tblName, t, false, null, false); break; } default: diff --git ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java index f1cf1138120640859a94fce45a5505fb0d3bad82..3afa201fbcd5886eef057a3fdc5345f2dad3e1f0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java +++ ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java @@ -39,6 +39,7 @@ private boolean isTempURI = false; private transient boolean isDynamicPartitionWrite = false; + private transient boolean isTxnAnalyze = false; public static enum WriteType { DDL_EXCLUSIVE, // for use in DDL statements that require an exclusive lock, @@ -223,6 +224,7 @@ public static WriteType determineAlterTableWriteType(AlterTableDesc.AlterTableTy case ADDPARTITION: case ADDSERDEPROPS: case ADDPROPS: + case UPDATESTATS: return WriteType.DDL_SHARED; case COMPACT: @@ -242,4 +244,11 @@ public String toDetailedString() { return toString() + " Type=" + getTyp() + " WriteType=" + getWriteType() + " isDP=" + isDynamicPartitionWrite(); } + public boolean isTxnAnalyze() { + return isTxnAnalyze; + } + + public void setTxnAnalyze(boolean isTxnAnalyze) { + this.isTxnAnalyze = isTxnAnalyze; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index e54afc436201f4774646006a15e80efc9d0c5570..f356682cdcd36725c98dda4932bba0bab32e8d09 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -72,6 +72,7 @@ import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; + import java.nio.charset.Charset; /** @@ -1648,7 +1649,7 @@ public void setValidWriteIdList(String validWriteIdList) { @Override public String toString() { - return "[txnId=" + txnId + ", validWriteIdList=" + validWriteIdList + "]"; + return "[txnId=" + txnId + ", validWriteIdList=" + validWriteIdList + ", writeId=" + writeId + "]"; } } @@ -1661,49 +1662,60 @@ public static TableSnapshot getTableSnapshot( public static TableSnapshot getTableSnapshot( Configuration conf, Table tbl, boolean isStatsUpdater) throws LockException { + return getTableSnapshot(conf, tbl, tbl.getDbName(), tbl.getTableName(), isStatsUpdater); + } + + public static TableSnapshot getTableSnapshot(Configuration conf, + Table tbl, String dbName, String tblName, boolean isStatsUpdater) + throws LockException, AssertionError { if (!isTransactionalTable(tbl)) { return null; - } else { - long txnId = -1; - long writeId = -1; - ValidWriteIdList validWriteIdList = null; + } + if (dbName == null) { + dbName = tbl.getDbName(); + } + if (tblName == null) { + tblName = tbl.getTableName(); + } + long txnId = -1; + long writeId = -1; + ValidWriteIdList validWriteIdList = null; - HiveTxnManager sessionTxnMgr = SessionState.get().getTxnMgr(); + HiveTxnManager sessionTxnMgr = SessionState.get().getTxnMgr(); - if (sessionTxnMgr != null) { - txnId = sessionTxnMgr.getCurrentTxnId(); - } - String fullTableName 
= getFullTableName(tbl.getDbName(), tbl.getTableName()); - if (txnId > 0 && isTransactionalTable(tbl)) { - validWriteIdList = getTableValidWriteIdList(conf, fullTableName); - if (isStatsUpdater) { - writeId = SessionState.get().getTxnMgr() != null ? - SessionState.get().getTxnMgr().getAllocatedTableWriteId( - tbl.getDbName(), tbl.getTableName()) : -1; - if (writeId < 1) { - // TODO: this is not ideal... stats updater that doesn't have write ID is currently - // "create table"; writeId would be 0/-1 here. No need to call this w/true. - LOG.debug("Stats updater for {}.{} doesn't have a write ID", - tbl.getDbName(), tbl.getTableName()); - } + if (sessionTxnMgr != null) { + txnId = sessionTxnMgr.getCurrentTxnId(); + } + String fullTableName = getFullTableName(dbName, tblName); + if (txnId > 0) { + validWriteIdList = getTableValidWriteIdList(conf, fullTableName); + if (isStatsUpdater) { + writeId = SessionState.get().getTxnMgr() != null ? + SessionState.get().getTxnMgr().getAllocatedTableWriteId( + dbName, tblName) : -1; + if (writeId < 1) { + // TODO: this is not ideal... stats updater that doesn't have write ID is currently + // "create table"; writeId would be 0/-1 here. No need to call this w/true. + LOG.debug("Stats updater for {}.{} doesn't have a write ID ({})", + dbName, tblName, writeId); } + } - if (HiveConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST) - && conf.get(ValidTxnList.VALID_TXNS_KEY) == null) { - return null; - } - if (validWriteIdList == null) { - validWriteIdList = getTableValidWriteIdListWithTxnList( - conf, tbl.getDbName(), tbl.getTableName()); - } - if (validWriteIdList == null) { - throw new AssertionError("Cannot find valid write ID list for " + tbl.getTableName()); - } + if (HiveConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST) + && conf.get(ValidTxnList.VALID_TXNS_KEY) == null) { + return null; + } + if (validWriteIdList == null) { + validWriteIdList = getTableValidWriteIdListWithTxnList( + conf, dbName, tblName); + } + if (validWriteIdList == null) { + throw new AssertionError("Cannot find valid write ID list for " + tblName); } - return new TableSnapshot(txnId, writeId, - validWriteIdList != null ? validWriteIdList.toString() : null); } + return new TableSnapshot(txnId, writeId, + validWriteIdList != null ? 
validWriteIdList.toString() : null); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java index a05ae0c374b139f50fd3ec1e78d32521b6cf074f..d3eefb9fc9253c2fe284e5f48f7b20d7dd91b256 100644 --- ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java +++ ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java @@ -19,10 +19,12 @@ Licensed to the Apache Software Foundation (ASF) under one import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; +import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.LockComponentBuilder; @@ -569,7 +571,12 @@ Seems much cleaner if each stmt is identified as a particular HiveOperation (whi break; case DDL_SHARED: compBuilder.setShared(); - compBuilder.setOperationType(DataOperationType.NO_TXN); + if (!output.isTxnAnalyze()) { + // Analyze needs txn components to be present, otherwise an aborted analyze write ID + // might be rolled under the watermark by compactor while stats written by it are + // still present. + compBuilder.setOperationType(DataOperationType.NO_TXN); + } break; case UPDATE: diff --git ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java index 03f2ff31be7021bd0b4ac5aa194061441608bf40..17a2d20a001685567567b2a78f8251de4140e02f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java +++ ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java @@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.ql.Context; diff --git ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java index 6a01abc9fab9ff5601b8a12eee0abfed4c2ef6c2..ba1f1ffe889598d9e1bec163eef471148b49c83e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java +++ ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java @@ -19,6 +19,7 @@ import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; +import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.metastore.api.CommitTxnRequest; import org.apache.hadoop.hive.metastore.api.LockResponse; import org.apache.hadoop.hive.metastore.api.TxnToWriteId; diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 07fe43fc5e6be5048519ab7d23f9dd05954facdc..7a1160df672e985ef16e4038f4c208856767cce5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -113,6 +113,7 @@ import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.AcidUtils; +import 
org.apache.hadoop.hive.ql.io.AcidUtils.TableSnapshot; import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; import org.apache.hadoop.hive.ql.lockmgr.LockException; import org.apache.hadoop.hive.ql.log.PerfLogger; @@ -584,7 +585,7 @@ public void createTable(String tableName, List columns, List par public void alterTable(Table newTbl, boolean cascade, EnvironmentContext environmentContext, boolean transactional) throws HiveException { - alterTable(newTbl.getDbName(), + alterTable(newTbl.getCatName(), newTbl.getDbName(), newTbl.getTableName(), newTbl, cascade, environmentContext, transactional); } @@ -605,20 +606,23 @@ public void alterTable(String fullyQlfdTblName, Table newTbl, EnvironmentContext boolean transactional) throws HiveException { String[] names = Utilities.getDbTableName(fullyQlfdTblName); - alterTable(names[0], names[1], newTbl, false, environmentContext, transactional); + alterTable(null, names[0], names[1], newTbl, false, environmentContext, transactional); } public void alterTable(String fullyQlfdTblName, Table newTbl, boolean cascade, EnvironmentContext environmentContext, boolean transactional) throws HiveException { String[] names = Utilities.getDbTableName(fullyQlfdTblName); - alterTable(names[0], names[1], newTbl, cascade, environmentContext, transactional); + alterTable(null, names[0], names[1], newTbl, cascade, environmentContext, transactional); } - public void alterTable(String dbName, String tblName, Table newTbl, boolean cascade, + public void alterTable(String catName, String dbName, String tblName, Table newTbl, boolean cascade, EnvironmentContext environmentContext, boolean transactional) throws HiveException { + if (catName == null) { + catName = getDefaultCatalog(conf); + } try { // Remove the DDL_TIME so it gets refreshed if (newTbl.getParameters() != null) { @@ -633,12 +637,22 @@ public void alterTable(String dbName, String tblName, Table newTbl, boolean casc } // Take a table snapshot and set it to newTbl. + AcidUtils.TableSnapshot tableSnapshot = null; if (transactional) { - setTableSnapshotForTransactionalTable(environmentContext, conf, newTbl, true); + // Make sure we pass in the names, so we can get the correct snapshot for rename table. + tableSnapshot = AcidUtils.getTableSnapshot(conf, newTbl, dbName, tblName, true); + if (tableSnapshot != null) { + newTbl.getTTable().setWriteId(tableSnapshot.getWriteId()); + } else { + LOG.warn("Cannot get a table snapshot for " + tblName); + } } - getMSC().alter_table_with_environmentContext( - dbName, tblName, newTbl.getTTable(), environmentContext); + // Why is alter_partitions synchronized while this isn't? + getMSC().alter_table( + catName, dbName, tblName, newTbl.getTTable(), environmentContext, + tableSnapshot == null ? -1 : tableSnapshot.getTxnId(), + tableSnapshot == null ? null : tableSnapshot.getValidWriteIdList()); } catch (MetaException e) { throw new HiveException("Unable to alter table. 
" + e.getMessage(), e); } catch (TException e) { @@ -703,11 +717,19 @@ public void alterPartition(String dbName, String tblName, Partition newPart, if (environmentContext == null) { environmentContext = new EnvironmentContext(); } + AcidUtils.TableSnapshot tableSnapshot = null; if (transactional) { - setTableSnapshotForTransactionalPartition(environmentContext, conf, newPart, true); + tableSnapshot = AcidUtils.getTableSnapshot(conf, newPart.getTable(), true); + if (tableSnapshot != null) { + newPart.getTPartition().setWriteId(tableSnapshot.getWriteId()); + } else { + LOG.warn("Cannot get a table snapshot for " + tblName); + } } getSynchronizedMSC().alter_partition( - dbName, tblName, newPart.getTPartition(), environmentContext); + dbName, tblName, newPart.getTPartition(), environmentContext, + tableSnapshot == null ? -1 : tableSnapshot.getTxnId(), + tableSnapshot == null ? null : tableSnapshot.getValidWriteIdList()); } catch (MetaException e) { throw new HiveException("Unable to alter partition. " + e.getMessage(), e); @@ -895,7 +917,11 @@ public void createTable(Table tbl, boolean ifNotExists, } } // Set table snapshot to api.Table to make it persistent. - setTableSnapshotForTransactionalTable(null, conf, tbl, true); + TableSnapshot tableSnapshot = AcidUtils.getTableSnapshot(conf, tbl, true); + if (tableSnapshot != null) { + tbl.getTTable().setWriteId(tableSnapshot.getWriteId()); + } + if (primaryKeys == null && foreignKeys == null && uniqueConstraints == null && notNullConstraints == null && defaultConstraints == null && checkConstraints == null) { @@ -1028,10 +1054,20 @@ public void dropTable(String dbName, String tableName, boolean deleteData, public void truncateTable(String dbDotTableName, Map partSpec) throws HiveException { try { Table table = getTable(dbDotTableName, true); + // TODO: we should refactor code to make sure snapshot is always obtained in the same layer e.g. Hive.java + AcidUtils.TableSnapshot snapshot = null; + if (AcidUtils.isTransactionalTable(table)) { + snapshot = AcidUtils.getTableSnapshot(conf, table, true); + } List partNames = ((null == partSpec) - ? null : getPartitionNames(table.getDbName(), table.getTableName(), partSpec, (short) -1)); - getMSC().truncateTable(table.getDbName(), table.getTableName(), partNames); + ? null : getPartitionNames(table.getDbName(), table.getTableName(), partSpec, (short) -1)); + if (snapshot == null) { + getMSC().truncateTable(table.getDbName(), table.getTableName(), partNames); + } else { + getMSC().truncateTable(table.getDbName(), table.getTableName(), partNames, + snapshot.getTxnId(), snapshot.getValidWriteIdList(), snapshot.getWriteId()); + } } catch (Exception e) { throw new HiveException(e); } @@ -1683,7 +1719,7 @@ public Database getDatabaseCurrent() throws HiveException { * true if there is a following task which updates the stats, so, this method need not update. * @param writeId write ID allocated for the current load operation * @param stmtId statement ID of the current load statement - * @param isInsertOverwrite + * @param isInsertOverwrite * @return Partition object being loaded with data */ public Partition loadPartition(Path loadPath, Table tbl, Map partSpec, @@ -1736,7 +1772,7 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par List newFiles = Collections.synchronizedList(new ArrayList()); perfLogger.PerfLogBegin("MoveTask", PerfLogger.FILE_MOVES); - + // If config is set, table is not temporary and partition being inserted exists, capture // the list of files added. 
For not yet existing partitions (insert overwrite to new partition // or dynamic partition inserts), the add partition event will capture the list of files added. @@ -1799,8 +1835,11 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath); alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString()); validatePartition(newTPart); - EnvironmentContext ec = new EnvironmentContext(); - setTableSnapshotForTransactionalPartition(ec, conf, newTPart, true); + AcidUtils.TableSnapshot tableSnapshot = null; + tableSnapshot = AcidUtils.getTableSnapshot(conf, newTPart.getTable(), true); + if (tableSnapshot != null) { + newTPart.getTPartition().setWriteId(tableSnapshot.getWriteId()); + } // If config is set, table is not temporary and partition being inserted exists, capture // the list of files added. For not yet existing partitions (insert overwrite to new partition @@ -1873,7 +1912,7 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par // insert into table T partition (ds) values ('Joe', 'today'); -- will fail with AlreadyExistsException // In that case, we want to retry with alterPartition. LOG.debug("Caught AlreadyExistsException, trying to alter partition instead"); - setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart, ec); + setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart, tableSnapshot); } catch (Exception e) { try { final FileSystem newPathFileSystem = newPartPath.getFileSystem(this.getConf()); @@ -1892,7 +1931,7 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par addWriteNotificationLog(tbl, partSpec, newFiles, writeId); } } else { - setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart, ec); + setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart, tableSnapshot); } perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_PARTITION); @@ -1989,13 +2028,16 @@ private void listFilesCreatedByQuery(Path loadPath, long writeId, int stmtId, } private void setStatsPropAndAlterPartition(boolean hasFollowingStatsTask, Table tbl, - Partition newTPart, EnvironmentContext ec) throws MetaException, TException { + Partition newTPart, TableSnapshot tableSnapshot) throws MetaException, TException { + EnvironmentContext ec = new EnvironmentContext(); if (hasFollowingStatsTask) { ec.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); } LOG.debug("Altering existing partition " + newTPart.getSpec()); - getSynchronizedMSC().alter_partition(tbl.getDbName(), tbl.getTableName(), - newTPart.getTPartition(), ec); + getSynchronizedMSC().alter_partition( + tbl.getDbName(), tbl.getTableName(), newTPart.getTPartition(), new EnvironmentContext(), + tableSnapshot == null ? -1 : tableSnapshot.getTxnId(), + tableSnapshot == null ? 
null : tableSnapshot.getValidWriteIdList()); } /** @@ -2516,7 +2558,7 @@ public Partition createPartition(Table tbl, Map partSpec) throws out.add(new Partition(tbl, outPart)); } getMSC().alter_partitions(addPartitionDesc.getDbName(), addPartitionDesc.getTableName(), - partsToAlter, new EnvironmentContext()); + partsToAlter, new EnvironmentContext(), -1, null, -1); for ( org.apache.hadoop.hive.metastore.api.Partition outPart : getMSC().getPartitionsByNames(addPartitionDesc.getDbName(), addPartitionDesc.getTableName(),part_names)){ @@ -5362,43 +5404,4 @@ public StorageHandlerInfo getStorageHandlerInfo(Table table) throw new HiveException(e); } } - - private void setTableSnapshotForTransactionalTable(EnvironmentContext ec, HiveConf conf, - Table newTbl, boolean isStatsUpdater) throws LockException { - - org.apache.hadoop.hive.metastore.api.Table newTTbl = newTbl.getTTable(); - AcidUtils.TableSnapshot tableSnapshot = - AcidUtils.getTableSnapshot(conf, newTbl, isStatsUpdater); - if (tableSnapshot == null) return; - if (ec != null) { // Can be null for create table case; we don't need to verify txn stats. - ec.putToProperties(StatsSetupConst.TXN_ID, Long.toString(tableSnapshot.getTxnId())); - if (tableSnapshot.getValidWriteIdList() != null) { - ec.putToProperties(StatsSetupConst.VALID_WRITE_IDS, tableSnapshot.getValidWriteIdList()); - } else { - LOG.warn("Table snapshot has null write IDs for " + newTbl); - } - } - - if (isStatsUpdater) { - newTTbl.setWriteId(tableSnapshot.getWriteId()); - } - } - - private void setTableSnapshotForTransactionalPartition(EnvironmentContext ec, HiveConf conf, - Partition partition, boolean isStatsUpdater) throws LockException { - AcidUtils.TableSnapshot tableSnapshot = - AcidUtils.getTableSnapshot(conf, partition.getTable(), isStatsUpdater); - org.apache.hadoop.hive.metastore.api.Partition tpartition = partition.getTPartition(); - if (tableSnapshot == null) return; - ec.putToProperties(StatsSetupConst.TXN_ID, Long.toString(tableSnapshot.getTxnId())); - if (tableSnapshot.getValidWriteIdList() != null) { - ec.putToProperties(StatsSetupConst.VALID_WRITE_IDS, tableSnapshot.getValidWriteIdList()); - } else { - LOG.warn("Table snapshot has null write IDs for " + partition); - } - - if (isStatsUpdater) { - tpartition.setWriteId(tableSnapshot.getWriteId()); - } - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index 5d382ae6f3b4fca5c34d0358dafa8762c81e4fa5..f7c90097e05eed73d51f3d9bd93a457f7dd5ce2d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -158,6 +158,18 @@ public void truncateTable(String dbName, String tableName, List partName } @Override + public void truncateTable(String dbName, String tableName, + List partNames, long txnId, String validWriteIds, long writeId) + throws TException { + org.apache.hadoop.hive.metastore.api.Table table = getTempTable(dbName, tableName); + if (table != null) { + truncateTempTable(table); + return; + } + super.truncateTable(dbName, tableName, partNames, txnId, validWriteIds, writeId); + } + + @Override public org.apache.hadoop.hive.metastore.api.Table getTable(String dbname, String name) throws MetaException, TException, NoSuchObjectException { // First check temp tables @@ -348,6 +360,21 @@ public void alter_table(String dbname, String tbl_name, org.apache.hadoop.hive.m } @Override + 
public void alter_table(String catName, String dbName, String tbl_name, + org.apache.hadoop.hive.metastore.api.Table new_tbl, + EnvironmentContext envContext, long txnId, String validWriteIds) + throws InvalidOperationException, MetaException, TException { + org.apache.hadoop.hive.metastore.api.Table old_tbl = getTempTable(dbName, tbl_name); + if (old_tbl != null) { + //actually temp table does not support partitions, cascade is not applicable here + alterTempTable(dbName, tbl_name, old_tbl, new_tbl, null); + return; + } + super.alter_table(catName, dbName, tbl_name, new_tbl, envContext, txnId, + validWriteIds); + } + + @Override public void alter_table(String dbname, String tbl_name, org.apache.hadoop.hive.metastore.api.Table new_tbl) throws InvalidOperationException, MetaException, TException { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index be436867d66e273e7d5630312dbe0f756d072d1b..40039ac39d37fc5f97fbf5e0cc536dd2d6184e74 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -2297,4 +2297,8 @@ public void setCacheUsage(CacheUsage cacheUsage) { public DDLDescWithWriteId getAcidDdlDesc() { return null; } + + public WriteEntity getAcidAnalyzeTable() { + return null; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index b6825ae43ec702242054b9ef67a1ac64d940659a..04c08083bccb3edfdccb6ebd1d853aaa3576765e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -661,6 +661,11 @@ private void analyzeAlterTableUpdateStats(ASTNode ast, String tblName, Map tabNameToTabObject; @@ -11212,64 +11215,76 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String // if it is not analyze command and not column stats, then do not gatherstats if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) { tsDesc.setGatherStats(false); - } else { - if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) { - String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString(); - LOG.debug("Set stats collection dir : " + statsTmpLoc); - tsDesc.setTmpStatsDir(statsTmpLoc); - } - tsDesc.setGatherStats(true); - tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + return; + } - // append additional virtual columns for storing statistics - Iterator vcs = VirtualColumn.getStatsRegistry(conf).iterator(); - List vcList = new ArrayList(); - while (vcs.hasNext()) { - VirtualColumn vc = vcs.next(); - rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(), - vc.getTypeInfo(), alias, true, vc.getIsHidden())); - vcList.add(vc); + if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) { + String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString(); + LOG.debug("Set stats collection dir : " + statsTmpLoc); + tsDesc.setTmpStatsDir(statsTmpLoc); + } + tsDesc.setGatherStats(true); + tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + + // append additional virtual columns for storing statistics + Iterator vcs = VirtualColumn.getStatsRegistry(conf).iterator(); + List vcList = new ArrayList(); + while (vcs.hasNext()) { + VirtualColumn vc = vcs.next(); + 
rwsch.put(alias, vc.getName(), new ColumnInfo(vc.getName(), + vc.getTypeInfo(), alias, true, vc.getIsHidden())); + vcList.add(vc); + } + tsDesc.addVirtualCols(vcList); + + String tblName = tab.getTableName(); + // Theoretically the key prefix could be any unique string shared + // between TableScanOperator (when publishing) and StatsTask (when aggregating). + // Here we use + // db_name.table_name + partitionSec + // as the prefix for easy of read during explain and debugging. + // Currently, partition spec can only be static partition. + String k = org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR; + tsDesc.setStatsAggPrefix(tab.getDbName()+"."+k); + + // set up WriteEntity for replication and txn stats + WriteEntity we = new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED); + we.setTxnAnalyze(true); + outputs.add(we); + if (AcidUtils.isTransactionalTable(tab)) { + if (acidAnalyzeTable != null) { + throw new IllegalStateException("Multiple ACID tables in analyze: " + + we + ", " + acidAnalyzeTable); + } + acidAnalyzeTable = we; + } + + // add WriteEntity for each matching partition + if (tab.isPartitioned()) { + List cols = new ArrayList(); + if (qbp.getAnalyzeRewrite() != null) { + List partitionCols = tab.getPartCols(); + for (FieldSchema fs : partitionCols) { + cols.add(fs.getName()); + } + tsDesc.setPartColumns(cols); + return; } - tsDesc.addVirtualCols(vcList); - - String tblName = tab.getTableName(); - // Theoretically the key prefix could be any unique string shared - // between TableScanOperator (when publishing) and StatsTask (when aggregating). - // Here we use - // db_name.table_name + partitionSec - // as the prefix for easy of read during explain and debugging. - // Currently, partition spec can only be static partition. - String k = org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR; - tsDesc.setStatsAggPrefix(tab.getDbName()+"."+k); - - // set up WriteEntity for replication - outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED)); - - // add WriteEntity for each matching partition - if (tab.isPartitioned()) { - List cols = new ArrayList(); - if (qbp.getAnalyzeRewrite() != null) { - List partitionCols = tab.getPartCols(); - for (FieldSchema fs : partitionCols) { - cols.add(fs.getName()); - } - tsDesc.setPartColumns(cols); - return; - } - TableSpec tblSpec = qbp.getTableSpec(alias); - Map partSpec = tblSpec.getPartSpec(); - if (partSpec != null) { - cols.addAll(partSpec.keySet()); - tsDesc.setPartColumns(cols); - } else { - throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg()); - } - List partitions = qbp.getTableSpec().partitions; - if (partitions != null) { - for (Partition partn : partitions) { - // inputs.add(new ReadEntity(partn)); // is this needed at all? - outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK)); - } + TableSpec tblSpec = qbp.getTableSpec(alias); + Map partSpec = tblSpec.getPartSpec(); + if (partSpec != null) { + cols.addAll(partSpec.keySet()); + tsDesc.setPartColumns(cols); + } else { + throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg()); + } + List partitions = qbp.getTableSpec().partitions; + if (partitions != null) { + for (Partition partn : partitions) { + // inputs.add(new ReadEntity(partn)); // is this needed at all? 
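
A compact model of the DDL_SHARED branch that the DbTxnManager hunk earlier in this patch changes for outputs flagged with setTxnAnalyze(true): the patch merely skips forcing NO_TXN, so the lock component keeps its transactional default and the analyze write ID is tracked as a txn component. SharedDdlLockModel and its enum are hypothetical stand-ins, not Hive's LockComponentBuilder API.

    public class SharedDdlLockModel {
      enum OperationType { NO_TXN /* other transactional values elided */ }

      // Returns the override to apply to a shared-DDL lock component, or null for
      // "leave the transactional default", mirroring the skipped setOperationType call.
      static OperationType overrideForSharedDdl(boolean isTxnAnalyze) {
        // Analyze needs txn components recorded; otherwise an aborted analyze write ID
        // could be compacted below the watermark while the stats it wrote remain.
        return isTxnAnalyze ? null : OperationType.NO_TXN;
      }

      public static void main(String[] args) {
        System.out.println(overrideForSharedDdl(false));  // NO_TXN: plain shared DDL lock
        System.out.println(overrideForSharedDdl(true));   // null: keep txn default for ANALYZE
      }
    }
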
+ WriteEntity pwe = new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK); + pwe.setTxnAnalyze(true); + outputs.add(pwe); } } } @@ -12745,7 +12760,6 @@ private ExprNodeDesc getExprNodeDescCached(ASTNode expr, RowResolver input) @Override public void validate() throws SemanticException { - LOG.debug("validation start"); boolean wasAcidChecked = false; // Validate inputs and outputs have right protectmode to execute the query for (ReadEntity readEntity : getInputs()) { @@ -14954,4 +14968,9 @@ protected void addPartitionColsToInsert(List partCols, StringBuilde rewrittenQueryStr.append(")"); } } + + @Override + public WriteEntity getAcidAnalyzeTable() { + return acidAnalyzeTable; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenamePartitionHandler.java ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenamePartitionHandler.java index 43f2cbcaa4e2a1fc7d4ea3f620b61e2552596598..003502645cdbbaecc28359c0f1f0ef157c0b4d3f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenamePartitionHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/RenamePartitionHandler.java @@ -60,7 +60,7 @@ } RenamePartitionDesc renamePtnDesc = new RenamePartitionDesc( - tableName, oldPartSpec, newPartSpec, context.eventOnlyReplicationSpec()); + tableName, oldPartSpec, newPartSpec, context.eventOnlyReplicationSpec(), null); Task renamePtnTask = TaskFactory.get( new DDLWork(readEntitySet, writeEntitySet, renamePtnDesc), context.hiveConf); context.log.debug("Added rename ptn task : {}:{}->{}", diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AlterMaterializedViewDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AlterMaterializedViewDesc.java index 84933687e4491576752504aa06eaaaa16be34c0c..865d1431d16c21b7ce8b51c901966a06089c687d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterMaterializedViewDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterMaterializedViewDesc.java @@ -20,15 +20,16 @@ import java.io.Serializable; +import org.apache.hadoop.hive.ql.plan.DDLDesc.DDLDescWithWriteId; import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * AlterMaterializedViewDesc. 
*/ @Explain(displayName = "Alter Materialized View", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) -public class AlterMaterializedViewDesc extends DDLDesc implements Serializable { +public class AlterMaterializedViewDesc extends DDLDesc implements Serializable, DDLDescWithWriteId { private static final long serialVersionUID = 1L; - private String materializedViewName; + private String fqMaterializedViewName; private boolean rewriteEnable; /** @@ -40,6 +41,7 @@ }; AlterMaterializedViewTypes op; + private long writeId; public AlterMaterializedViewDesc() { } @@ -53,15 +55,15 @@ public AlterMaterializedViewDesc(AlterMaterializedViewTypes type) { */ @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getMaterializedViewName() { - return materializedViewName; + return fqMaterializedViewName; } /** * @param materializedViewName * the materializedViewName to set */ - public void setMaterializedViewName(String materializedViewName) { - this.materializedViewName = materializedViewName; + public void setFqMaterializedViewName(String materializedViewName) { + this.fqMaterializedViewName = materializedViewName; } /** @@ -102,4 +104,19 @@ public void setOp(AlterMaterializedViewTypes op) { this.op = op; } + @Override + public void setWriteId(long writeId) { + this.writeId = writeId; + } + + @Override + public String getFullTableName() { + return fqMaterializedViewName; + } + + @Override + public boolean mayNeedWriteId() { + return true; // Verified when this is set as DDL Desc for ACID. + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableAlterPartDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableAlterPartDesc.java index 54687e08137b03f8e6b7b4936d3d22f76cf3045a..652c007643a30dd10a9d4f876d474af2dae0cf80 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableAlterPartDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableAlterPartDesc.java @@ -20,30 +20,31 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; -public class AlterTableAlterPartDesc extends DDLDesc { - private String tableName; +public class AlterTableAlterPartDesc extends DDLDesc implements DDLDesc.DDLDescWithWriteId { + private String fqTableName; private FieldSchema partKeySpec; + private long writeId; public AlterTableAlterPartDesc() { } /** - * @param tableName + * @param fqTableName * table containing the partition * @param partKeySpec */ - public AlterTableAlterPartDesc(String tableName, FieldSchema partKeySpec) { + public AlterTableAlterPartDesc(String fqTableName, FieldSchema partKeySpec) { super(); - this.tableName = tableName; + this.fqTableName = fqTableName; this.partKeySpec = partKeySpec; } public String getTableName() { - return tableName; + return fqTableName; } public void setTableName(String tableName) { - this.tableName = tableName; + this.fqTableName = tableName; } public FieldSchema getPartKeySpec() { @@ -53,4 +54,19 @@ public FieldSchema getPartKeySpec() { public void setPartKeySpec(FieldSchema partKeySpec) { this.partKeySpec = partKeySpec; } + + @Override + public void setWriteId(long writeId) { + this.writeId = writeId; + } + + @Override + public String getFullTableName() { + return fqTableName; + } + + @Override + public boolean mayNeedWriteId() { + return true; // Checked before setting as the acid desc. 
+ } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java index ec04a017a78ce55865e56a50b5469b76ab001b2a..680e0297edc186e40b495aa106a5cccf0b7932de 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java @@ -66,14 +66,14 @@ ALTERSKEWEDLOCATION("alter skew location"), ALTERBUCKETNUM("alter bucket number"), ALTERPARTITION("alter partition"), COMPACT("compact"), TRUNCATE("truncate"), MERGEFILES("merge files"), DROPCONSTRAINT("drop constraint"), ADDCONSTRAINT("add constraint"), - UPDATECOLUMNS("update columns"), OWNER("set owner"); + UPDATECOLUMNS("update columns"), OWNER("set owner"), UPDATESTATS("update stats"); ; private final String name; private AlterTableTypes(String name) { this.name = name; } public String getName() { return name; } - public static final List nonNativeTableAllowedTypes = + public static final List nonNativeTableAllowedTypes = ImmutableList.of(ADDPROPS, DROPPROPS, ADDCOLS); } @@ -139,6 +139,7 @@ ReplicationSpec replicationSpec; private Long writeId = null; PrincipalDesc ownerPrincipal; + private boolean isExplicitStatsUpdate, isFullAcidConversion; public AlterTableDesc() { } @@ -960,8 +961,21 @@ public String getFullTableName() { @Override public boolean mayNeedWriteId() { - return getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS - && AcidUtils.isToInsertOnlyTable(null, getProps()); + switch (getOp()) { + case ADDPROPS: { + return isExplicitStatsUpdate || AcidUtils.isToInsertOnlyTable(null, getProps()) + || (AcidUtils.isTransactionalTable(getProps()) && !isFullAcidConversion); + } + case DROPPROPS: return isExplicitStatsUpdate; + // The check for the following ones is performed before setting AlterTableDesc into the acid field. + // These need write ID and stuff because they invalidate column stats. 
+ case RENAMECOLUMN: return true; + case RENAME: return true; + case REPLACECOLS: return true; + case ADDCOLS: return true; + // RENAMEPARTITION is handled in RenamePartitionDesc + default: return false; + } } public Long getWriteId() { @@ -972,4 +986,12 @@ public Long getWriteId() { public String toString() { return this.getClass().getSimpleName() + " for " + getFullTableName(); } + + public void setIsExplicitStatsUpdate(boolean b) { + this.isExplicitStatsUpdate = b; + } + + public void setIsFullAcidConversion(boolean b) { + this.isFullAcidConversion = b; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java index cbccd87d269f8fb0fd31b6bc9983a0f1d7c2528b..6de1a37ac53744a0c84d7f8fe198cd427a99a0be 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsUpdateWork.java @@ -20,6 +20,8 @@ import java.io.Serializable; import java.util.Map; + +import org.apache.hadoop.hive.ql.plan.DDLDesc.DDLDescWithWriteId; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -32,7 +34,7 @@ * ('maxColLen'='4444','avgColLen'='44.4'); */ @Explain(displayName = "Column Stats Update Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) -public class ColumnStatsUpdateWork implements Serializable { +public class ColumnStatsUpdateWork implements Serializable, DDLDescWithWriteId { private static final long serialVersionUID = 1L; private final String partName; private final Map mapProp; @@ -40,12 +42,13 @@ private final String tableName; private final String colName; private final String colType; + private long writeId; public ColumnStatsUpdateWork(String partName, Map mapProp, String dbName, String tableName, - String colName, + String colName, String colType) { this.partName = partName; this.mapProp = mapProp; @@ -83,4 +86,19 @@ public String getColName() { public String getColType() { return colType; } + + @Override + public void setWriteId(long writeId) { + this.writeId = writeId; + } + + @Override + public String getFullTableName() { + return dbName + "." + tableName; + } + + @Override + public boolean mayNeedWriteId() { + return true; // Checked at setup time; if this is called, the table is transactional. + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/RenamePartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/RenamePartitionDesc.java index a13ac13116ee4fe26bc039400ba710f2bb929c7f..a4a31a5d5f2c07c98b888b214932f95bc414a4b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/RenamePartitionDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/RenamePartitionDesc.java @@ -17,7 +17,9 @@ */ package org.apache.hadoop.hive.ql.plan; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ReplicationSpec; +import org.apache.hadoop.hive.ql.plan.DDLDesc.DDLDescWithWriteId; import java.io.Serializable; import java.util.LinkedHashMap; @@ -26,7 +28,7 @@ /** * Contains the information needed to rename a partition. */ -public class RenamePartitionDesc extends DDLDesc implements Serializable { +public class RenamePartitionDesc extends DDLDesc implements Serializable, DDLDescWithWriteId { private static final long serialVersionUID = 1L; @@ -35,6 +37,8 @@ private LinkedHashMap oldPartSpec; private LinkedHashMap newPartSpec; private ReplicationSpec replicationSpec; + private String fqTableName; + private long writeId; /** * For serialization only. 
@@ -49,13 +53,15 @@ public RenamePartitionDesc() { * old partition specification. * @param newPartSpec * new partition specification. + * @param table */ - public RenamePartitionDesc(String tableName, - Map oldPartSpec, Map newPartSpec, ReplicationSpec replicationSpec) { + public RenamePartitionDesc(String tableName, Map oldPartSpec, + Map newPartSpec, ReplicationSpec replicationSpec, Table table) { this.tableName = tableName; this.oldPartSpec = new LinkedHashMap(oldPartSpec); this.newPartSpec = new LinkedHashMap(newPartSpec); this.replicationSpec = replicationSpec; + this.fqTableName = table != null ? (table.getDbName() + "." + table.getTableName()) : tableName; } /** @@ -66,14 +72,6 @@ public String getTableName() { } /** - * @param tableName - * the table we're going to add the partitions to. - */ - public void setTableName(String tableName) { - this.tableName = tableName; - } - - /** * @return location of partition in relation to table */ public String getLocation() { @@ -123,4 +121,19 @@ public void setNewPartSpec(LinkedHashMap partSpec) { * This can result in a "RENAME IF NEWER THAN" kind of semantic */ public ReplicationSpec getReplicationSpec() { return this.replicationSpec; } + + @Override + public void setWriteId(long writeId) { + this.writeId = writeId; + } + + @Override + public String getFullTableName() { + return fqTableName; + } + + @Override + public boolean mayNeedWriteId() { + return true; // The check is done when setting this as the ACID DDLDesc. + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java index 8c3d852d0c502b7732d900de534b5ccdaa7e2547..9e83576e6bf5a761f6728dfb2c05fddd67b69501 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java @@ -27,6 +27,8 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ReplicationSpec; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** @@ -34,6 +36,7 @@ */ @Explain(displayName = "Truncate Table or Partition", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class TruncateTableDesc extends DDLDesc implements DDLDesc.DDLDescWithWriteId { + private final static Logger LOG = LoggerFactory.getLogger(TruncateTableDesc.class); private static final long serialVersionUID = 1L; @@ -51,9 +54,11 @@ public TruncateTableDesc() { } + public TruncateTableDesc(String tableName, Map partSpec, ReplicationSpec replicationSpec) { this(tableName, partSpec, replicationSpec, null); } + public TruncateTableDesc(String tableName, Map partSpec, ReplicationSpec replicationSpec, Table table) { this.tableName = tableName; @@ -124,10 +129,13 @@ public void setLbCtx(ListBucketingCtx lbCtx) { public void setWriteId(long writeId) { this.writeId = writeId; } + @Override public String getFullTableName() { return fullTableName; } + + @Override public boolean mayNeedWriteId() { return isTransactional; @@ -137,5 +145,4 @@ public boolean mayNeedWriteId() { public String toString() { return this.getClass().getSimpleName() + " for " + getFullTableName(); } - } diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUpdaterThread.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUpdaterThread.java index bb181a192ac39c590c79b3c38aa1515cf16593ef..838d27757ca1fc2610ae76857de1bcb34d02f95e 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUpdaterThread.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUpdaterThread.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.common.ValidReaderWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreThread; +import org.apache.hadoop.hive.metastore.ObjectStore; import org.apache.hadoop.hive.metastore.RawStore; import org.apache.hadoop.hive.metastore.RawStoreProxy; import org.apache.hadoop.hive.metastore.Warehouse; @@ -214,11 +215,17 @@ private void stopWorkers() { String skipParam = table.getParameters().get(SKIP_STATS_AUTOUPDATE_PROPERTY); if ("true".equalsIgnoreCase(skipParam)) return null; - // TODO: when txn stats are implemented, use writeIds to determine stats accuracy - @SuppressWarnings("unused") - ValidReaderWriteIdList writeIds = null; - if (AcidUtils.isTransactionalTable(table)) { - writeIds = getWriteIds(fullTableName); + // Note: ideally we should take a lock here to pretend to be a real reader. + // For now, this check is going to have race potential; it may run a spurious analyze. + String writeIdString = null; + boolean isTxn = AcidUtils.isTransactionalTable(table); + if (isTxn) { + ValidReaderWriteIdList writeIds = getWriteIds(fullTableName); + if (writeIds == null) { + LOG.error("Cannot get writeIds for transactional table " + fullTableName + "; skipping"); + return null; + } + writeIdString = writeIds.writeToString(); } List allCols = new ArrayList<>(table.getSd().getColsSize()); for (FieldSchema fs : table.getSd().getCols()) { @@ -227,9 +234,16 @@ private void stopWorkers() { Collections.sort(allCols); if (table.getPartitionKeysSize() == 0) { Map params = table.getParameters(); - List colsToUpdate = isExistingOnly - ? getExistingNonPartTableStatsToUpdate(fullTableName, cat, db, tbl, params, allCols) - : getAnyStatsToUpdate(allCols, params); + List colsToUpdate = null; + long writeId = isTxn ? table.getWriteId() : -1; + if (isExistingOnly) { + // Get the existing stats, including the txn state if any, to see if we need to update. + colsToUpdate = getExistingNonPartTableStatsToUpdate( + fullTableName, cat, db, tbl, params, writeId, allCols, writeIdString); + } else { + colsToUpdate = getAnyStatsToUpdate(db, tbl, allCols, params, writeId, writeIdString); + } + LOG.debug("Columns to update are {}; existing only: {}, out of: {} based on {}", colsToUpdate, isExistingOnly, allCols, params); @@ -241,7 +255,7 @@ private void stopWorkers() { } else { Map> partsToAnalyze = new HashMap<>(); List colsForAllParts = findPartitionsToAnalyze( - fullTableName, cat, db, tbl, allCols, partsToAnalyze); + fullTableName, cat, db, tbl, allCols, partsToAnalyze, writeIdString); LOG.debug("Columns to update are {} for all partitions; {} individual partitions." + " Existing only: {}, out of: {}", colsForAllParts, partsToAnalyze.size(), isExistingOnly, allCols); @@ -263,18 +277,30 @@ private void stopWorkers() { } private List findPartitionsToAnalyze(TableName fullTableName, String cat, String db, - String tbl, List allCols, Map> partsToAnalyze) - throws MetaException, NoSuchObjectException { + String tbl, List allCols, Map> partsToAnalyze, + String writeIdString) throws MetaException, NoSuchObjectException { // TODO: ideally when col-stats-accurate stuff is stored in some sane structure, this should - // to retrieve partsToUpdate in a single query; no checking partition params in java. + // retrieve partsToUpdate in a single query; no checking partition params in java. 
List partNames = null; Map> colsPerPartition = null; boolean isAllParts = true; if (isExistingOnly) { - colsPerPartition = rs.getPartitionColsWithStats(cat, db, tbl); - partNames = Lists.newArrayList(colsPerPartition.keySet()); - int partitionCount = rs.getNumPartitionsByFilter(cat, db, tbl, ""); - isAllParts = partitionCount == partNames.size(); + // Make sure the number of partitions we get, and the number of stats objects, is consistent. + rs.openTransaction(); + boolean isOk = false; + try { + colsPerPartition = rs.getPartitionColsWithStats(cat, db, tbl); + partNames = Lists.newArrayList(colsPerPartition.keySet()); + int partitionCount = rs.getNumPartitionsByFilter(cat, db, tbl, ""); + isAllParts = partitionCount == partNames.size(); + isOk = true; + } finally { + if (isOk) { + rs.commitTransaction(); + } else { + rs.rollbackTransaction(); + } + } } else { partNames = rs.listPartitionNames(cat, db, tbl, (short) -1); isAllParts = true; @@ -326,9 +352,10 @@ private void stopWorkers() { colsToMaybeUpdate = colsPerPartition.get(partName); Collections.sort(colsToMaybeUpdate); } - List colsToUpdate = getAnyStatsToUpdate(colsToMaybeUpdate, params); - LOG.debug("Updating {} based on {} and {}", colsToUpdate, colsToMaybeUpdate, params); + List colsToUpdate = getAnyStatsToUpdate(db, tbl, colsToMaybeUpdate, params, + writeIdString == null ? -1 : part.getWriteId(), writeIdString); + LOG.debug("Updating {} based on {} and {}", colsToUpdate, colsToMaybeUpdate, params); if (colsToUpdate == null || colsToUpdate.isEmpty()) { if (isAllParts) { @@ -405,8 +432,8 @@ private String buildPartColStr(Table table) { } private List getExistingNonPartTableStatsToUpdate(TableName fullTableName, - String cat, String db, String tbl, Map params, - List allCols) throws MetaException { + String cat, String db, String tbl, Map params, long statsWriteId, + List allCols, String writeIdString) throws MetaException { ColumnStatistics existingStats = null; try { // Note: this should NOT do txn verification - we want to get outdated stats, to @@ -416,12 +443,15 @@ private String buildPartColStr(Table table) { LOG.error("Cannot retrieve existing stats, skipping " + fullTableName, e); return null; } - return getExistingStatsToUpdate(existingStats, params); + // TODO: we should probably skip updating if writeId is from an active txn + boolean isTxnValid = (writeIdString == null) || ObjectStore.isCurrentStatsValidForTheQuery( + conf, db, tbl, params, statsWriteId , 0, writeIdString, false); + return getExistingStatsToUpdate(existingStats, params, isTxnValid); } private List getExistingStatsToUpdate( - ColumnStatistics existingStats, Map params) { - boolean hasAnyAccurate = StatsSetupConst.areBasicStatsUptoDate(params); + ColumnStatistics existingStats, Map params, boolean isTxnValid) { + boolean hasAnyAccurate = isTxnValid && StatsSetupConst.areBasicStatsUptoDate(params); List colsToUpdate = new ArrayList<>(); for (ColumnStatisticsObj obj : existingStats.getStatsObj()) { String col = obj.getColName(); @@ -432,12 +462,17 @@ private String buildPartColStr(Table table) { return colsToUpdate; } - private List getAnyStatsToUpdate( - List allCols, Map params) { + private List getAnyStatsToUpdate(String db, String tbl, List allCols, + Map params, long statsWriteId, String writeIdString) throws MetaException { // Note: we only run "for columns" command and assume no basic stats means no col stats. 
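// A sketch of the commit-or-rollback pattern used in the isExistingOnly branch
// above, where the stats-bearing partition list and the partition count are read
// inside one store transaction so both reflect the same snapshot. The Store
// interface and its method names here are hypothetical placeholders, not Hive's
// RawStore API, though the openTransaction/commitTransaction/rollbackTransaction
// shape mirrors the calls in the hunk.
import java.util.List;
import java.util.Map;

public class ConsistentReadSketch {

  /** Hypothetical minimal store interface with transaction demarcation. */
  interface Store {
    void openTransaction();
    void commitTransaction();
    void rollbackTransaction();
    Map<String, List<String>> partitionColsWithStats(String db, String tbl);
    int partitionCount(String db, String tbl);
  }

  /** Returns true if every partition of db.tbl already carries some column stats. */
  static boolean allPartitionsHaveStats(Store rs, String db, String tbl) {
    rs.openTransaction();
    boolean ok = false;
    try {
      Map<String, List<String>> colsPerPartition = rs.partitionColsWithStats(db, tbl);
      int partitionCount = rs.partitionCount(db, tbl);
      ok = true;
      // Both reads came from the same transaction, so the comparison is consistent.
      return partitionCount == colsPerPartition.size();
    } finally {
      // Commit only if both reads succeeded; otherwise roll back so no open
      // transaction is left dangling on this thread.
      if (ok) {
        rs.commitTransaction();
      } else {
        rs.rollbackTransaction();
      }
    }
  }
}
// The non-existing-only path is unchanged by the hunk: it still lists all
// partition names and analyzes whichever ones turn out to be missing stats.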
if (!StatsSetupConst.areBasicStatsUptoDate(params)) { return allCols; } + // TODO: we should probably skip updating if writeId is from an active txn + if (writeIdString != null && !ObjectStore.isCurrentStatsValidForTheQuery( + conf, db, tbl, params, statsWriteId, 0, writeIdString, false)) { + return allCols; + } List colsToUpdate = new ArrayList<>(); for (String col : allCols) { if (!StatsSetupConst.areColumnStatsUptoDate(params, col)) { @@ -460,8 +495,9 @@ private String buildPartColStr(Table table) { private ValidReaderWriteIdList getWriteIds( TableName fullTableName) throws NoSuchTxnException, MetaException { - GetValidWriteIdsRequest req = new GetValidWriteIdsRequest(); - req.setFullTableNames(Lists.newArrayList(fullTableName.toString())); + // TODO: acid utils don't support catalogs + GetValidWriteIdsRequest req = new GetValidWriteIdsRequest( + Lists.newArrayList(fullTableName.getDbTable()), null); return TxnUtils.createValidReaderWriteIdList( txnHandler.getValidWriteIds(req).getTblValidWriteIds().get(0)); } diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java index 0e436e1c62b8054cae72347dc3aa31adc0d6054a..5b8ff153aea1473360373c4b1488b8abe9ca469c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java @@ -219,7 +219,7 @@ public void testRenameTable() throws Exception { "select count(*) from COMPACTION_QUEUE where CQ_TABLE='s'")); Assert.assertEquals(1, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from WRITE_SET where WS_TABLE='s'")); - Assert.assertEquals(2, TxnDbUtil.countQueryAgent(hiveConf, + Assert.assertEquals(3, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXN_TO_WRITE_ID where T2W_TABLE='s'")); Assert.assertEquals(1, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from NEXT_WRITE_ID where NWI_TABLE='s'")); @@ -234,7 +234,7 @@ public void testRenameTable() throws Exception { "select count(*) from COMPACTION_QUEUE where CQ_TABLE='bar'")); Assert.assertEquals(1, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from WRITE_SET where WS_TABLE='bar'")); - Assert.assertEquals(2, TxnDbUtil.countQueryAgent(hiveConf, + Assert.assertEquals(4, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXN_TO_WRITE_ID where T2W_TABLE='bar'")); Assert.assertEquals(1, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from NEXT_WRITE_ID where NWI_TABLE='bar'")); diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index bbe9d5a58773a0aeca7d090450805102a2df4c95..2c98e3c9063f47db30cf31d22cc548e77fbe40ce 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -448,7 +448,7 @@ logical bucket (tranche) {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"}, {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"}, // update for "{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t60\t80" - {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t60\t88", "warehouse/t/delta_10000001_10000001_0000/bucket_00001"}, + {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t60\t88", "warehouse/t/delta_10000001_10000001_0000/bucket_00001"}, }; rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from T order by a, b, 
INPUT__FILE__NAME"); checkExpected(rs, expected3,"after converting to acid (no compaction with updates)"); @@ -783,12 +783,12 @@ public void testCompactStatsGather() throws Exception { String[][] expected = { {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000003_0000003_0000/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000003_0000003_0000/bucket_00000"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/delta_0000001_0000001_0000/bucket_00000"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/delta_0000001_0000001_0000/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/delta_0000002_0000002_0000/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/delta_0000002_0000002_0000/bucket_00000"} + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/delta_0000003_0000003_0000/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/delta_0000003_0000003_0000/bucket_00000"} }; checkExpected(rs, expected, "insert data"); @@ -801,12 +801,12 @@ public void testCompactStatsGather() throws Exception { String[][] expected2 = { {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"}, {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/base_0000002/bucket_00000"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/base_0000002/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/base_0000002/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/base_0000002/bucket_00000"} + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000003_0000003_0000/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000003_0000003_0000/bucket_00000"}, + {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/base_0000003/bucket_00000"}, + {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/base_0000003/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/base_0000003/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/base_0000003/bucket_00000"} }; checkExpected(rs, expected2, "after major compaction"); diff --git 
ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index 7319ba0e4b09a89b3d9d532ca9eff8b759dd52e6..a2fafca9a1ca069175c046563fa262c41485dc1f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -225,7 +225,7 @@ void checkExpected(List rs, String[][] expected, String msg, Logger LOG, expected.length, rs.size()); //verify data and layout for(int i = 0; i < expected.length; i++) { - Assert.assertTrue("Actual line (data) " + i + " data: " + rs.get(i), rs.get(i).startsWith(expected[i][0])); + Assert.assertTrue("Actual line (data) " + i + " data: " + rs.get(i) + "; expected " + expected[i][0], rs.get(i).startsWith(expected[i][0])); if(checkFileName) { Assert.assertTrue("Actual line(file) " + i + " file: " + rs.get(i), rs.get(i).endsWith(expected[i][1])); } diff --git ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java index d30bbde07172b20c86f075a962ed6e4764a9e08e..ca4d36f30d85ca2d1efe215e597fb7d43673fa43 100755 --- ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java +++ ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java @@ -325,8 +325,10 @@ private void validateTable(Table tbl, String tableName) throws MetaException { tbl.getParameters().put(hive_metastoreConstants.DDL_TIME, ft.getParameters().get(hive_metastoreConstants.DDL_TIME)); // Txn stuff set by metastore - if (tbl.getTTable().isSetWriteId()) { - ft.getTTable().setWriteId(tbl.getTTable().getWriteId()); + if (tbl.getTTable().isSetWriteId() != ft.getTTable().isSetWriteId()) { + // No need to compare this field. + ft.getTTable().setWriteId(0); + tbl.getTTable().setWriteId(0); } assertTrue("Tables doesn't match: " + tableName + " (" + ft.getTTable() + "; " + tbl.getTTable() + ")", ft.getTTable().equals(tbl.getTTable())); diff --git ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUpdaterThread.java ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUpdaterThread.java index 14f86eabbcf4bfc38c92294cd5d71d4905eb5c30..6c768c0aed697beceaef9363859040a19c888acf 100644 --- ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUpdaterThread.java +++ ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUpdaterThread.java @@ -29,9 +29,11 @@ import org.apache.curator.shaded.com.google.common.collect.Lists; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Partition; @@ -140,6 +142,148 @@ public void testMultipleTables() throws Exception { msClient.close(); } + @Test(timeout=80000) + public void testTxnTable() throws Exception { + StatsUpdaterThread su = createUpdater(); + IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf); + + executeQuery("create table simple_stats (s string) TBLPROPERTIES " + + "(\"transactional\"=\"true\", \"transactional_properties\"=\"insert_only\")"); + executeQuery("insert into simple_stats (s) values ('test')"); + List cols = Lists.newArrayList("s"); + String 
dbName = ss.getCurrentDatabase(), tblName = "simple_stats", fqName = dbName + "." + tblName; + ValidWriteIdList initialWriteIds = msClient.getValidWriteIds(fqName); + verifyStatsUpToDate(tblName, cols, msClient, 0, initialWriteIds.toString(), true); + assertFalse(su.runOneIteration()); + drainWorkQueue(su, 0); + + executeQuery("insert overwrite table simple_stats values ('test2')"); + ValidWriteIdList nextWriteIds = msClient.getValidWriteIds(fqName); + verifyStatsUpToDate(tblName, cols, msClient, 0, nextWriteIds.toString(), true); + assertFalse(su.runOneIteration()); + drainWorkQueue(su, 0); + String currentWriteIds = msClient.getValidWriteIds(fqName).toString(); + + // Overwrite the txn state to refer to an open txn. + long badTxnId = msClient.openTxn("moo"); + long badWriteId = msClient.allocateTableWriteId(badTxnId, dbName, tblName); + + Table tbl = msClient.getTable(dbName, tblName); + tbl.setWriteId(badWriteId); + msClient.alter_table( + null, dbName, tblName, tbl, new EnvironmentContext(), -1, initialWriteIds.toString()); + + // Stats should not be valid. + verifyStatsUpToDate(tblName, cols, msClient, 0, currentWriteIds, false); + + // Analyze should not be able to set valid stats for a running txn. + assertTrue(su.runOneIteration()); + drainWorkQueue(su); + + currentWriteIds = msClient.getValidWriteIds(fqName).toString(); + verifyStatsUpToDate(tblName, cols, msClient, 0, currentWriteIds, false); + + msClient.abortTxns(Lists.newArrayList(badTxnId)); + + // Analyze should be able to override stats of an aborted txn. + assertTrue(su.runOneIteration()); + drainWorkQueue(su); + + // Stats will now be valid. + currentWriteIds = msClient.getValidWriteIds(fqName).toString(); + verifyStatsUpToDate(tblName, cols, msClient, 0, currentWriteIds, true); + + // Verify that incorrect stats from a valid write ID are also handled. + badTxnId = msClient.openTxn("moo"); + badWriteId = msClient.allocateTableWriteId(badTxnId, dbName, tblName); + tbl = msClient.getTable(dbName, tblName); + tbl.setWriteId(badWriteId); + StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE); + msClient.alter_table(null, dbName, tblName, tbl, new EnvironmentContext(), -1, initialWriteIds.toString()); + + // Stats should not be valid. + verifyStatsUpToDate(tblName, cols, msClient, 0, currentWriteIds, false); + + // Analyze should not be able to set valid stats for a running txn. + assertTrue(su.runOneIteration()); + drainWorkQueue(su); + + currentWriteIds = msClient.getValidWriteIds(fqName).toString(); + verifyStatsUpToDate(tblName, cols, msClient, 0, currentWriteIds, false); + + msClient.commitTxn(badTxnId); + + // Analyze should be able to override stats of a committed txn. + assertTrue(su.runOneIteration()); + drainWorkQueue(su); + + // Stats will now be valid.
+ currentWriteIds = msClient.getValidWriteIds(fqName).toString(); + verifyStatsUpToDate(tblName, cols, msClient, 0, currentWriteIds, true); + + msClient.close(); + } + + + @Test + public void testTxnPartitions() throws Exception { + StatsUpdaterThread su = createUpdater(); + IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf); + + executeQuery("create table simple_stats (s string) partitioned by (p int) TBLPROPERTIES " + + "(\"transactional\"=\"true\", \"transactional_properties\"=\"insert_only\")"); + executeQuery("insert into simple_stats partition(p=1) values ('test')"); + executeQuery("insert into simple_stats partition(p=2) values ('test2')"); + executeQuery("insert into simple_stats partition(p=3) values ('test3')"); + assertFalse(su.runOneIteration()); + drainWorkQueue(su, 0); + + executeQuery("insert overwrite table simple_stats partition(p=1) values ('test2')"); + executeQuery("insert overwrite table simple_stats partition(p=2) values ('test3')"); + assertFalse(su.runOneIteration()); + drainWorkQueue(su, 0); + + // Overwrite the txn state to refer to an aborted txn on some partitions. + String dbName = ss.getCurrentDatabase(), tblName = "simple_stats", fqName = dbName + "." + tblName; + long badTxnId = msClient.openTxn("moo"); + long badWriteId = msClient.allocateTableWriteId(badTxnId, dbName, tblName); + msClient.abortTxns(Lists.newArrayList(badTxnId)); + + Partition part1 = msClient.getPartition(dbName, tblName, "p=1"); + Partition part2 = msClient.getPartition(dbName, tblName, "p=2"); + part1.setWriteId(badWriteId); + part2.setWriteId(badWriteId); + String currentWriteIds = msClient.getValidWriteIds(fqName).toString(); + // To update write ID we need to specify the write ID list to validate concurrent writes. + msClient.alter_partitions(dbName, tblName, + Lists.newArrayList(part1), null, -1, currentWriteIds, badWriteId); + msClient.alter_partitions(dbName, tblName, + Lists.newArrayList(part2), null, -1, currentWriteIds, badWriteId); + + // We expect two partitions to be updated. + Map> stats = msClient.getPartitionColumnStatistics( + dbName, tblName, Lists.newArrayList("p=1", "p=2", "p=3"), + Lists.newArrayList("s"), 0, currentWriteIds); + assertEquals(1, stats.size()); + + assertTrue(su.runOneIteration()); + drainWorkQueue(su, 2); + // Analyze treats stats like data (new write ID), so stats still should not be valid. + stats = msClient.getPartitionColumnStatistics( + dbName, tblName, Lists.newArrayList("p=1", "p=2", "p=3"), + Lists.newArrayList("s"), 0, currentWriteIds); + assertEquals(1, stats.size()); + + // New reader. 
+ currentWriteIds = msClient.getValidWriteIds(fqName).toString(); + stats = msClient.getPartitionColumnStatistics( + dbName, tblName, Lists.newArrayList("p=1", "p=2", "p=3"), + Lists.newArrayList("s"), 0, currentWriteIds); + assertEquals(3, stats.size()); + + msClient.close(); + } + @Test(timeout=40000) public void testExistingOnly() throws Exception { hiveConf.set(MetastoreConf.ConfVars.STATS_AUTO_UPDATE.getVarname(), "existing"); @@ -437,13 +581,19 @@ private void verifyAndUnsetColStats(String tblName, String partName, List cols, IMetaStoreClient msClient, + private void verifyStatsUpToDate(String tbl, List cols, IMetaStoreClient msClient, boolean isUpToDate) throws Exception { Table table = msClient.getTable(ss.getCurrentDatabase(), tbl); verifyStatsUpToDate(table.getParameters(), cols, isUpToDate); } - private void verifyStatsUpToDate(Map params, ArrayList cols, + private void verifyStatsUpToDate(String tbl, List cols, IMetaStoreClient msClient, + long txnId, String validWriteIds, boolean isUpToDate) throws Exception { + Table table = msClient.getTable(ss.getCurrentDatabase(), tbl, txnId, validWriteIds); + verifyStatsUpToDate(table.getParameters(), cols, isUpToDate); + } + + private void verifyStatsUpToDate(Map params, List cols, boolean isUpToDate) { if (isUpToDate) { assertTrue(StatsSetupConst.areBasicStatsUptoDate(params)); diff --git ql/src/test/queries/clientpositive/acid_stats.q ql/src/test/queries/clientpositive/acid_stats.q index 1e1c9b005af18edb6309b0abc917b876c4457dde..15eb930db283aa2503096b7bc3e495ebe546be8f 100644 --- ql/src/test/queries/clientpositive/acid_stats.q +++ ql/src/test/queries/clientpositive/acid_stats.q @@ -35,8 +35,9 @@ drop table stats_part; create table stats2(key int,value string) tblproperties ("transactional"="true", "transactional_properties"="insert_only"); insert into table stats2 values (1, "foo"); explain select count(*) from stats2; -insert into table stats2 values (1, "bar"); +insert into table stats2 values (2, "bar"); explain select count(*) from stats2; +desc formatted stats2 key; set hive.stats.autogather=false; set hive.stats.column.autogather=false; diff --git ql/src/test/queries/clientpositive/acid_stats3.q ql/src/test/queries/clientpositive/acid_stats3.q new file mode 100644 index 0000000000000000000000000000000000000000..7a7bf19f4520c181eeced5bb0f8f76f60844f213 --- /dev/null +++ ql/src/test/queries/clientpositive/acid_stats3.q @@ -0,0 +1,56 @@ +set hive.stats.dbclass=fs; +set hive.stats.fetch.column.stats=true; +set datanucleus.cache.collections=false; + +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +set hive.stats.autogather=true; +set hive.stats.column.autogather=true; +set hive.compute.query.using.stats=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; + +set hive.fetch.task.conversion=none; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.query.results.cache.enabled=false; + +-- test truncate + +create table stats_nonpart(key int,value string) tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +insert into table stats_nonpart values (1, "foo"); +explain select count(key) from stats_nonpart; + +truncate table stats_nonpart; +explain select count(key) from stats_nonpart; + +analyze table stats_nonpart compute statistics for columns; +explain select count(key) from stats_nonpart; +drop table stats_nonpart; + + + +create table stats_part(key int,value string) partitioned by (p int) tblproperties 
("transactional"="true", "transactional_properties"="insert_only"); +insert into table stats_part partition(p=101) values (1, "foo"); +insert into table stats_part partition(p=102) values (2, "bar"); +insert into table stats_part partition(p=103) values (3, "baz"); +explain select count(key) from stats_part where p = 101; + +truncate table stats_part partition(p=101); +explain select count(key) from stats_part where p = 102; +explain select count(key) from stats_part; + +alter table stats_part drop partition (p=101); +explain select count(key) from stats_part; + +truncate table stats_part partition(p=102); +analyze table stats_part partition(p) compute statistics for columns; + +-- Note: this currently doesn't work from stats - for ACID tables or otherwise. + +explain select count(key) from stats_part; +drop table stats_part; + + + diff --git ql/src/test/queries/clientpositive/acid_stats4.q ql/src/test/queries/clientpositive/acid_stats4.q new file mode 100644 index 0000000000000000000000000000000000000000..20d115954192c6b2a85f9518f96eb30be0f72f8c --- /dev/null +++ ql/src/test/queries/clientpositive/acid_stats4.q @@ -0,0 +1,70 @@ +set hive.stats.dbclass=fs; +set hive.stats.fetch.column.stats=true; +set datanucleus.cache.collections=false; + +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +set hive.stats.autogather=true; +set hive.stats.column.autogather=true; +set hive.compute.query.using.stats=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; + +set hive.fetch.task.conversion=none; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.query.results.cache.enabled=false; + +-- test various alter commands + +create table stats_nonpart(key int,value string) tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +insert into table stats_nonpart values (1, "foo"); +explain select count(key) from stats_nonpart; + +ALTER TABLE stats_nonpart CHANGE COLUMN key key2 int; +explain select count(key2) from stats_nonpart; +explain select count(value) from stats_nonpart; + +analyze table stats_nonpart compute statistics for columns; +explain select count(key2) from stats_nonpart; + +alter table stats_nonpart rename to stats_nonpart2; +explain select count(key2) from stats_nonpart2; + +analyze table stats_nonpart2 compute statistics for columns; +explain select count(key2) from stats_nonpart2; + +drop table stats_nonpart; + + +create table stats_part(key int,value string) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +insert into table stats_part partition(p=101) values (1, "foo"); +insert into table stats_part partition(p=102) values (2, "bar"); +insert into table stats_part partition(p=103) values (3, "baz"); + +alter table stats_part partition column (p decimal(10,0)); +explain select count(key) from stats_part; + +analyze table stats_part partition(p) compute statistics for columns; +explain select count(key) from stats_part; + +alter table stats_part partition(p=102) rename to partition (p=104); +explain select count(key) from stats_part where p = 101; +explain select count(key) from stats_part; + +analyze table stats_part partition(p) compute statistics for columns; +explain select count(key) from stats_part; + +ALTER TABLE stats_part CHANGE COLUMN key key2 int; +explain select count(key2) from stats_part; +explain select count(value) from stats_part; + +analyze table stats_part partition(p) compute statistics for 
columns; +explain select count(key2) from stats_part; + + +drop table stats_part; + + + diff --git ql/src/test/results/clientpositive/acid_stats.q.out ql/src/test/results/clientpositive/acid_stats.q.out index fd4ebe75d40b849184aa00cb7536fef684cbad0c..8dcfdfbd62853d92e6cf5e642a3d285ce2d8b6c1 100644 --- ql/src/test/results/clientpositive/acid_stats.q.out +++ ql/src/test/results/clientpositive/acid_stats.q.out @@ -116,11 +116,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: insert into table stats2 values (1, "bar") +PREHOOK: query: insert into table stats2 values (2, "bar") PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@stats2 -POSTHOOK: query: insert into table stats2 values (1, "bar") +POSTHOOK: query: insert into table stats2 values (2, "bar") POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@stats2 @@ -140,6 +140,25 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: desc formatted stats2 key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats2 +POSTHOOK: query: desc formatted stats2 key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats2 +col_name key +data_type int +min 1 +max 2 +num_nulls 0 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: insert into table stats2 values (1, "baz") PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table diff --git ql/src/test/results/clientpositive/acid_stats3.q.out ql/src/test/results/clientpositive/acid_stats3.q.out new file mode 100644 index 0000000000000000000000000000000000000000..ef4d1c8730fcaccb2006facabcabfe63e3b1dd72 --- /dev/null +++ ql/src/test/results/clientpositive/acid_stats3.q.out @@ -0,0 +1,343 @@ +PREHOOK: query: create table stats_nonpart(key int,value string) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stats_nonpart +POSTHOOK: query: create table stats_nonpart(key int,value string) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_nonpart +PREHOOK: query: insert into table stats_nonpart values (1, "foo") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@stats_nonpart +POSTHOOK: query: insert into table stats_nonpart values (1, "foo") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@stats_nonpart +POSTHOOK: Lineage: stats_nonpart.key SCRIPT [] +POSTHOOK: Lineage: stats_nonpart.value SCRIPT [] +PREHOOK: query: explain select count(key) from stats_nonpart +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_nonpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: truncate table stats_nonpart +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@stats_nonpart +POSTHOOK: query: truncate table stats_nonpart +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@stats_nonpart +PREHOOK: query: explain select count(key) from stats_nonpart +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_nonpart +POSTHOOK: type: 
QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: stats_nonpart + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: analyze table stats_nonpart compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@stats_nonpart +PREHOOK: Output: default@stats_nonpart +#### A masked pattern was here #### +POSTHOOK: query: analyze table stats_nonpart compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@stats_nonpart +POSTHOOK: Output: default@stats_nonpart +#### A masked pattern was here #### +PREHOOK: query: explain select count(key) from stats_nonpart +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_nonpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: drop table stats_nonpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@stats_nonpart +PREHOOK: Output: default@stats_nonpart +POSTHOOK: query: drop table stats_nonpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@stats_nonpart +POSTHOOK: Output: default@stats_nonpart +PREHOOK: query: create table stats_part(key int,value string) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stats_part +POSTHOOK: query: create table stats_part(key int,value string) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_part +PREHOOK: query: insert into table stats_part partition(p=101) values (1, "foo") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@stats_part@p=101 +POSTHOOK: query: insert into table stats_part partition(p=101) values (1, "foo") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@stats_part@p=101 +POSTHOOK: Lineage: stats_part PARTITION(p=101).key SCRIPT [] +POSTHOOK: Lineage: stats_part PARTITION(p=101).value SCRIPT [] +PREHOOK: query: 
insert into table stats_part partition(p=102) values (2, "bar") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@stats_part@p=102 +POSTHOOK: query: insert into table stats_part partition(p=102) values (2, "bar") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@stats_part@p=102 +POSTHOOK: Lineage: stats_part PARTITION(p=102).key SCRIPT [] +POSTHOOK: Lineage: stats_part PARTITION(p=102).value SCRIPT [] +PREHOOK: query: insert into table stats_part partition(p=103) values (3, "baz") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@stats_part@p=103 +POSTHOOK: query: insert into table stats_part partition(p=103) values (3, "baz") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@stats_part@p=103 +POSTHOOK: Lineage: stats_part PARTITION(p=103).key SCRIPT [] +POSTHOOK: Lineage: stats_part PARTITION(p=103).value SCRIPT [] +PREHOOK: query: explain select count(key) from stats_part where p = 101 +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_part where p = 101 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: truncate table stats_part partition(p=101) +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@stats_part@p=101 +POSTHOOK: query: truncate table stats_part partition(p=101) +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@stats_part@p=101 +PREHOOK: query: explain select count(key) from stats_part where p = 102 +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_part where p = 102 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(key) from stats_part +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 2 Data size: 8 Basic stats: PARTIAL Column stats: PARTIAL + Group By Operator + aggregations: count(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: alter table stats_part 
drop partition (p=101) +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@stats_part +PREHOOK: Output: default@stats_part@p=101 +POSTHOOK: query: alter table stats_part drop partition (p=101) +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@stats_part +POSTHOOK: Output: default@stats_part@p=101 +PREHOOK: query: explain select count(key) from stats_part +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: truncate table stats_part partition(p=102) +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@stats_part@p=102 +POSTHOOK: query: truncate table stats_part partition(p=102) +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@stats_part@p=102 +PREHOOK: query: analyze table stats_part partition(p) compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@stats_part +PREHOOK: Input: default@stats_part@p=102 +PREHOOK: Input: default@stats_part@p=103 +PREHOOK: Output: default@stats_part +PREHOOK: Output: default@stats_part@p=102 +PREHOOK: Output: default@stats_part@p=103 +#### A masked pattern was here #### +POSTHOOK: query: analyze table stats_part partition(p) compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@stats_part +POSTHOOK: Input: default@stats_part@p=102 +POSTHOOK: Input: default@stats_part@p=103 +POSTHOOK: Output: default@stats_part +POSTHOOK: Output: default@stats_part@p=102 +POSTHOOK: Output: default@stats_part@p=103 +#### A masked pattern was here #### +PREHOOK: query: explain select count(key) from stats_part +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: PARTIAL + Group By Operator + aggregations: count(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: PARTIAL Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table stats_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@stats_part +PREHOOK: Output: default@stats_part +POSTHOOK: query: drop table stats_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@stats_part +POSTHOOK: Output: default@stats_part diff 
--git ql/src/test/results/clientpositive/acid_stats4.q.out ql/src/test/results/clientpositive/acid_stats4.q.out new file mode 100644 index 0000000000000000000000000000000000000000..bfb8898ea54c3bdf821195e9cd8247edbcdeccb4 --- /dev/null +++ ql/src/test/results/clientpositive/acid_stats4.q.out @@ -0,0 +1,496 @@ +PREHOOK: query: create table stats_nonpart(key int,value string) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stats_nonpart +POSTHOOK: query: create table stats_nonpart(key int,value string) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_nonpart +PREHOOK: query: insert into table stats_nonpart values (1, "foo") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@stats_nonpart +POSTHOOK: query: insert into table stats_nonpart values (1, "foo") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@stats_nonpart +POSTHOOK: Lineage: stats_nonpart.key SCRIPT [] +POSTHOOK: Lineage: stats_nonpart.value SCRIPT [] +PREHOOK: query: explain select count(key) from stats_nonpart +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_nonpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: ALTER TABLE stats_nonpart CHANGE COLUMN key key2 int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@stats_nonpart +PREHOOK: Output: default@stats_nonpart +POSTHOOK: query: ALTER TABLE stats_nonpart CHANGE COLUMN key key2 int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@stats_nonpart +POSTHOOK: Output: default@stats_nonpart +PREHOOK: query: explain select count(key2) from stats_nonpart +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key2) from stats_nonpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: stats_nonpart + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key2 (type: int) + outputColumnNames: key2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key2) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain 
select count(value) from stats_nonpart +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(value) from stats_nonpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: analyze table stats_nonpart compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@stats_nonpart +PREHOOK: Output: default@stats_nonpart +#### A masked pattern was here #### +POSTHOOK: query: analyze table stats_nonpart compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@stats_nonpart +POSTHOOK: Output: default@stats_nonpart +#### A masked pattern was here #### +PREHOOK: query: explain select count(key2) from stats_nonpart +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key2) from stats_nonpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: alter table stats_nonpart rename to stats_nonpart2 +PREHOOK: type: ALTERTABLE_RENAME +PREHOOK: Input: default@stats_nonpart +PREHOOK: Output: default@stats_nonpart +POSTHOOK: query: alter table stats_nonpart rename to stats_nonpart2 +POSTHOOK: type: ALTERTABLE_RENAME +POSTHOOK: Input: default@stats_nonpart +POSTHOOK: Output: default@stats_nonpart +POSTHOOK: Output: default@stats_nonpart2 +PREHOOK: query: explain select count(key2) from stats_nonpart2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key2) from stats_nonpart2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: analyze table stats_nonpart2 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@stats_nonpart2 +PREHOOK: Output: default@stats_nonpart2 +#### A masked pattern was here #### +POSTHOOK: query: analyze table stats_nonpart2 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@stats_nonpart2 +POSTHOOK: Output: default@stats_nonpart2 +#### A masked pattern was here #### +PREHOOK: query: explain select count(key2) from stats_nonpart2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key2) from stats_nonpart2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: drop table stats_nonpart +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table stats_nonpart +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table stats_part(key int,value string) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stats_part +POSTHOOK: query: create table stats_part(key int,value string) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_part +PREHOOK: query: insert into table stats_part partition(p=101) values (1, "foo") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@stats_part@p=101 +POSTHOOK: query: insert into table stats_part partition(p=101) values (1, "foo") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table 
+POSTHOOK: Output: default@stats_part@p=101 +POSTHOOK: Lineage: stats_part PARTITION(p=101).key SCRIPT [] +POSTHOOK: Lineage: stats_part PARTITION(p=101).value SCRIPT [] +PREHOOK: query: insert into table stats_part partition(p=102) values (2, "bar") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@stats_part@p=102 +POSTHOOK: query: insert into table stats_part partition(p=102) values (2, "bar") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@stats_part@p=102 +POSTHOOK: Lineage: stats_part PARTITION(p=102).key SCRIPT [] +POSTHOOK: Lineage: stats_part PARTITION(p=102).value SCRIPT [] +PREHOOK: query: insert into table stats_part partition(p=103) values (3, "baz") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@stats_part@p=103 +POSTHOOK: query: insert into table stats_part partition(p=103) values (3, "baz") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@stats_part@p=103 +POSTHOOK: Lineage: stats_part PARTITION(p=103).key SCRIPT [] +POSTHOOK: Lineage: stats_part PARTITION(p=103).value SCRIPT [] +PREHOOK: query: alter table stats_part partition column (p decimal(10,0)) +PREHOOK: type: ALTERTABLE_PARTCOLTYPE +PREHOOK: Input: default@stats_part +POSTHOOK: query: alter table stats_part partition column (p decimal(10,0)) +POSTHOOK: type: ALTERTABLE_PARTCOLTYPE +POSTHOOK: Input: default@stats_part +POSTHOOK: Output: default@stats_part +PREHOOK: query: explain select count(key) from stats_part +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: analyze table stats_part partition(p) compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@stats_part +PREHOOK: Input: default@stats_part@p=101 +PREHOOK: Input: default@stats_part@p=102 +PREHOOK: Input: default@stats_part@p=103 +PREHOOK: Output: default@stats_part +PREHOOK: Output: default@stats_part@p=101 +PREHOOK: Output: default@stats_part@p=102 +PREHOOK: Output: default@stats_part@p=103 +#### A masked pattern was here #### +POSTHOOK: query: analyze table stats_part partition(p) compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@stats_part +POSTHOOK: Input: default@stats_part@p=101 +POSTHOOK: Input: default@stats_part@p=102 +POSTHOOK: Input: default@stats_part@p=103 +POSTHOOK: Output: default@stats_part +POSTHOOK: Output: default@stats_part@p=101 +POSTHOOK: Output: default@stats_part@p=102 +POSTHOOK: Output: default@stats_part@p=103 +#### A masked pattern was here #### +PREHOOK: query: explain select count(key) from stats_part +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: alter table stats_part partition(p=102) rename to partition (p=104) +PREHOOK: type: ALTERTABLE_RENAMEPART +PREHOOK: Input: default@stats_part +PREHOOK: Output: default@stats_part@p=102 +POSTHOOK: query: alter table stats_part partition(p=102) rename to partition (p=104) +POSTHOOK: type: ALTERTABLE_RENAMEPART +POSTHOOK: Input: default@stats_part +POSTHOOK: Input: default@stats_part@p=102 +POSTHOOK: Output: 
default@stats_part@p=102 +POSTHOOK: Output: default@stats_part@p=104 +PREHOOK: query: explain select count(key) from stats_part where p = 101 +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_part where p = 101 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(key) from stats_part +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: analyze table stats_part partition(p) compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@stats_part +PREHOOK: Input: default@stats_part@p=101 +PREHOOK: Input: default@stats_part@p=103 +PREHOOK: Input: default@stats_part@p=104 +PREHOOK: Output: default@stats_part +PREHOOK: Output: default@stats_part@p=101 +PREHOOK: Output: default@stats_part@p=103 +PREHOOK: Output: default@stats_part@p=104 +#### A masked pattern was here #### +POSTHOOK: query: analyze table stats_part partition(p) compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@stats_part +POSTHOOK: Input: default@stats_part@p=101 +POSTHOOK: Input: default@stats_part@p=103 +POSTHOOK: Input: default@stats_part@p=104 +POSTHOOK: Output: default@stats_part +POSTHOOK: Output: default@stats_part@p=101 +POSTHOOK: Output: default@stats_part@p=103 +POSTHOOK: Output: default@stats_part@p=104 +#### A masked pattern was here #### +PREHOOK: query: explain select count(key) from stats_part +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key) from stats_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: ALTER TABLE stats_part CHANGE COLUMN key key2 int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@stats_part +PREHOOK: Output: default@stats_part +POSTHOOK: query: ALTER TABLE stats_part CHANGE COLUMN key key2 int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: 
Input: default@stats_part +POSTHOOK: Output: default@stats_part +PREHOOK: query: explain select count(key2) from stats_part +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key2) from stats_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 3 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key2 (type: int) + outputColumnNames: key2 + Statistics: Num rows: 3 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key2) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(value) from stats_part +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(value) from stats_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: analyze table stats_part partition(p) compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@stats_part +PREHOOK: Input: default@stats_part@p=101 +PREHOOK: Input: default@stats_part@p=103 +PREHOOK: Input: default@stats_part@p=104 +PREHOOK: Output: default@stats_part +PREHOOK: Output: default@stats_part@p=101 +PREHOOK: Output: default@stats_part@p=103 +PREHOOK: Output: default@stats_part@p=104 +#### A masked pattern was here #### +POSTHOOK: query: analyze table stats_part partition(p) compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@stats_part +POSTHOOK: Input: default@stats_part@p=101 +POSTHOOK: Input: default@stats_part@p=103 +POSTHOOK: Input: default@stats_part@p=104 +POSTHOOK: Output: default@stats_part +POSTHOOK: Output: default@stats_part@p=101 +POSTHOOK: Output: default@stats_part@p=103 +POSTHOOK: Output: default@stats_part@p=104 +#### A masked pattern was here #### +PREHOOK: query: explain select count(key2) from stats_part +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(key2) from stats_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: drop table stats_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@stats_part +PREHOOK: Output: default@stats_part +POSTHOOK: query: drop table stats_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@stats_part +POSTHOOK: Output: default@stats_part diff --git ql/src/test/results/clientpositive/acid_table_stats.q.out 
ql/src/test/results/clientpositive/acid_table_stats.q.out index fb064f873648cc075b7738bfb5e55ada5ea2e1db..f3ada3404b14e799a59906ae998826962e45fa1b 100644 --- ql/src/test/results/clientpositive/acid_table_stats.q.out +++ ql/src/test/results/clientpositive/acid_table_stats.q.out @@ -321,7 +321,7 @@ Partition Parameters: numFiles 4 numRows 2000 rawDataSize 208000 - totalSize 8118 + totalSize 8126 #### A masked pattern was here #### # Storage Information @@ -368,7 +368,7 @@ Partition Parameters: numFiles 4 numRows 2000 rawDataSize 416000 - totalSize 8118 + totalSize 8126 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out index 1bba3f3ec1d6aad75c5ae90f73630119fa905840..4661a03571115a635d75216e48ff2931e0c24f45 100644 --- ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out +++ ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out @@ -41,14 +41,10 @@ POSTHOOK: Lineage: change_allowincompatible_vectorization_false_date PARTITION(s PREHOOK: query: select count(*) from change_allowincompatible_vectorization_false_date PREHOOK: type: QUERY PREHOOK: Input: default@change_allowincompatible_vectorization_false_date -PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa -PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=bbb #### A masked pattern was here #### POSTHOOK: query: select count(*) from change_allowincompatible_vectorization_false_date POSTHOOK: type: QUERY POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date -POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa -POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=bbb #### A masked pattern was here #### 50 PREHOOK: query: alter table change_allowincompatible_vectorization_false_date change column ts ts timestamp @@ -62,14 +58,10 @@ POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date PREHOOK: query: select count(*) from change_allowincompatible_vectorization_false_date PREHOOK: type: QUERY PREHOOK: Input: default@change_allowincompatible_vectorization_false_date -PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa -PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=bbb #### A masked pattern was here #### POSTHOOK: query: select count(*) from change_allowincompatible_vectorization_false_date POSTHOOK: type: QUERY POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date -POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa -POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=bbb #### A masked pattern was here #### 50 PREHOOK: query: insert into table change_allowincompatible_vectorization_false_date partition (s='aaa') values ('2038-03-22 07:26:48.0') diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out index 2ad9a4d1fb1920b7a06985e87884ef331f0ba76d..5018a4c1513372dd392537aaf7e34d83319dd0b4 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out @@ -1327,7 +1327,7 @@ 
STAGE PLANS: TableScan alias: acid_2l_part_sdpo filterExpr: (value = 'bar') (type: boolean) - Statistics: Num rows: 4952 Data size: 1456618 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4200 Data size: 1253037 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (value = 'bar') (type: boolean) Statistics: Num rows: 5 Data size: 1375 Basic stats: COMPLETE Column stats: PARTIAL diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out index 0628ad84cdbe4e9b223c86e8e7c5b18416519f20..2538ac1ba9fb5c5f9155a67cc4f77e8a592ac5fe 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out @@ -488,10 +488,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable_2 - filterExpr: ((c > 10) and (ROW__ID.writeid > 1) and a is not null) (type: boolean) + filterExpr: ((c > 10) and (ROW__ID.writeid > 2) and a is not null) (type: boolean) Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((ROW__ID.writeid > 1) and (c > 10) and a is not null) (type: boolean) + predicate: ((ROW__ID.writeid > 2) and (c > 10) and a is not null) (type: boolean) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), c (type: decimal(10,2)) diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out index a80463785d6b3db0a5f43b37bdbb07f71a6ba0aa..fe46bfd0c5e9f154b4133afda26f76874427be60 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out @@ -730,10 +730,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable_2_n2 - filterExpr: ((c > 10) and (ROW__ID.writeid > 1) and a is not null) (type: boolean) + filterExpr: ((c > 10) and (ROW__ID.writeid > 2) and a is not null) (type: boolean) Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((ROW__ID.writeid > 1) and (c > 10) and a is not null) (type: boolean) + predicate: ((ROW__ID.writeid > 2) and (c > 10) and a is not null) (type: boolean) Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), c (type: decimal(10,2)), d (type: int) @@ -950,7 +950,7 @@ Table Parameters: numFiles 3 numRows 3 rawDataSize 248 - totalSize 1508 + totalSize 1500 transactional true transactional_properties default #### A masked pattern was here #### @@ -1554,10 +1554,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable_2_n2 - filterExpr: ((c > 10) and (ROW__ID.writeid > 4) and a is not null) (type: boolean) + filterExpr: ((c > 10) and (ROW__ID.writeid > 6) and a is not null) (type: boolean) Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((ROW__ID.writeid > 4) and (c > 10) and a is not null) (type: boolean) + predicate: ((ROW__ID.writeid > 6) and (c > 10) and a is not null) (type: boolean) Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), c (type: decimal(10,2)), d 
(type: int) diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out index fae47575b5efd06209143414c3d6823df8741bf7..6402db8adc179f32bf221849dbd2418a02a80024 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out @@ -278,10 +278,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable_2_n3 - filterExpr: ((c > 10) and (ROW__ID.writeid > 1) and a is not null) (type: boolean) + filterExpr: ((c > 10) and (ROW__ID.writeid > 2) and a is not null) (type: boolean) Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((ROW__ID.writeid > 1) and (c > 10) and a is not null) (type: boolean) + predicate: ((ROW__ID.writeid > 2) and (c > 10) and a is not null) (type: boolean) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), c (type: decimal(10,2)) @@ -958,10 +958,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable_2_n3 - filterExpr: ((c > 10) and (ROW__ID.writeid > 4) and a is not null) (type: boolean) + filterExpr: ((c > 10) and (ROW__ID.writeid > 6) and a is not null) (type: boolean) Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((ROW__ID.writeid > 4) and (c > 10) and a is not null) (type: boolean) + predicate: ((ROW__ID.writeid > 6) and (c > 10) and a is not null) (type: boolean) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), c (type: decimal(10,2)) diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out index fe54771bfd36aafd0287d09cd167d9109e12f9cf..193e95909e028f3c3d6b68affbd36e3ff29aa339 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_rebuild_dummy.q.out @@ -488,10 +488,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable_2_n0 - filterExpr: ((c > 10) and (ROW__ID.writeid > 1) and a is not null) (type: boolean) + filterExpr: ((c > 10) and (ROW__ID.writeid > 2) and a is not null) (type: boolean) Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((ROW__ID.writeid > 1) and (c > 10) and a is not null) (type: boolean) + predicate: ((ROW__ID.writeid > 2) and (c > 10) and a is not null) (type: boolean) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), c (type: decimal(10,2)) diff --git ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out index 44a866db197acda832424070891427437113c6d5..b5183bafabf20c31f0b94260f712ddd82f83e5b9 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out @@ -602,10 +602,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cmv_basetable_2_n1 - 
filterExpr: ((c > 10) and (ROW__ID.writeid > 1) and a is not null) (type: boolean) + filterExpr: ((c > 10) and (ROW__ID.writeid > 2) and a is not null) (type: boolean) Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((ROW__ID.writeid > 1) and (c > 10) and a is not null) (type: boolean) + predicate: ((ROW__ID.writeid > 2) and (c > 10) and a is not null) (type: boolean) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), c (type: decimal(10,2)) diff --git ql/src/test/results/clientpositive/llap/results_cache_truncate.q.out ql/src/test/results/clientpositive/llap/results_cache_truncate.q.out index 0b7a81a661a1405af6b4a8c1ded9229f91b08944..83b21449428425a66c3ebbc0132b33e86498fc58 100644 --- ql/src/test/results/clientpositive/llap/results_cache_truncate.q.out +++ ql/src/test/results/clientpositive/llap/results_cache_truncate.q.out @@ -23,54 +23,12 @@ POSTHOOK: query: explain select count(*) from rct1_1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: rct1_1 - Statistics: Num rows: 500 Data size: 35250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 35250 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -96,17 +54,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink - Cached Query Result: true PREHOOK: query: select count(*) from rct1_1 PREHOOK: type: QUERY PREHOOK: Input: default@rct1_1 +#### A masked pattern was here #### POSTHOOK: query: select count(*) from rct1_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@rct1_1 +#### A masked pattern was here #### 500 PREHOOK: query: truncate table rct1_1 PREHOOK: type: TRUNCATETABLE @@ -122,54 +81,12 @@ POSTHOOK: query: explain select count(*) from rct1_1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### 
- Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: rct1_1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -217,68 +134,22 @@ POSTHOOK: query: explain select count(*) from rct1_2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: rct1_2 - Statistics: Num rows: 1000 Data size: 78450 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1000 Data size: 78450 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: select count(*) from rct1_2 PREHOOK: type: QUERY PREHOOK: Input: default@rct1_2 -PREHOOK: Input: default@rct1_2@p1=part1 -PREHOOK: Input: default@rct1_2@p1=part2 #### A masked pattern was here #### POSTHOOK: query: select count(*) from rct1_2 POSTHOOK: type: QUERY POSTHOOK: Input: default@rct1_2 -POSTHOOK: Input: default@rct1_2@p1=part1 -POSTHOOK: Input: default@rct1_2@p1=part2 #### A masked pattern was here #### 1000 test.comment="Query on transactional table should use cache" @@ -294,21 
+165,18 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink - Cached Query Result: true PREHOOK: query: select count(*) from rct1_2 PREHOOK: type: QUERY PREHOOK: Input: default@rct1_2 -PREHOOK: Input: default@rct1_2@p1=part1 -PREHOOK: Input: default@rct1_2@p1=part2 +#### A masked pattern was here #### POSTHOOK: query: select count(*) from rct1_2 POSTHOOK: type: QUERY POSTHOOK: Input: default@rct1_2 -POSTHOOK: Input: default@rct1_2@p1=part1 -POSTHOOK: Input: default@rct1_2@p1=part2 +#### A masked pattern was here #### 1000 PREHOOK: query: truncate table rct1_2 partition (p1='part1') PREHOOK: type: TRUNCATETABLE @@ -324,68 +192,22 @@ POSTHOOK: query: explain select count(*) from rct1_2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: rct1_2 - Statistics: Num rows: 500 Data size: 39200 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 39200 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: select count(*) from rct1_2 PREHOOK: type: QUERY PREHOOK: Input: default@rct1_2 -PREHOOK: Input: default@rct1_2@p1=part1 -PREHOOK: Input: default@rct1_2@p1=part2 #### A masked pattern was here #### POSTHOOK: query: select count(*) from rct1_2 POSTHOOK: type: QUERY POSTHOOK: Input: default@rct1_2 -POSTHOOK: Input: default@rct1_2@p1=part1 -POSTHOOK: Input: default@rct1_2@p1=part2 #### A masked pattern was here #### 500 PREHOOK: query: truncate table rct1_2 @@ -404,67 +226,21 @@ POSTHOOK: query: explain select count(*) from rct1_2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: rct1_2 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - 
outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: select count(*) from rct1_2 PREHOOK: type: QUERY PREHOOK: Input: default@rct1_2 -PREHOOK: Input: default@rct1_2@p1=part1 -PREHOOK: Input: default@rct1_2@p1=part2 #### A masked pattern was here #### POSTHOOK: query: select count(*) from rct1_2 POSTHOOK: type: QUERY POSTHOOK: Input: default@rct1_2 -POSTHOOK: Input: default@rct1_2@p1=part1 -POSTHOOK: Input: default@rct1_2@p1=part2 #### A masked pattern was here #### 0 diff --git ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out index 94d0ea3fce7e59841653ae416e23c8533004dede..eba16aae3e84446320b79af2e4df9111c98961a8 100644 --- ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out +++ ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out @@ -88,6 +88,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} bucketing_version 2 numFiles 1 numRows 1 @@ -141,12 +142,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: a (type: int) sort order: + Map-reduce partition columns: a (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: ROW__ID (type: struct) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -173,50 +174,50 @@ STAGE PLANS: 0 a (type: int) 1 a (type: int) outputColumnNames: _col0, _col4, _col5, _col6 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: struct), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition 
columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Filter Operator predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: struct) outputColumnNames: _col4 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col4 (type: struct) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Filter Operator predicate: _col0 is null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: int), _col6 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int) Reducer 3 Execution mode: vectorized, llap @@ -224,10 +225,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 99 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -242,17 +243,17 @@ STAGE PLANS: keys: KEY._col0 (type: struct) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 > 1L) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cardinality_violation(_col0) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -261,19 +262,19 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int) outputColumnNames: val - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(val, 'hll') mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -284,10 +285,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -297,15 +298,15 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: a, b - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 6 Execution mode: llap @@ -314,10 +315,10 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE 
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -425,6 +426,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 numFiles 4 numRows 2 @@ -490,6 +492,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 numFiles 6 numRows 0 diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java index af9b0b1dd260a6d65094d093fd314faed791184c..35be3c4d72789896c5b7fa05023d78911e94859b 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -157,10 +157,6 @@ public String getAggregator(Configuration conf) { public static final String CASCADE = "CASCADE"; - // TODO: when alter calls are switched to req/resp models, replace these and the above with fields. - public static final String TXN_ID = "WRITER_TXN_ID"; - public static final String VALID_WRITE_IDS = "WRITER_WRITE_ID"; - public static final String TRUE = "true"; public static final String FALSE = "false"; @@ -275,10 +271,11 @@ public static boolean canColumnStatsMerge(Map params, String col if (params == null) { return false; } + // TODO: should this also check that the basic flag is valid? ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); return stats.columnStats.containsKey(colName); } - + public static void clearColumnStatsState(Map params) { if (params == null) { return; @@ -321,7 +318,7 @@ public static void setStatsStateForCreateTable(Map params, setColumnStatsState(params, cols); } } - + private static ColumnStatsAccurate parseStatsAcc(String statsAcc) { if (statsAcc == null) { return new ColumnStatsAccurate(); diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/AlterHandler.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/AlterHandler.java index e7cf07ff0968b21b3989dadf034d6c86d8fb9855..f3dc264a17bbf94c5394c89ad27c5e8a29c3afa9 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/AlterHandler.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/AlterHandler.java @@ -61,7 +61,7 @@ default void alterTable(RawStore msdb, Warehouse wh, String catName, String dbname, String name, Table newTable, EnvironmentContext envContext) throws InvalidOperationException, MetaException { - alterTable(msdb, wh, catName, dbname, name, newTable, envContext, null); + alterTable(msdb, wh, catName, dbname, name, newTable, envContext, null, -1, null); } /** @@ -88,7 +88,8 @@ default void alterTable(RawStore msdb, Warehouse wh, String catName, String dbna */ void alterTable(RawStore msdb, Warehouse wh, String catName, String dbname, String name, Table newTable, EnvironmentContext envContext, - IHMSHandler handler) throws InvalidOperationException, MetaException; + IHMSHandler handler, long txnId, String writeIdList) + throws InvalidOperationException, MetaException; /** * @deprecated As of release 2.2.0. 
Replaced by {@link #alterPartition(RawStore, Warehouse, String, @@ -145,7 +146,7 @@ Partition alterPartition(final RawStore msdb, Warehouse wh, final String dbname, Partition alterPartition(final RawStore msdb, Warehouse wh, final String catName, final String dbname, final String name, final List part_vals, final Partition new_part, EnvironmentContext environmentContext, - IHMSHandler handler) + IHMSHandler handler, long txnId, String validWriteIds) throws InvalidOperationException, InvalidObjectException, AlreadyExistsException, MetaException; /** diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index 8b2a6babeb117e150a2ad51e40d48adc506731de..e8226f8b1052c97d7a3e8a5b83b17dd4f0d4353b 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -90,7 +90,8 @@ public void setConf(Configuration conf) { @Override public void alterTable(RawStore msdb, Warehouse wh, String catName, String dbname, String name, Table newt, EnvironmentContext environmentContext, - IHMSHandler handler) throws InvalidOperationException, MetaException { + IHMSHandler handler, long txnId, String writeIdList) + throws InvalidOperationException, MetaException { catName = normalizeIdentifier(catName); name = name.toLowerCase(); dbname = dbname.toLowerCase(); @@ -296,7 +297,7 @@ public void alterTable(RawStore msdb, Warehouse wh, String catName, String dbnam partValues.add(part.getValues()); } msdb.alterPartitions(catName, newDbName, newTblName, partValues, - partBatch, -1, -1, null); + partBatch, newt.getWriteId(), txnId, writeIdList); } } @@ -304,14 +305,15 @@ public void alterTable(RawStore msdb, Warehouse wh, String catName, String dbnam ColumnStatistics newPartColStats = partColStats.getValue(); newPartColStats.getStatsDesc().setDbName(newDbName); newPartColStats.getStatsDesc().setTableName(newTblName); - msdb.updatePartitionColumnStatistics(newPartColStats, partColStats.getKey().getValues()); + msdb.updatePartitionColumnStatistics(newPartColStats, partColStats.getKey().getValues(), + txnId, writeIdList, newt.getWriteId()); } } else { - alterTableUpdateTableColumnStats(msdb, oldt, newt, environmentContext); + alterTableUpdateTableColumnStats( + msdb, oldt, newt, environmentContext, txnId, writeIdList); } } else { // operations other than table rename - if (MetaStoreUtils.requireCalStats(null, null, newt, environmentContext) && !isPartitionedTable) { Database db = msdb.getDatabase(catName, newDbName); @@ -330,23 +332,26 @@ public void alterTable(RawStore msdb, Warehouse wh, String catName, String dbnam ColumnStatistics colStats = updateOrGetPartitionColumnStats(msdb, catName, dbname, name, part.getValues(), oldCols, oldt, part, null); assert(colStats == null); - // Note: we don't do txn stats validation here; this can only delete stats? 
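
Note on the AlterHandler change above: the interface keeps the pre-existing alterTable/alterPartition entry points compiling by having them delegate to the new transaction-aware signatures with "no transaction" sentinels (txnId = -1, null write-id list). A minimal, self-contained sketch of that back-compat pattern follows; the names are hypothetical and this is an illustration of the approach, not the patch's own interface.

    interface TxnAwareAlterHandler {
      // Pre-existing signature, preserved as a default method so old callers keep working.
      default void alterTable(String dbName, String tableName) {
        // -1 / null mean "not a transactional operation", mirroring the sentinels used in the patch.
        alterTable(dbName, tableName, -1L, null);
      }

      // New transaction-aware signature that implementations provide.
      void alterTable(String dbName, String tableName, long txnId, String validWriteIdList);
    }
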
if (cascade) { - msdb.alterPartition(catName, dbname, name, part.getValues(), part, -1, null); + msdb.alterPartition( + catName, dbname, name, part.getValues(), part, txnId, writeIdList); } else { // update changed properties (stats) oldPart.setParameters(part.getParameters()); - msdb.alterPartition(catName, dbname, name, part.getValues(), oldPart, -1, null); + msdb.alterPartition( + catName, dbname, name, part.getValues(), oldPart, txnId, writeIdList); } } // Don't validate table-level stats for a partitoned table. msdb.alterTable(catName, dbname, name, newt, -1, null); } else { LOG.warn("Alter table not cascaded to partitions."); - alterTableUpdateTableColumnStats(msdb, oldt, newt, environmentContext); + alterTableUpdateTableColumnStats( + msdb, oldt, newt, environmentContext, txnId, writeIdList); } } else { - alterTableUpdateTableColumnStats(msdb, oldt, newt, environmentContext); + alterTableUpdateTableColumnStats( + msdb, oldt, newt, environmentContext, txnId, writeIdList); } } @@ -426,14 +431,13 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String EnvironmentContext environmentContext) throws InvalidOperationException, InvalidObjectException, AlreadyExistsException, MetaException { return alterPartition(msdb, wh, DEFAULT_CATALOG_NAME, dbname, name, part_vals, new_part, - environmentContext, null); + environmentContext, null, -1, null); } @Override - public Partition alterPartition(final RawStore msdb, Warehouse wh, final String catName, - final String dbname, final String name, - final List part_vals, final Partition new_part, - EnvironmentContext environmentContext, IHMSHandler handler) + public Partition alterPartition(RawStore msdb, Warehouse wh, String catName, String dbname, + String name, List part_vals, final Partition new_part, + EnvironmentContext environmentContext, IHMSHandler handler, long txnId, String validWriteIds) throws InvalidOperationException, InvalidObjectException, AlreadyExistsException, MetaException { boolean success = false; Partition oldPart; @@ -449,13 +453,6 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String new_part.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(System .currentTimeMillis() / 1000)); } - long txnId = -1; - String validWriteIds = null; - if (environmentContext != null && environmentContext.isSetProperties() - && environmentContext.getProperties().containsKey(StatsSetupConst.VALID_WRITE_IDS)) { - txnId = Long.parseLong(environmentContext.getProperties().get(StatsSetupConst.TXN_ID)); - validWriteIds = environmentContext.getProperties().get(StatsSetupConst.VALID_WRITE_IDS); - } //alter partition if (part_vals == null || part_vals.size() == 0) { @@ -623,7 +620,10 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String if (cs != null) { cs.getStatsDesc().setPartName(newPartName); try { - msdb.updatePartitionColumnStatistics(cs, new_part.getValues()); + // Verifying ACID state again is not strictly needed here (alterPartition above does it), + // but we are going to use the uniform approach for simplicity. + msdb.updatePartitionColumnStatistics(cs, new_part.getValues(), + txnId, validWriteIds, new_part.getWriteId()); } catch (InvalidInputException iie) { throw new InvalidOperationException("Unable to update partition stats in table rename." 
+ iie); } catch (NoSuchObjectException nsoe) { @@ -796,7 +796,7 @@ private Path constructRenamedPath(Path defaultNewPath, Path currentPath) { @VisibleForTesting void alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTable, - EnvironmentContext ec) + EnvironmentContext ec, long txnId, String validWriteIds) throws MetaException, InvalidObjectException { String catName = normalizeIdentifier(oldTable.isSetCatName() ? oldTable.getCatName() : getDefaultCatalog(conf)); @@ -804,77 +804,65 @@ void alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTa String tableName = normalizeIdentifier(oldTable.getTableName()); String newDbName = newTable.getDbName().toLowerCase(); String newTableName = normalizeIdentifier(newTable.getTableName()); - long txnId = -1; - String validWriteIds = null; - if (ec != null && ec.isSetProperties() && ec.getProperties().containsKey( - StatsSetupConst.VALID_WRITE_IDS)) { - txnId = Long.parseLong(ec.getProperties().get(StatsSetupConst.TXN_ID)); - validWriteIds = ec.getProperties().get(StatsSetupConst.VALID_WRITE_IDS); - } try { List oldCols = oldTable.getSd().getCols(); List newCols = newTable.getSd().getCols(); List newStatsObjs = new ArrayList<>(); ColumnStatistics colStats = null; - boolean updateColumnStats = true; - - // Nothing to update if everything is the same - if (newDbName.equals(dbName) && - newTableName.equals(tableName) && - MetaStoreUtils.columnsIncludedByNameType(oldCols, newCols)) { - updateColumnStats = false; + boolean updateColumnStats = !newDbName.equals(dbName) || !newTableName.equals(tableName) + || !MetaStoreUtils.columnsIncludedByNameType(oldCols, newCols); + if (updateColumnStats) { + List oldColNames = new ArrayList<>(oldCols.size()); + for (FieldSchema oldCol : oldCols) { + oldColNames.add(oldCol.getName()); } - if (updateColumnStats) { - List oldColNames = new ArrayList<>(oldCols.size()); - for (FieldSchema oldCol : oldCols) { - oldColNames.add(oldCol.getName()); - } - - // Collect column stats which need to be rewritten and remove old stats. - colStats = msdb.getTableColumnStatistics(catName, dbName, tableName, oldColNames); - if (colStats == null) { - updateColumnStats = false; - } else { - List statsObjs = colStats.getStatsObj(); - if (statsObjs != null) { - List deletedCols = new ArrayList<>(); - for (ColumnStatisticsObj statsObj : statsObjs) { - boolean found = false; - for (FieldSchema newCol : newCols) { - if (statsObj.getColName().equalsIgnoreCase(newCol.getName()) - && statsObj.getColType().equalsIgnoreCase(newCol.getType())) { - found = true; - break; - } + // NOTE: this doesn't check stats being compliant, but the alterTable call below does. + // The worst we can do is delete the stats. + // Collect column stats which need to be rewritten and remove old stats. 
+ colStats = msdb.getTableColumnStatistics(catName, dbName, tableName, oldColNames); + if (colStats == null) { + updateColumnStats = false; + } else { + List statsObjs = colStats.getStatsObj(); + if (statsObjs != null) { + List deletedCols = new ArrayList<>(); + for (ColumnStatisticsObj statsObj : statsObjs) { + boolean found = false; + for (FieldSchema newCol : newCols) { + if (statsObj.getColName().equalsIgnoreCase(newCol.getName()) + && statsObj.getColType().equalsIgnoreCase(newCol.getType())) { + found = true; + break; } + } - if (found) { - if (!newDbName.equals(dbName) || !newTableName.equals(tableName)) { - msdb.deleteTableColumnStatistics(catName, dbName, tableName, statsObj.getColName()); - newStatsObjs.add(statsObj); - deletedCols.add(statsObj.getColName()); - } - } else { + if (found) { + if (!newDbName.equals(dbName) || !newTableName.equals(tableName)) { msdb.deleteTableColumnStatistics(catName, dbName, tableName, statsObj.getColName()); + newStatsObjs.add(statsObj); deletedCols.add(statsObj.getColName()); } + } else { + msdb.deleteTableColumnStatistics(catName, dbName, tableName, statsObj.getColName()); + deletedCols.add(statsObj.getColName()); } - StatsSetupConst.removeColumnStatsState(newTable.getParameters(), deletedCols); } + StatsSetupConst.removeColumnStatsState(newTable.getParameters(), deletedCols); } } + } - // Change to new table and append stats for the new table - msdb.alterTable(catName, dbName, tableName, newTable, txnId, validWriteIds); - if (updateColumnStats && !newStatsObjs.isEmpty()) { - ColumnStatisticsDesc statsDesc = colStats.getStatsDesc(); - statsDesc.setDbName(newDbName); - statsDesc.setTableName(newTableName); - colStats.setStatsObj(newStatsObjs); - msdb.updateTableColumnStatistics(colStats); - } + // Change to new table and append stats for the new table + msdb.alterTable(catName, dbName, tableName, newTable, txnId, validWriteIds); + if (updateColumnStats && !newStatsObjs.isEmpty()) { + ColumnStatisticsDesc statsDesc = colStats.getStatsDesc(); + statsDesc.setDbName(newDbName); + statsDesc.setTableName(newTableName); + colStats.setStatsObj(newStatsObjs); + msdb.updateTableColumnStatistics(colStats, txnId, validWriteIds, newTable.getWriteId()); + } } catch (NoSuchObjectException nsoe) { LOG.debug("Could not find db entry." + nsoe); } catch (InvalidInputException e) { @@ -907,7 +895,7 @@ private ColumnStatistics updateOrGetPartitionColumnStats( oldColNames.add(oldCol.getName()); } List oldPartNames = Lists.newArrayList(oldPartName); - // Note: doesn't take txn stats into account. This method can only remove stats. + // TODO: doesn't take txn stats into account. This method can only remove stats. 
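
The txnId/validWriteIds/writeId arguments threaded into updateTableColumnStatistics and updatePartitionColumnStatistics above are what ultimately lets the metastore decide whether stored column stats are visible to a given reader's snapshot. A rough, hypothetical sketch of that kind of check is below; it only assumes the stats record carries the write id it was computed for and that the reader supplies its ValidWriteIdList serialized as a string (the real ObjectStore logic is more involved than this).

    import org.apache.hadoop.hive.common.ValidReaderWriteIdList;

    final class StatsVisibilityExample {
      private StatsVisibilityExample() {}

      /** Hypothetical helper: stats written at statsWriteId are usable only if the
       *  reader's snapshot (validWriteIdListStr) already sees that write id. */
      static boolean statsVisibleToReader(long statsWriteId, String validWriteIdListStr) {
        if (validWriteIdListStr == null) {
          return true; // non-transactional caller: nothing to enforce
        }
        ValidReaderWriteIdList writeIds = new ValidReaderWriteIdList(validWriteIdListStr);
        return writeIds.isWriteIdValid(statsWriteId);
      }
    }
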
List partsColStats = msdb.getPartitionColumnStatistics(catName, dbname, tblname, oldPartNames, oldColNames); assert (partsColStats.size() <= 1); diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index a46d2f940742a45ec19b34eeb3ac6a4a946589d5..091e5deab4ea2dea19220bb211fda4626678ddcb 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -2692,12 +2692,8 @@ private void updateStatsForTruncate(Map props, EnvironmentContext return; } - private void alterPartitionForTruncate(final RawStore ms, - final String catName, - final String dbName, - final String tableName, - final Table table, - final Partition partition) throws Exception { + private void alterPartitionForTruncate(RawStore ms, String catName, String dbName, String tableName, + Table table, Partition partition, long txnId, String validWriteIds, long writeId) throws Exception { EnvironmentContext environmentContext = new EnvironmentContext(); updateStatsForTruncate(partition.getParameters(), environmentContext); @@ -2713,20 +2709,21 @@ private void alterPartitionForTruncate(final RawStore ms, new AlterPartitionEvent(partition, partition, table, true, true, this)); } + if (writeId > 0) { + partition.setWriteId(writeId); + } alterHandler.alterPartition(ms, wh, catName, dbName, tableName, null, partition, - environmentContext, this); + environmentContext, this, txnId, validWriteIds); } - private void alterTableStatsForTruncate(final RawStore ms, - final String catName, - final String dbName, - final String tableName, - final Table table, - final List partNames) throws Exception { + private void alterTableStatsForTruncate(RawStore ms, String catName, String dbName, + String tableName, Table table, List partNames, + long txnId, String validWriteIds, long writeId) throws Exception { if (partNames == null) { if (0 != table.getPartitionKeysSize()) { for (Partition partition : ms.getPartitions(catName, dbName, tableName, Integer.MAX_VALUE)) { - alterPartitionForTruncate(ms, catName, dbName, tableName, table, partition); + alterPartitionForTruncate(ms, catName, dbName, tableName, table, partition, + txnId, validWriteIds, writeId); } } else { EnvironmentContext environmentContext = new EnvironmentContext(); @@ -2744,11 +2741,17 @@ private void alterTableStatsForTruncate(final RawStore ms, new AlterTableEvent(table, table, true, true, this)); } - alterHandler.alterTable(ms, wh, catName, dbName, tableName, table, environmentContext, this); + // TODO: this should actually pass thru and set writeId for txn stats. 
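
In the truncate path above (and for the table object just below), the handler stamps the caller-supplied write id onto the metastore object, but only when it is a genuine write id (> 0), before invoking the alter handler, so the refreshed stats get associated with that write. A trivial illustration of the guard, using the Thrift Partition API; the helper name is hypothetical.

    import org.apache.hadoop.hive.metastore.api.Partition;

    final class WriteIdStamp {
      private WriteIdStamp() {}

      /** writeId <= 0 means "not a transactional write": leave the object's write id alone. */
      static void stampWriteId(Partition partition, long writeId) {
        if (writeId > 0) {
          partition.setWriteId(writeId);
        }
      }
    }
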
+ if (writeId > 0) { + table.setWriteId(writeId); + } + alterHandler.alterTable(ms, wh, catName, dbName, tableName, table, + environmentContext, this, txnId, validWriteIds); } } else { for (Partition partition : ms.getPartitionsByNames(catName, dbName, tableName, partNames)) { - alterPartitionForTruncate(ms, catName, dbName, tableName, table, partition); + alterPartitionForTruncate(ms, catName, dbName, tableName, table, partition, + txnId, validWriteIds, writeId); } } return; @@ -2786,6 +2789,20 @@ public CmRecycleResponse cm_recycle(final CmRecycleRequest request) throws MetaE @Override public void truncate_table(final String dbName, final String tableName, List partNames) throws NoSuchObjectException, MetaException { + // Deprecated path, won't work for txn tables. + truncateTableInternal(dbName, tableName, partNames, -1, null, -1); + } + + @Override + public TruncateTableResponse truncate_table_req(TruncateTableRequest req) + throws MetaException, TException { + truncateTableInternal(req.getDbName(), req.getTableName(), req.getPartNames(), + req.getTxnId(), req.getValidWriteIdList(), req.getWriteId()); + return new TruncateTableResponse(); + } + + private void truncateTableInternal(String dbName, String tableName, List partNames, + long txnId, String validWriteIds, long writeId) throws MetaException, NoSuchObjectException { try { String[] parsedDbName = parseDbName(dbName, conf); Table tbl = get_table_core(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName); @@ -2817,7 +2834,7 @@ public void truncate_table(final String dbName, final String tableName, List new_parts) throws TException { - alter_partitions_with_environment_context( - db_name, tbl_name, new_parts, null, -1, null, -1); + String[] o = parseDbName(db_name, conf); + alter_partitions_with_environment_context(o[0], o[1], + tbl_name, new_parts, null, -1, null, -1); } @Override - public AlterPartitionsResponse alter_partitions_with_environment_context_req( - AlterPartitionsRequest req) - throws TException { - alter_partitions_with_environment_context( + public AlterPartitionsResponse alter_partitions_req(AlterPartitionsRequest req) throws TException { + alter_partitions_with_environment_context(req.getCatName(), req.getDbName(), req.getTableName(), req.getPartitions(), req.getEnvironmentContext(), req.isSetTxnId() ? req.getTxnId() : -1, req.isSetValidWriteIdList() ? 
req.getValidWriteIdList() : null, @@ -4912,17 +4929,23 @@ public AlterPartitionsResponse alter_partitions_with_environment_context_req( public void alter_partitions_with_environment_context(final String db_name, final String tbl_name, final List new_parts, EnvironmentContext environmentContext) throws TException { - alter_partitions_with_environment_context(db_name, tbl_name, new_parts, environmentContext, + String[] o = parseDbName(db_name, conf); + alter_partitions_with_environment_context(o[0], o[1], tbl_name, new_parts, environmentContext, -1, null, -1); } - private void alter_partitions_with_environment_context(final String db_name, final String tbl_name, + private void alter_partitions_with_environment_context(String catName, String db_name, final String tbl_name, final List new_parts, EnvironmentContext environmentContext, long txnId, String writeIdList, long writeId) throws TException { + if (environmentContext == null) { + environmentContext = new EnvironmentContext(); + } + if (catName == null) { + catName = MetaStoreUtils.getDefaultCatalog(conf); + } - String[] parsedDbName = parseDbName(db_name, conf); - startTableFunction("alter_partitions", parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tbl_name); + startTableFunction("alter_partitions", catName, db_name, tbl_name); if (LOG.isInfoEnabled()) { for (Partition tmpPart : new_parts) { @@ -4939,10 +4962,10 @@ private void alter_partitions_with_environment_context(final String db_name, fin if (!tmpPart.isSetCatName()) { tmpPart.setCatName(getDefaultCatalog(conf)); } - firePreEvent(new PreAlterPartitionEvent(parsedDbName[DB_NAME], tbl_name, null, tmpPart, this)); + firePreEvent(new PreAlterPartitionEvent(db_name, tbl_name, null, tmpPart, this)); } - oldParts = alterHandler.alterPartitions(getMS(), wh, parsedDbName[CAT_NAME], - parsedDbName[DB_NAME], tbl_name, new_parts, environmentContext, txnId, writeIdList, writeId, this); + oldParts = alterHandler.alterPartitions(getMS(), wh, + catName, db_name, tbl_name, new_parts, environmentContext, txnId, writeIdList, writeId, this); Iterator olditr = oldParts.iterator(); // Only fetch the table if we have a listener that needs it. Table table = null; @@ -4956,8 +4979,7 @@ private void alter_partitions_with_environment_context(final String db_name, fin } if (table == null) { - table = getMS().getTable( - parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tbl_name, -1, null); + table = getMS().getTable(catName, db_name, tbl_name, -1, null); } if (!listeners.isEmpty()) { @@ -4995,7 +5017,8 @@ public void alter_table(final String dbname, final String name, throws InvalidOperationException, MetaException { // Do not set an environment context. 
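
The truncate_table_req endpoint introduced above replaces the positional truncate_table call with a request object, which is how a transactional caller attaches its transaction id, ValidWriteIdList and pre-allocated write id. A client-side sketch follows; only the getters appear in this diff, so the no-arg constructor and setter names are assumed to be the usual Thrift-generated ones in org.apache.hadoop.hive.metastore.api, and the table/partition values are placeholders.

    import java.util.Arrays;
    import org.apache.hadoop.hive.metastore.api.TruncateTableRequest;

    final class TruncateRequestExample {
      private TruncateRequestExample() {}

      /** Builds a request for the new truncate_table_req endpoint (sketch only). */
      static TruncateTableRequest truncateRequest(long txnId, String validWriteIdList, long writeId) {
        TruncateTableRequest req = new TruncateTableRequest();
        req.setDbName("default");
        req.setTableName("stats_part");
        req.setPartNames(Arrays.asList("p=101")); // null would truncate every partition
        req.setTxnId(txnId);                       // the writer's transaction id
        req.setValidWriteIdList(validWriteIdList); // the writer's snapshot, serialized
        req.setWriteId(writeId);                   // write id allocated for this truncate
        return req;
      }
    }
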
String[] parsedDbName = parseDbName(dbname, conf); - alter_table_core(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], name, newTable, null); + alter_table_core(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], name, newTable, + null, -1, null); } @Override @@ -5008,7 +5031,16 @@ public void alter_table_with_cascade(final String dbname, final String name, envContext.putToProperties(StatsSetupConst.CASCADE, StatsSetupConst.TRUE); } String[] parsedDbName = parseDbName(dbname, conf); - alter_table_core(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], name, newTable, envContext); + alter_table_core(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], name, newTable, + envContext, -1, null); + } + + @Override + public AlterTableResponse alter_table_req(AlterTableRequest req) + throws InvalidOperationException, MetaException, TException { + alter_table_core(req.getCatName(), req.getDbName(), req.getTableName(), + req.getTable(), req.getEnvironmentContext(), req.getTxnId(), req.getValidWriteIdList()); + return new AlterTableResponse(); } @Override @@ -5017,14 +5049,21 @@ public void alter_table_with_environment_context(final String dbname, final EnvironmentContext envContext) throws InvalidOperationException, MetaException { String[] parsedDbName = parseDbName(dbname, conf); - alter_table_core(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], name, newTable, envContext); + alter_table_core(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], + name, newTable, envContext, -1, null); } - private void alter_table_core(final String catName, final String dbname, final String name, - final Table newTable, final EnvironmentContext envContext) + private void alter_table_core(String catName, String dbname, String name, Table newTable, + EnvironmentContext envContext, long txnId, String validWriteIdList) throws InvalidOperationException, MetaException { startFunction("alter_table", ": " + TableName.getQualified(catName, dbname, name) + " newtbl=" + newTable.getTableName()); + if (envContext == null) { + envContext = new EnvironmentContext(); + } + if (catName == null) { + catName = MetaStoreUtils.getDefaultCatalog(conf); + } // Update the time if it hasn't been specified. 
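
alter_table_req above plays the same role for table alterations: the older alter_table overloads all funnel into alter_table_core with txnId = -1 and a null write-id list, while the request form carries the transactional context directly instead of tunneling it through EnvironmentContext (whose TXN_ID/VALID_WRITE_IDS keys this patch removes from StatsSetupConst). A matching client-side sketch, with the same caveat that the setters are assumed to be the Thrift-generated counterparts of the getters used here.

    import org.apache.hadoop.hive.metastore.api.AlterTableRequest;
    import org.apache.hadoop.hive.metastore.api.Table;

    final class AlterTableRequestExample {
      private AlterTableRequestExample() {}

      /** Builds a request for the new alter_table_req endpoint (sketch only). */
      static AlterTableRequest alterRequest(String catName, String dbName, String tableName,
          Table newTable, long txnId, String validWriteIdList) {
        AlterTableRequest req = new AlterTableRequest();
        req.setCatName(catName);
        req.setDbName(dbName);
        req.setTableName(tableName);
        req.setTable(newTable);
        req.setTxnId(txnId);                        // -1 when not running in a transaction
        req.setValidWriteIdList(validWriteIdList);  // null when not running in a transaction
        return req;
      }
    }
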
if (newTable.getParameters() == null || @@ -5052,7 +5091,7 @@ private void alter_table_core(final String catName, final String dbname, final S Table oldt = get_table_core(catName, dbname, name); firePreEvent(new PreAlterTableEvent(oldt, newTable, this)); alterHandler.alterTable(getMS(), wh, catName, dbname, name, newTable, - envContext, this); + envContext, this, txnId, validWriteIdList); success = true; } catch (NoSuchObjectException e) { // thrown when the table to be altered does not exist @@ -5600,6 +5639,7 @@ public boolean drop_partition_by_name_with_environment_context(final String db_n } private String lowerCaseConvertPartName(String partName) throws MetaException { + if (partName == null) return partName; boolean isFirst = true; Map<String, String> partSpec = Warehouse.makeEscSpecFromName(partName); String convertedPartName = new String(); @@ -5618,6 +5658,7 @@ private String lowerCaseConvertPartName(String partName) throws MetaException { return convertedPartName; } + @Deprecated @Override public ColumnStatistics get_table_column_statistics(String dbName, String tableName, String colName) throws TException { @@ -5660,7 +5701,13 @@ public TableStatsResult get_table_statistics_req(TableStatsRequest request) thro ColumnStatistics cs = getMS().getTableColumnStatistics( catName, dbName, tblName, lowerCaseColNames, request.getTxnId(), request.getValidWriteIdList()); - result = new TableStatsResult((cs == null || cs.getStatsObj() == null) + // Note: stats compliance is not propagated to the client; instead, we just return nothing + // if stats are not compliant for now. This won't work for stats merging, but that + // is currently only done on the metastore side (see set_aggr...). + // For some optimizations we might make use of incorrect stats that are "better than + // nothing", so this may change in future. + result = new TableStatsResult((cs == null || cs.getStatsObj() == null + || (cs.isSetIsStatsCompliant() && !cs.isIsStatsCompliant())) ? Lists.newArrayList() : cs.getStatsObj()); } finally { endFunction("get_table_statistics_req", result == null, null, tblName); @@ -5725,8 +5772,16 @@ public PartitionsStatsResult get_partitions_statistics_req(PartitionsStatsReques request.isSetTxnId() ? request.getTxnId() : -1, request.isSetValidWriteIdList() ? request.getValidWriteIdList() : null); Map<String, List<ColumnStatisticsObj>> map = new HashMap<>(); - for (ColumnStatistics stat : stats) { - map.put(stat.getStatsDesc().getPartName(), stat.getStatsObj()); + if (stats != null) { + for (ColumnStatistics stat : stats) { + // Note: stats compliance is not propagated to the client; instead, we just return nothing + // if stats are not compliant for now. This won't work for stats merging, but that + // is currently only done on the metastore side (see set_aggr...). + // For some optimizations we might make use of incorrect stats that are "better than + // nothing", so this may change in future. + if (stat.isSetIsStatsCompliant() && !stat.isIsStatsCompliant()) continue; + map.put(stat.getStatsDesc().getPartName(), stat.getStatsObj()); + } } result = new PartitionsStatsResult(map); } finally { @@ -5737,79 +5792,73 @@ public PartitionsStatsResult get_partitions_statistics_req(PartitionsStatsReques @Override public boolean update_table_column_statistics(ColumnStatistics colStats) throws TException { - String catName; - String dbName; - String tableName; - String colName; - ColumnStatisticsDesc statsDesc = colStats.getStatsDesc(); - catName = statsDesc.isSetCatName() ?
statsDesc.getCatName().toLowerCase() : getDefaultCatalog(conf); - dbName = statsDesc.getDbName().toLowerCase(); - tableName = statsDesc.getTableName().toLowerCase(); - - statsDesc.setCatName(catName); - statsDesc.setDbName(dbName); - statsDesc.setTableName(tableName); - long time = System.currentTimeMillis() / 1000; - statsDesc.setLastAnalyzed(time); - - List statsObjs = colStats.getStatsObj(); + // Deprecated API, won't work for transactional tables + return updateTableColumnStatsInternal(colStats, -1, null, -1); + } - startFunction("write_column_statistics", ": table=" + - TableName.getQualified(catName, dbName, tableName)); - for (ColumnStatisticsObj statsObj:statsObjs) { - colName = statsObj.getColName().toLowerCase(); - statsObj.setColName(colName); - statsObj.setColType(statsObj.getColType().toLowerCase()); + @Override + public SetPartitionsStatsResponse update_table_column_statistics_req( + SetPartitionsStatsRequest req) throws NoSuchObjectException, + InvalidObjectException, MetaException, InvalidInputException, + TException { + if (req.getColStatsSize() != 1) { + throw new InvalidInputException("Only one stats object expected"); + } + if (req.isNeedMerge()) { + throw new InvalidInputException("Merge is not supported for non-aggregate stats"); } + ColumnStatistics colStats = req.getColStatsIterator().next(); + boolean ret = updateTableColumnStatsInternal(colStats, + req.getTxnId(), req.getValidWriteIdList(), req.getWriteId()); + return new SetPartitionsStatsResponse(ret); + } - colStats.setStatsDesc(statsDesc); - colStats.setStatsObj(statsObjs); + private boolean updateTableColumnStatsInternal(ColumnStatistics colStats, + long txnId, String validWriteIds, long writeId) + throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { + normalizeColStatsInput(colStats); - boolean ret = false; + startFunction("write_column_statistics", ": table=" + TableName.getQualified( + colStats.getStatsDesc().getCatName(), colStats.getStatsDesc().getDbName(), + colStats.getStatsDesc().getTableName())); + boolean ret = false; try { - ret = getMS().updateTableColumnStatistics(colStats); - return ret; + ret = getMS().updateTableColumnStatistics(colStats, txnId, validWriteIds, writeId); } finally { - endFunction("write_column_statistics", ret != false, null, tableName); + endFunction("write_column_statistics", ret != false, null, + colStats.getStatsDesc().getTableName()); } + return ret; } - private boolean updatePartitonColStats(Table tbl, ColumnStatistics colStats) - throws MetaException, InvalidObjectException, NoSuchObjectException, InvalidInputException { - String catName; - String dbName; - String tableName; - String partName; - String colName; - + private void normalizeColStatsInput(ColumnStatistics colStats) throws MetaException { + // TODO: is this really needed? this code is propagated from HIVE-1362 but most of it is useless. ColumnStatisticsDesc statsDesc = colStats.getStatsDesc(); - catName = statsDesc.isSetCatName() ? statsDesc.getCatName().toLowerCase() : getDefaultCatalog(conf); - dbName = statsDesc.getDbName().toLowerCase(); - tableName = statsDesc.getTableName().toLowerCase(); - partName = lowerCaseConvertPartName(statsDesc.getPartName()); - - statsDesc.setCatName(catName); - statsDesc.setDbName(dbName); - statsDesc.setTableName(tableName); - statsDesc.setPartName(partName); - + statsDesc.setCatName(statsDesc.isSetCatName() ? 
statsDesc.getCatName().toLowerCase() : getDefaultCatalog(conf)); + statsDesc.setDbName(statsDesc.getDbName().toLowerCase()); + statsDesc.setTableName(statsDesc.getTableName().toLowerCase()); + statsDesc.setPartName(lowerCaseConvertPartName(statsDesc.getPartName())); long time = System.currentTimeMillis() / 1000; statsDesc.setLastAnalyzed(time); - List statsObjs = colStats.getStatsObj(); - - startFunction("write_partition_column_statistics", - ": db=" + dbName + " table=" + tableName - + " part=" + partName); - for (ColumnStatisticsObj statsObj:statsObjs) { - colName = statsObj.getColName().toLowerCase(); - statsObj.setColName(colName); + for (ColumnStatisticsObj statsObj : colStats.getStatsObj()) { + statsObj.setColName(statsObj.getColName().toLowerCase()); statsObj.setColType(statsObj.getColType().toLowerCase()); } - colStats.setStatsDesc(statsDesc); - colStats.setStatsObj(statsObjs); + colStats.setStatsObj(colStats.getStatsObj()); + } + + private boolean updatePartitonColStatsInternal(Table tbl, ColumnStatistics colStats, + long txnId, String validWriteIds, long writeId) + throws MetaException, InvalidObjectException, NoSuchObjectException, InvalidInputException { + normalizeColStatsInput(colStats); + + ColumnStatisticsDesc csd = colStats.getStatsDesc(); + String catName = csd.getCatName(), dbName = csd.getDbName(), tableName = csd.getTableName(); + startFunction("write_partition_column_statistics", ": db=" + dbName + " table=" + tableName + + " part=" + csd.getPartName()); boolean ret = false; @@ -5817,9 +5866,9 @@ private boolean updatePartitonColStats(Table tbl, ColumnStatistics colStats) if (tbl == null) { tbl = getTable(catName, dbName, tableName); } - List partVals = getPartValsFromName(tbl, partName); - ret = getMS().updatePartitionColumnStatistics(colStats, partVals); - return ret; + List partVals = getPartValsFromName(tbl, csd.getPartName()); + return getMS().updatePartitionColumnStatistics( + colStats, partVals, txnId, validWriteIds, writeId); } finally { endFunction("write_partition_column_statistics", ret != false, null, tableName); } @@ -5827,7 +5876,26 @@ private boolean updatePartitonColStats(Table tbl, ColumnStatistics colStats) @Override public boolean update_partition_column_statistics(ColumnStatistics colStats) throws TException { - return updatePartitonColStats(null, colStats); + // Deprecated API. 
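+ // No txn context is passed here (txnId=-1, validWriteIds=null, writeId=-1); like the table-level call above, this deprecated path won't work for transactional tables.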
+ return updatePartitonColStatsInternal(null, colStats, -1, null, -1); + } + + + @Override + public SetPartitionsStatsResponse update_partition_column_statistics_req( + SetPartitionsStatsRequest req) throws NoSuchObjectException, + InvalidObjectException, MetaException, InvalidInputException, + TException { + if (req.getColStatsSize() != 1) { + throw new InvalidInputException("Only one stats object expected"); + } + if (req.isNeedMerge()) { + throw new InvalidInputException("Merge is not supported for non-aggregate stats"); + } + ColumnStatistics colStats = req.getColStatsIterator().next(); + boolean ret = updatePartitonColStatsInternal(null, colStats, + req.getTxnId(), req.getValidWriteIdList(), req.getWriteId()); + return new SetPartitionsStatsResponse(ret); } @Override @@ -5843,13 +5911,25 @@ public boolean delete_partition_column_statistics(String dbName, String tableNam startFunction("delete_column_statistics_by_partition",": table=" + TableName.getQualified(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName) + " partition=" + convertedPartName + " column=" + colName); - boolean ret = false; + boolean ret = false, committed = false; + getMS().openTransaction(); try { List partVals = getPartValsFromName(getMS(), parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, convertedPartName); + Table table = getMS().getTable(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName); + // This API looks unused; if it were used we'd need to update stats state and write ID. + // We cannot just randomly nuke some txn stats. + if (TxnUtils.isTransactionalTable(table)) { + throw new MetaException("Cannot delete stats via this API for a transactional table"); + } + ret = getMS().deletePartitionColumnStatistics(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, convertedPartName, partVals, colName); + committed = getMS().commitTransaction(); } finally { + if (!committed) { + getMS().rollbackTransaction(); + } endFunction("delete_column_statistics_by_partition", ret != false, null, tableName); } return ret; @@ -5870,10 +5950,23 @@ public boolean delete_table_column_statistics(String dbName, String tableName, S TableName.getQualified(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName) + " column=" + colName); - boolean ret = false; + + boolean ret = false, committed = false; + getMS().openTransaction(); try { + Table table = getMS().getTable(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName); + // This API looks unused; if it were used we'd need to update stats state and write ID. + // We cannot just randomly nuke some txn stats. 
+ if (TxnUtils.isTransactionalTable(table)) { + throw new MetaException("Cannot delete stats via this API for a transactional table"); + } + ret = getMS().deleteTableColumnStatistics(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, colName); + committed = getMS().commitTransaction(); } finally { + if (!committed) { + getMS().rollbackTransaction(); + } endFunction("delete_column_statistics_by_table", ret != false, null, tableName); } return ret; @@ -7489,31 +7582,13 @@ public boolean set_aggr_stats_for(SetPartitionsStatsRequest request) throws TExc throw new MetaException( "Expecting only 1 ColumnStatistics for table's column stats, but find " + request.getColStatsSize()); + } + if (request.isSetNeedMerge() && request.isNeedMerge()) { + return updateTableColumnStatsWithMerge(catName, dbName, tableName, colNames, request); } else { - if (request.isSetNeedMerge() && request.isNeedMerge()) { - // one single call to get all column stats - ColumnStatistics csOld = - getMS().getTableColumnStatistics( - catName, dbName, tableName, colNames, - request.getTxnId(), request.getValidWriteIdList()); - Table t = getTable(catName, dbName, tableName); - // we first use t.getParameters() to prune the stats - MetaStoreUtils.getMergableCols(firstColStats, t.getParameters()); - // we merge those that can be merged - if (csOld != null && csOld.getStatsObjSize() != 0 - && !firstColStats.getStatsObj().isEmpty()) { - MetaStoreUtils.mergeColStats(firstColStats, csOld); - } - if (!firstColStats.getStatsObj().isEmpty()) { - return update_table_column_statistics(firstColStats); - } else { - LOG.debug("All the column stats are not accurate to merge."); - return true; - } - } else { - // This is the overwrite case, we do not care about the accuracy. - return update_table_column_statistics(firstColStats); - } + // This is the overwrite case, we do not care about the accuracy. + return updateTableColumnStatsInternal(firstColStats, request.getTxnId(), + request.getValidWriteIdList(), request.getWriteId()); } } else { // partition level column stats merging @@ -7529,54 +7604,151 @@ public boolean set_aggr_stats_for(SetPartitionsStatsRequest request) throws TExc newStatsMap.put(partName, csNew); } - Map oldStatsMap = new HashMap<>(); - Map mapToPart = new HashMap<>(); if (request.isSetNeedMerge() && request.isNeedMerge()) { - // a single call to get all column stats for all partitions - List partitionNames = new ArrayList<>(); - partitionNames.addAll(newStatsMap.keySet()); - List csOlds = - getMS().getPartitionColumnStatistics( - catName, dbName, tableName, partitionNames, colNames, - request.getTxnId(), request.getValidWriteIdList()); - if (newStatsMap.values().size() != csOlds.size()) { - // some of the partitions miss stats. - LOG.debug("Some of the partitions miss stats."); - } - for (ColumnStatistics csOld : csOlds) { - oldStatsMap.put(csOld.getStatsDesc().getPartName(), csOld); + ret = updatePartColumnStatsWithMerge(catName, dbName, tableName, + colNames, newStatsMap, request); + } else { // No merge. + Table t = getTable(catName, dbName, tableName); + for (Entry entry : newStatsMap.entrySet()) { + // We don't short-circuit on errors here anymore. That can leave acid stats invalid. 
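+ // '&& ret' folds each partition's outcome into the overall result instead of returning early.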
+ ret = updatePartitonColStatsInternal(t, entry.getValue(), request.getTxnId(), + request.getValidWriteIdList(), request.getWriteId()) && ret; } + } + } + return ret; + } - // another single call to get all the partition objects - partitions = getMS().getPartitionsByNames(catName, dbName, tableName, partitionNames); - for (int index = 0; index < partitionNames.size(); index++) { - mapToPart.put(partitionNames.get(index), partitions.get(index)); - } + private boolean updatePartColumnStatsWithMerge(String catName, String dbName, String tableName, + List<String> colNames, Map<String, ColumnStatistics> newStatsMap, SetPartitionsStatsRequest request) + throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { + RawStore ms = getMS(); + ms.openTransaction(); + boolean isCommitted = false, result = false; + try { + // a single call to get all column stats for all partitions + List<String> partitionNames = new ArrayList<>(); + partitionNames.addAll(newStatsMap.keySet()); + List<ColumnStatistics> csOlds = ms.getPartitionColumnStatistics(catName, dbName, tableName, + partitionNames, colNames, request.getTxnId(), request.getValidWriteIdList()); + if (newStatsMap.values().size() != csOlds.size()) { + // some of the partitions miss stats. + LOG.debug("Some of the partitions miss stats."); } - Table t = getTable(catName, dbName, tableName, - request.getTxnId(), request.getValidWriteIdList()); + Map<String, ColumnStatistics> oldStatsMap = new HashMap<>(); + for (ColumnStatistics csOld : csOlds) { + oldStatsMap.put(csOld.getStatsDesc().getPartName(), csOld); + } + + // another single call to get all the partition objects + List<Partition> partitions = ms.getPartitionsByNames(catName, dbName, tableName, partitionNames); + Map<String, Partition> mapToPart = new HashMap<>(); + for (int index = 0; index < partitionNames.size(); index++) { + mapToPart.put(partitionNames.get(index), partitions.get(index)); + } + + Table t = getTable(catName, dbName, tableName); for (Entry<String, ColumnStatistics> entry : newStatsMap.entrySet()) { ColumnStatistics csNew = entry.getValue(); ColumnStatistics csOld = oldStatsMap.get(entry.getKey()); - if (request.isSetNeedMerge() && request.isNeedMerge()) { + boolean isInvalidTxnStats = csOld != null + && csOld.isSetIsStatsCompliant() && !csOld.isIsStatsCompliant(); + Partition part = mapToPart.get(entry.getKey()); + if (isInvalidTxnStats) { + // No columns can be merged; a shortcut for getMergableCols. + csNew.setStatsObj(Lists.newArrayList()); + } else { // we first use getParameters() to prune the stats - MetaStoreUtils.getMergableCols(csNew, mapToPart.get(entry.getKey()).getParameters()); + MetaStoreUtils.getMergableCols(csNew, part.getParameters()); // we merge those that can be merged if (csOld != null && csOld.getStatsObjSize() != 0 && !csNew.getStatsObj().isEmpty()) { MetaStoreUtils.mergeColStats(csNew, csOld); } - if (!csNew.getStatsObj().isEmpty()) { - ret = ret && updatePartitonColStats(t, csNew); - } else { - LOG.debug("All the column stats " + csNew.getStatsDesc().getPartName() - + " are not accurate to merge."); - } + } + + if (!csNew.getStatsObj().isEmpty()) { + // We don't short-circuit on errors here anymore. That can leave acid stats invalid. + result = updatePartitonColStatsInternal(t, csNew, request.getTxnId(), + request.getValidWriteIdList(), request.getWriteId()) && result; + } else if (isInvalidTxnStats) { + // For now because the stats state is such as it is, we will invalidate everything. + // Overall the semantics here are not clear - we could invalidate only some columns, but does + // that make any physical sense? Could a query affect some columns but not others?
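+ // Invalidate both column and basic stats for this partition, stamp the new write ID, and persist the change via alterPartition.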
+ part.setWriteId(request.getWriteId()); + StatsSetupConst.clearColumnStatsState(part.getParameters()); + StatsSetupConst.setBasicStatsState(part.getParameters(), StatsSetupConst.FALSE); + ms.alterPartition(catName, dbName, tableName, part.getValues(), part, + request.getTxnId(), request.getValidWriteIdList()); + result = false; } else { - ret = ret && updatePartitonColStats(t, csNew); + // TODO: why doesn't the original call for non acid tables invalidate the stats? + LOG.debug("All the column stats " + csNew.getStatsDesc().getPartName() + + " are not accurate to merge."); } } + ms.commitTransaction(); + isCommitted = true; + } finally { + if (!isCommitted) { + ms.rollbackTransaction(); + } } - return ret; + return result; + } + + + private boolean updateTableColumnStatsWithMerge(String catName, String dbName, String tableName, + List<String> colNames, SetPartitionsStatsRequest request) throws MetaException, + NoSuchObjectException, InvalidObjectException, InvalidInputException { + ColumnStatistics firstColStats = request.getColStats().get(0); + RawStore ms = getMS(); + ms.openTransaction(); + boolean isCommitted = false, result = false; + try { + ColumnStatistics csOld = ms.getTableColumnStatistics(catName, dbName, tableName, colNames, + request.getTxnId(), request.getValidWriteIdList()); + // we first use the valid stats list to prune the stats + boolean isInvalidTxnStats = csOld != null + && csOld.isSetIsStatsCompliant() && !csOld.isIsStatsCompliant(); + if (isInvalidTxnStats) { + // No columns can be merged; a shortcut for getMergableCols. + firstColStats.setStatsObj(Lists.newArrayList()); + } else { + Table t = getTable(catName, dbName, tableName); + MetaStoreUtils.getMergableCols(firstColStats, t.getParameters()); + + // we merge those that can be merged + if (csOld != null && csOld.getStatsObjSize() != 0 && !firstColStats.getStatsObj().isEmpty()) { + MetaStoreUtils.mergeColStats(firstColStats, csOld); + } + } + + if (!firstColStats.getStatsObj().isEmpty()) { + result = updateTableColumnStatsInternal(firstColStats, request.getTxnId(), + request.getValidWriteIdList(), request.getWriteId()); + } else if (isInvalidTxnStats) { + // For now because the stats state is such as it is, we will invalidate everything. + // Overall the semantics here are not clear - we could invalidate only some columns, but does + // that make any physical sense? Could a query affect some columns but not others? + Table t = getTable(catName, dbName, tableName); + t.setWriteId(request.getWriteId()); + StatsSetupConst.clearColumnStatsState(t.getParameters()); + StatsSetupConst.setBasicStatsState(t.getParameters(), StatsSetupConst.FALSE); + ms.alterTable(catName, dbName, tableName, t, request.getTxnId(), request.getValidWriteIdList()); + } else { + // TODO: why doesn't the original call for non acid tables invalidate the stats?
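+ // Nothing was mergeable and the existing stats are not txn-invalid, so leave them untouched and report success.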
+ LOG.debug("All the column stats are not accurate to merge."); + result = true; + } + + ms.commitTransaction(); + isCommitted = true; + } finally { + if (!isCommitted) { + ms.rollbackTransaction(); + } + } + return result; } private Table getTable(String catName, String dbName, String tableName) diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index cc417eab3b0c557fc78f8ea71bdf54a83f82e716..38327d74bc4fb640be702f5827e4dcdab9da28fe 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -130,7 +130,7 @@ //copied from ErrorMsg.java private static final String REPL_EVENTS_MISSING_IN_METASTORE = "Notification events are missing in the meta store."; - + static final protected Logger LOG = LoggerFactory.getLogger(HiveMetaStoreClient.class); public HiveMetaStoreClient(Configuration conf) throws MetaException { @@ -404,15 +404,36 @@ public void alter_table_with_environmentContext(String dbname, String tbl_name, if (hook != null) { hook.preAlterTable(new_tbl, envContext); } - client.alter_table_with_environment_context(prependCatalogToDbName(dbname, conf), - tbl_name, new_tbl, envContext); + AlterTableRequest req = new AlterTableRequest(dbname, tbl_name, new_tbl); + req.setCatName(MetaStoreUtils.getDefaultCatalog(conf)); + req.setEnvironmentContext(envContext); + client.alter_table_req(req); } @Override public void alter_table(String catName, String dbName, String tblName, Table newTable, EnvironmentContext envContext) throws TException { - client.alter_table_with_environment_context(prependCatalogToDbName(catName, - dbName, conf), tblName, newTable, envContext); + // This never used to call the hook. Why? There's overload madness in metastore... 
+ AlterTableRequest req = new AlterTableRequest(dbName, tblName, newTable); + req.setCatName(catName); + req.setEnvironmentContext(envContext); + client.alter_table_req(req); + } + + @Override + public void alter_table(String catName, String dbName, String tbl_name, Table new_tbl, + EnvironmentContext envContext, long txnId, String validWriteIds) + throws InvalidOperationException, MetaException, TException { + HiveMetaHook hook = getHook(new_tbl); + if (hook != null) { + hook.preAlterTable(new_tbl, envContext); + } + AlterTableRequest req = new AlterTableRequest(dbName, tbl_name, new_tbl); + req.setCatName(catName); + req.setTxnId(txnId); + req.setValidWriteIdList(validWriteIds); + req.setEnvironmentContext(envContext); + client.alter_table_req(req); } @Override @@ -1339,14 +1360,33 @@ public void dropTable(String catName, String dbname, String name, boolean delete } @Override + public void truncateTable(String dbName, String tableName, List partNames, + long txnId, String validWriteIds, long writeId) throws TException { + truncateTableInternal(getDefaultCatalog(conf), + dbName, tableName, partNames, txnId, validWriteIds, writeId); + } + + @Override public void truncateTable(String dbName, String tableName, List partNames) throws TException { - truncateTable(getDefaultCatalog(conf), dbName, tableName, partNames); + truncateTableInternal(getDefaultCatalog(conf), dbName, tableName, partNames, -1, null, -1); } @Override public void truncateTable(String catName, String dbName, String tableName, List partNames) throws TException { - client.truncate_table(prependCatalogToDbName(catName, dbName, conf), tableName, partNames); + truncateTableInternal(catName, dbName, tableName, partNames, -1, null, -1); + } + + private void truncateTableInternal(String catName, String dbName, String tableName, + List partNames, long txnId, String validWriteIds, long writeId) + throws MetaException, TException { + TruncateTableRequest req = new TruncateTableRequest( + prependCatalogToDbName(catName, dbName, conf), tableName); + req.setPartNames(partNames); + req.setTxnId(txnId); + req.setValidWriteIdList(validWriteIds); + req.setWriteId(writeId); + client.truncate_table_req(req); } /** @@ -1870,7 +1910,8 @@ public void alter_partition(String dbName, String tblName, Partition newPart) } @Override - public void alter_partition(String dbName, String tblName, Partition newPart, EnvironmentContext environmentContext) + public void alter_partition(String dbName, String tblName, Partition newPart, + EnvironmentContext environmentContext) throws InvalidOperationException, MetaException, TException { alter_partition(getDefaultCatalog(conf), dbName, tblName, newPart, environmentContext); } @@ -1878,11 +1919,26 @@ public void alter_partition(String dbName, String tblName, Partition newPart, En @Override public void alter_partition(String catName, String dbName, String tblName, Partition newPart, EnvironmentContext environmentContext) throws TException { - client.alter_partition_with_environment_context(prependCatalogToDbName(catName, dbName, conf), tblName, - newPart, environmentContext); + AlterPartitionsRequest req = new AlterPartitionsRequest(dbName, tblName, Lists.newArrayList(newPart)); + req.setCatName(catName); + req.setEnvironmentContext(environmentContext); + client.alter_partitions_req(req); } @Override + public void alter_partition(String dbName, String tblName, Partition newPart, + EnvironmentContext environmentContext, long txnId, String writeIdList) + throws InvalidOperationException, MetaException, TException { 
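+ // Wrap the single partition in an AlterPartitionsRequest so the txn ID and valid write ID list reach the server.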
+ AlterPartitionsRequest req = new AlterPartitionsRequest( + dbName, tblName, Lists.newArrayList(newPart)); + req.setEnvironmentContext(environmentContext); + req.setTxnId(txnId); + req.setValidWriteIdList(writeIdList); + client.alter_partitions_req(req); + } + + @Deprecated + @Override public void alter_partitions(String dbName, String tblName, List newParts) throws TException { alter_partitions( @@ -1901,8 +1957,6 @@ public void alter_partitions(String dbName, String tblName, List newP EnvironmentContext environmentContext, long txnId, String writeIdList, long writeId) throws InvalidOperationException, MetaException, TException { - //client.alter_partition_with_environment_context(getDefaultCatalog(conf), - // dbName, tblName, newParts, environmentContext); alter_partitions(getDefaultCatalog(conf), dbName, tblName, newParts, environmentContext, txnId, writeIdList, writeId); @@ -1914,14 +1968,15 @@ public void alter_partitions(String catName, String dbName, String tblName, EnvironmentContext environmentContext, long txnId, String writeIdList, long writeId) throws TException { AlterPartitionsRequest req = new AlterPartitionsRequest(); - req.setDbName(prependCatalogToDbName(catName, dbName, conf)); + req.setCatName(catName); + req.setDbName(dbName); req.setTableName(tblName); req.setPartitions(newParts); req.setEnvironmentContext(environmentContext); req.setTxnId(txnId); req.setValidWriteIdList(writeIdList); req.setWriteId(writeId); - client.alter_partitions_with_environment_context_req(req); + client.alter_partitions_req(req); } @Override @@ -2005,7 +2060,11 @@ public boolean updateTableColumnStatistics(ColumnStatistics statsObj) throws TEx if (!statsObj.getStatsDesc().isSetCatName()) { statsObj.getStatsDesc().setCatName(getDefaultCatalog(conf)); } - return client.update_table_column_statistics(statsObj); + // Note: currently this method doesn't set txn properties and thus won't work on txn tables. + SetPartitionsStatsRequest req = new SetPartitionsStatsRequest(); + req.addToColStats(statsObj); + req.setNeedMerge(false); + return client.update_table_column_statistics_req(req).isResult(); } @Override @@ -2013,7 +2072,11 @@ public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj) throws if (!statsObj.getStatsDesc().isSetCatName()) { statsObj.getStatsDesc().setCatName(getDefaultCatalog(conf)); } - return client.update_partition_column_statistics(statsObj); + // Note: currently this method doesn't set txn properties and thus won't work on txn tables. 
+ SetPartitionsStatsRequest req = new SetPartitionsStatsRequest(); + req.addToColStats(statsObj); + req.setNeedMerge(false); + return client.update_partition_column_statistics_req(req).isResult(); } @Override @@ -3431,5 +3494,4 @@ public void addRuntimeStat(RuntimeStat stat) throws TException { req.setMaxCreateTime(maxCreateTime); return client.get_runtime_stats(req); } - } diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index 27d96e5f079f6b9aaf50945c50451654e14466f0..c328992bb9d26288e64339b868cf484e97750bcd 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -623,6 +623,9 @@ default void dropTable(String catName, String dbName, String tableName) */ void truncateTable(String dbName, String tableName, List partNames) throws MetaException, TException; + void truncateTable(String dbName, String tableName, List partNames, + long txnId, String validWriteIds, long writeId) throws TException; + /** * Truncate the table/partitions in the DEFAULT database. * @param catName catalog name @@ -1641,10 +1644,14 @@ void alter_table(String defaultDatabaseName, String tblName, Table table, * @throws MetaException something went wrong, usually in the RDBMS * @throws TException general thrift exception */ + @Deprecated void alter_table_with_environmentContext(String databaseName, String tblName, Table table, EnvironmentContext environmentContext) throws InvalidOperationException, MetaException, TException; + void alter_table(String catName, String databaseName, String tblName, Table table, + EnvironmentContext environmentContext, long txnId, String validWriteIdList) + throws InvalidOperationException, MetaException, TException; /** * Create a new database. * @param db database object. If the catalog name is null it will be assumed to be @@ -2049,6 +2056,7 @@ void alter_partition(String dbName, String tblName, Partition newPart) * @throws TException * if error in communicating with metastore server */ + @Deprecated default void alter_partition(String catName, String dbName, String tblName, Partition newPart) throws InvalidOperationException, MetaException, TException { alter_partition(catName, dbName, tblName, newPart, null); @@ -2070,9 +2078,15 @@ default void alter_partition(String catName, String dbName, String tblName, Part * @throws TException * if error in communicating with metastore server */ + @Deprecated void alter_partition(String dbName, String tblName, Partition newPart, EnvironmentContext environmentContext) throws InvalidOperationException, MetaException, TException; + + void alter_partition(String dbName, String tblName, Partition newPart, + EnvironmentContext environmentContext, long txnId, String writeIdList) + throws InvalidOperationException, MetaException, TException; + /** * updates a partition to new partition * @param catName catalog name. 
@@ -2109,6 +2123,7 @@ void alter_partition(String catName, String dbName, String tblName, Partition ne * @throws TException * if error in communicating with metastore server */ + @Deprecated void alter_partitions(String dbName, String tblName, List newParts) throws InvalidOperationException, MetaException, TException; @@ -2129,6 +2144,7 @@ void alter_partitions(String dbName, String tblName, List newParts) * @throws TException * if error in communicating with metastore server */ + @Deprecated void alter_partitions(String dbName, String tblName, List newParts, EnvironmentContext environmentContext) throws InvalidOperationException, MetaException, TException; @@ -2154,6 +2170,7 @@ void alter_partitions(String dbName, String tblName, List newParts, * @throws TException * if error in communicating with metastore server */ + @Deprecated default void alter_partitions(String catName, String dbName, String tblName, List newParts) throws InvalidOperationException, MetaException, TException { @@ -3736,5 +3753,4 @@ void createOrDropTriggerToPoolMapping(String resourcePlanName, String triggerNam /** Reads runtime statistics. */ List getRuntimeStats(int maxWeight, int maxCreateTime) throws TException; - } diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index f45b71fb1fb8ae286af0ca66f0ec9618a445000b..07be1bae843f63f107406392623ed199cbd28ba5 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -112,6 +112,7 @@ private static final Logger LOG = LoggerFactory.getLogger(MetaStoreDirectSql.class); private final PersistenceManager pm; + private final Configuration conf; private final String schema; /** @@ -146,8 +147,10 @@ SKEWED_COL_VALUE_LOC_MAP, COLUMNS_V2, PARTITION_KEYS, SERDE_PARAMS, PART_COL_STATS, KEY_CONSTRAINTS, TAB_COL_STATS, PARTITION_KEY_VALS, PART_PRIVS, PART_COL_PRIVS, SKEWED_STRING_LIST, CDS; + public MetaStoreDirectSql(PersistenceManager pm, Configuration conf, String schema) { this.pm = pm; + this.conf = conf; this.schema = schema; DatabaseProduct dbType = null; try { @@ -645,8 +648,8 @@ private boolean isViewTable(String catName, String dbName, String tblName) throw + " " + SERDES + ".\"SERDE_ID\", " + PARTITIONS + ".\"CREATE_TIME\"," + " " + PARTITIONS + ".\"LAST_ACCESS_TIME\", " + SDS + ".\"INPUT_FORMAT\", " + SDS + ".\"IS_COMPRESSED\"," + " " + SDS + ".\"IS_STOREDASSUBDIRECTORIES\", " + SDS + ".\"LOCATION\", " + SDS + ".\"NUM_BUCKETS\"," - + " " + SDS + ".\"OUTPUT_FORMAT\", " + SERDES + ".\"NAME\", " + SERDES + ".\"SLIB\" " - + "from " + PARTITIONS + "" + + " " + SDS + ".\"OUTPUT_FORMAT\", " + SERDES + ".\"NAME\", " + SERDES + ".\"SLIB\", " + PARTITIONS + + ".\"WRITE_ID\"" + " from " + PARTITIONS + "" + " left outer join " + SDS + " on " + PARTITIONS + ".\"SD_ID\" = " + SDS + ".\"SD_ID\" " + " left outer join " + SERDES + " on " + SDS + ".\"SERDE_ID\" = " + SERDES + ".\"SERDE_ID\" " + "where \"PART_ID\" in (" + partIds + ") order by \"PART_NAME\" asc"; @@ -697,8 +700,13 @@ private boolean isViewTable(String catName, String dbName, String tblName) throw part.setTableName(tblName); if (fields[4] != null) part.setCreateTime(extractSqlInt(fields[4])); if (fields[5] != null) part.setLastAccessTime(extractSqlInt(fields[5])); + Long 
writeId = extractSqlLong(fields[14]); + if (writeId != null) { + part.setWriteId(writeId); + } partitions.put(partitionId, part); + if (sdId == null) continue; // Probably a view. assert serdeId != null; @@ -747,6 +755,7 @@ private boolean isViewTable(String catName, String dbName, String tblName) throw serde.setSerializationLib((String)fields[13]); serdeSb.append(serdeId).append(","); sd.setSerdeInfo(serde); + Deadline.checkTimeout(); } query.closeAll(); @@ -2489,7 +2498,9 @@ public void dropPartitionsViaSqlFilter(final String catName, final String dbName */ private void dropPartitionsByPartitionIds(List partitionIdList) throws MetaException { String queryText; - + if (partitionIdList.isEmpty()) { + return; + } String partitionIds = getIdListForIn(partitionIdList); // Get the corresponding SD_ID-s, CD_ID-s, SERDE_ID-s @@ -2570,6 +2581,9 @@ private void dropPartitionsByPartitionIds(List partitionIdList) throws M * MetaException */ private void dropStorageDescriptors(List storageDescriptorIdList) throws MetaException { + if (storageDescriptorIdList.isEmpty()) { + return; + } String queryText; String sdIds = getIdListForIn(storageDescriptorIdList); @@ -2657,6 +2671,9 @@ private void dropStorageDescriptors(List storageDescriptorIdList) throws */ private void dropSerdes(List serdeIdList) throws MetaException { String queryText; + if (serdeIdList.isEmpty()) { + return; + } String serdeIds = getIdListForIn(serdeIdList); try { @@ -2683,6 +2700,9 @@ private void dropSerdes(List serdeIdList) throws MetaException { */ private void dropDanglingColumnDescriptors(List columnDescriptorIdList) throws MetaException { + if (columnDescriptorIdList.isEmpty()) { + return; + } String queryText; String colIds = getIdListForIn(columnDescriptorIdList); diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index aa29dd911345f8a1543697b710a3bf36990b3efb..b43e6f330850d92808a6b1832d1044de1498cae5 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -1448,7 +1448,7 @@ public Table getTable(String catName, String dbName, String tableName, if (tbl != null && TxnUtils.isTransactionalTable(tbl) && tbl.getPartitionKeysSize() == 0) { - if (isCurrentStatsValidForTheQuery(mtable, txnId, writeIdList)) { + if (isCurrentStatsValidForTheQuery(mtable, txnId, writeIdList, false)) { tbl.setIsStatsCompliant(true); } else { tbl.setIsStatsCompliant(false); @@ -1943,11 +1943,14 @@ private Table convertToTable(MTable mtbl) throws MetaException { t.setRewriteEnabled(mtbl.isRewriteEnabled()); t.setCatName(mtbl.getDatabase().getCatalogName()); + t.setWriteId(mtbl.getWriteId()); return t; } private MTable convertToMTable(Table tbl) throws InvalidObjectException, MetaException { + // NOTE: we don't set writeId in this method. Write ID is only set after validating the + // existing write ID against the caller's valid list. 
if (tbl == null) { return null; } @@ -1986,9 +1989,6 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException, convertToMFieldSchemas(tbl.getPartitionKeys()), tbl.getParameters(), tbl.getViewOriginalText(), tbl.getViewExpandedText(), tbl.isRewriteEnabled(), tableType); - if (TxnUtils.isTransactionalTable(tbl)) { - mtable.setWriteId(tbl.getWriteId()); - } return mtable; } @@ -2450,21 +2450,24 @@ public Partition getPartition(String catName, String dbName, String tableName, + part_vals.toString()); } part.setValues(part_vals); + setPartitionStatsParam(part, table.getParameters(), mpart.getWriteId(), txnId, writeIdList); + return part; + } + + private void setPartitionStatsParam(Partition part, Map tableParams, + long partWriteId, long reqTxnId, String reqWriteIdList) throws MetaException { // If transactional table partition, check whether the current version partition // statistics in the metastore comply with the client query's snapshot isolation. - if (writeIdList != null) { - if (TxnUtils.isTransactionalTable(table.getParameters())) { - if (isCurrentStatsValidForTheQuery(mpart, txnId, writeIdList)) { - part.setIsStatsCompliant(true); - } else { - part.setIsStatsCompliant(false); - // Do not make persistent the following state since it is query specific (not global). - StatsSetupConst.setBasicStatsState(part.getParameters(), StatsSetupConst.FALSE); - LOG.info("Removed COLUMN_STATS_ACCURATE from Partition object's parameters."); - } - } + if (reqWriteIdList == null) return; + if (!TxnUtils.isTransactionalTable(tableParams)) return; + if (isCurrentStatsValidForTheQuery(part, partWriteId, reqTxnId, reqWriteIdList, false)) { + part.setIsStatsCompliant(true); + } else { + part.setIsStatsCompliant(false); + // Do not make persistent the following state since it is query specific (not global). + StatsSetupConst.setBasicStatsState(part.getParameters(), StatsSetupConst.FALSE); + LOG.info("Removed COLUMN_STATS_ACCURATE from Partition object's parameters."); } - return part; } /** @@ -2570,6 +2573,8 @@ private MPartition getMPartition(String catName, String dbName, String tableName */ private MPartition convertToMPart(Partition part, MTable mt, boolean useTableCD) throws InvalidObjectException, MetaException { + // NOTE: we don't set writeId in this method. Write ID is only set after validating the + // existing write ID against the caller's valid list. 
if (part == null) { return null; } @@ -2597,9 +2602,6 @@ private MPartition convertToMPart(Partition part, MTable mt, boolean useTableCD) .getPartitionKeys()), part.getValues()), mt, part.getValues(), part .getCreateTime(), part.getLastAccessTime(), msd, part.getParameters()); - if (TxnUtils.isTransactionalTable(mt.getParameters())) { - mpart.setWriteId(part.getWriteId()); - } return mpart; } @@ -2612,6 +2614,7 @@ private Partition convertToPart(MPartition mpart) throws MetaException { mpart.getLastAccessTime(), convertToStorageDescriptor(mpart.getSd()), convertMap(mpart.getParameters())); p.setCatName(mpart.getTable().getDatabase().getCatalogName()); + p.setWriteId(mpart.getWriteId()); return p; } @@ -2624,6 +2627,7 @@ private Partition convertToPart(String catName, String dbName, String tblName, M mpart.getCreateTime(), mpart.getLastAccessTime(), convertToStorageDescriptor(mpart.getSd(), false), convertMap(mpart.getParameters())); p.setCatName(catName); + p.setWriteId(mpart.getWriteId()); return p; } @@ -4113,6 +4117,16 @@ public void alterTable(String catName, String dbname, String name, Table newTabl // For now only alter name, owner, parameters, cols, bucketcols are allowed oldt.setDatabase(newt.getDatabase()); oldt.setTableName(normalizeIdentifier(newt.getTableName())); + boolean isTxn = TxnUtils.isTransactionalTable(newTable); + if (isTxn) { + // Transactional table is altered without a txn. Make sure there are no changes to the flag. + String errorMsg = verifyStatsChangeCtx(oldt.getParameters(), newTable.getParameters(), + newTable.getWriteId(), queryValidWriteIds, false); + if (errorMsg != null) { + throw new MetaException(errorMsg); + } + } + boolean isToTxn = isTxn && !TxnUtils.isTransactionalTable(oldt.getParameters()); oldt.setParameters(newt.getParameters()); oldt.setOwner(newt.getOwner()); oldt.setOwnerType(newt.getOwnerType()); @@ -4135,13 +4149,16 @@ public void alterTable(String catName, String dbname, String name, Table newTabl // If transactional, update MTable to have txnId and the writeIdList // for the current Stats updater query. - if (TxnUtils.isTransactionalTable(newTable) && queryValidWriteIds != null) { + // Don't update for conversion to acid - it doesn't modify stats but passes in qVWIds. + // The fact that it doesn't update stats is verified above. + if (isTxn && queryValidWriteIds != null && (!isToTxn || newTable.getWriteId() > 0)) { // Check concurrent INSERT case and set false to the flag. - if (!isCurrentStatsValidForTheQuery(oldt, queryTxnId, queryValidWriteIds)) { + if (!isCurrentStatsValidForTheQuery(oldt, queryTxnId, queryValidWriteIds, true)) { StatsSetupConst.setBasicStatsState(oldt.getParameters(), StatsSetupConst.FALSE); LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the table " + dbname + "." + name + ". will be made persistent."); } + assert newTable.getWriteId() > 0; oldt.setWriteId(newTable.getWriteId()); } @@ -4154,6 +4171,32 @@ public void alterTable(String catName, String dbname, String name, Table newTabl } } + /** + * Verifies that the stats JSON string is unchanged for alter table (txn stats). + * @return Error message with the details of the change, or null if the value has not changed. + */ + private static String verifyStatsChangeCtx(Map oldP, Map newP, + long writeId, String validWriteIds, boolean isColStatsChange) { + if (validWriteIds != null && writeId > 0) return null; // We have txn context. + String oldVal = oldP == null ? null : oldP.get(StatsSetupConst.COLUMN_STATS_ACCURATE); + String newVal = newP == null ? 
null : newP.get(StatsSetupConst.COLUMN_STATS_ACCURATE); + // We don't need txn context if the stats state is not being changed. + if (StringUtils.isEmpty(oldVal) && StringUtils.isEmpty(newVal)) return null; + if (StringUtils.equalsIgnoreCase(oldVal, newVal)) { + if (!isColStatsChange) return null; // No change in col stats or parameters => assume no change. + // Col stats change while json stays "valid" implies stats change. If the new value is invalid, + // then we don't care. This is super ugly and idiotic. + // It will all become better when we get rid of JSON and store a flag and write ID per stats. + if (!StatsSetupConst.areBasicStatsUptoDate(newP)) return null; + } + // Some change to the stats state is being made; it can only be made with a write ID. + // Note - we could do this: if (writeId > 0 && (validWriteIds != null || !StatsSetupConst.areBasicStatsUptoDate(newP))) { return null; + // However the only way ID list can be absent is if WriteEntity wasn't generated for the alter, which is a separate bug. + return "Cannot change stats state for a transactional table without providing the transactional" + + " write state for verification (new write ID " + writeId + ", valid write IDs " + + validWriteIds + "; current state " + oldVal + "; new state " + newVal; + } + @Override public void updateCreationMetadata(String catName, String dbname, String tablename, CreationMetadata cm) throws MetaException { @@ -4210,6 +4253,15 @@ private MColumnDescriptor alterPartitionNoTxn(String catName, String dbname, Str } oldp.setValues(newp.getValues()); oldp.setPartitionName(newp.getPartitionName()); + boolean isTxn = TxnUtils.isTransactionalTable(table.getParameters()); + if (isTxn) { + // Transactional table is altered without a txn. Make sure there are no changes to the flag. + String errorMsg = verifyStatsChangeCtx(oldp.getParameters(), newPart.getParameters(), + newPart.getWriteId(), queryValidWriteIds, false); + if (errorMsg != null) { + throw new MetaException(errorMsg); + } + } oldp.setParameters(newPart.getParameters()); if (!TableType.VIRTUAL_VIEW.name().equals(oldp.getTable().getTableType())) { copyMSD(newp.getSd(), oldp.getSd()); @@ -4223,15 +4275,16 @@ private MColumnDescriptor alterPartitionNoTxn(String catName, String dbname, Str // If transactional, add/update the MUPdaterTransaction // for the current updater query. - if (queryValidWriteIds != null && TxnUtils.isTransactionalTable(table.getParameters())) { + if (isTxn && queryValidWriteIds != null && newPart.getWriteId() > 0) { // Check concurrent INSERT case and set false to the flag. - if (!isCurrentStatsValidForTheQuery(oldp, queryTxnId, queryValidWriteIds)) { + if (!isCurrentStatsValidForTheQuery(oldp, queryTxnId, queryValidWriteIds, true)) { StatsSetupConst.setBasicStatsState(oldp.getParameters(), StatsSetupConst.FALSE); LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the partition " + dbname + "." + name + "."
+ oldp.getPartitionName() + " will be made persistent."); } oldp.setWriteId(newPart.getWriteId()); } + return oldCD; } @@ -4239,7 +4292,7 @@ private MColumnDescriptor alterPartitionNoTxn(String catName, String dbname, Str public void alterPartition(String catName, String dbname, String name, List part_vals, Partition newPart, long queryTxnId, String queryValidWriteIds) throws InvalidObjectException, MetaException { boolean success = false; - Exception e = null; + Throwable e = null; try { openTransaction(); if (newPart.isSetWriteId()) { @@ -4250,7 +4303,8 @@ public void alterPartition(String catName, String dbname, String name, List parameters = table.getParameters(); - StatsSetupConst.setColumnStatsState(parameters, colNames); - oldt.setParameters(parameters); + Map newParams = new HashMap<>(table.getParameters()); + StatsSetupConst.setColumnStatsState(newParams, colNames); + boolean isTxn = TxnUtils.isTransactionalTable(oldt.getParameters()); + if (isTxn) { + String errorMsg = verifyStatsChangeCtx( + oldt.getParameters(), newParams, writeId, validWriteIds, true); + if (errorMsg != null) { + throw new MetaException(errorMsg); + } + if (!isCurrentStatsValidForTheQuery(oldt, txnId, validWriteIds, true)) { + // Make sure we set the flag to invalid regardless of the current value. + StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE); + LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the table " + + dbname + "." + name); + } + oldt.setWriteId(writeId); + } + oldt.setParameters(newParams); committed = commitTransaction(); return committed; @@ -8427,8 +8499,9 @@ public boolean updateTableColumnStatistics(ColumnStatistics colStats) } @Override - public boolean updatePartitionColumnStatistics(ColumnStatistics colStats, List partVals) - throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { + public boolean updatePartitionColumnStatistics(ColumnStatistics colStats, List partVals, + long txnId, String validWriteIds, long writeId) + throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { boolean committed = false; try { @@ -8460,9 +8533,26 @@ public boolean updatePartitionColumnStatistics(ColumnStatistics colStats, List parameters = mPartition.getParameters(); - StatsSetupConst.setColumnStatsState(parameters, colNames); - mPartition.setParameters(parameters); + // TODO## ideally the col stats stats should be in colstats, not in the partition! + Map newParams = new HashMap<>(mPartition.getParameters()); + StatsSetupConst.setColumnStatsState(newParams, colNames); + boolean isTxn = TxnUtils.isTransactionalTable(table); + if (isTxn) { + String errorMsg = verifyStatsChangeCtx( + mPartition.getParameters(), newParams, writeId, validWriteIds, true); + if (errorMsg != null) { + throw new MetaException(errorMsg); + } + if (!isCurrentStatsValidForTheQuery(mPartition, txnId, validWriteIds, true)) { + // Make sure we set the flag to invalid regardless of the current value. + StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE); + LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the partition " + + statsDesc.getDbName() + "." + statsDesc.getTableName() + "." 
+ statsDesc.getPartName()); + mPartition.setWriteId(writeId); + } + + mPartition.setParameters(newParams); committed = commitTransaction(); return committed; } finally { @@ -8565,19 +8655,20 @@ public ColumnStatistics getTableColumnStatistics( List<String> colNames, long txnId, String writeIdList) throws MetaException, NoSuchObjectException { - Boolean iLL = null; // If the current stats in the metastore doesn't comply with // the isolation level of the query, set No to the compliance flag. + Boolean isCompliant = null; if (writeIdList != null) { MTable table = this.getMTable(catName, dbName, tableName); - iLL = isCurrentStatsValidForTheQuery(table, txnId, writeIdList); + isCompliant = !TxnUtils.isTransactionalTable(table.getParameters()) + || isCurrentStatsValidForTheQuery(table, txnId, writeIdList, false); } - ColumnStatistics cS = getTableColumnStatisticsInternal( + ColumnStatistics stats = getTableColumnStatisticsInternal( catName, dbName, tableName, colNames, true, true); - if (cS != null && iLL != null) { - cS.setIsStatsCompliant(iLL); + if (stats != null && isCompliant != null) { + stats.setIsStatsCompliant(isCompliant); } - return cS; + return stats; } protected ColumnStatistics getTableColumnStatisticsInternal( @@ -8634,30 +8725,31 @@ protected ColumnStatistics getJdoResult( List<String> partNames, List<String> colNames, long txnId, String writeIdList) throws MetaException, NoSuchObjectException { - - // If any of the current partition stats in the metastore doesn't comply with - // the isolation level of the query, return null. + if (partNames == null || partNames.isEmpty()) { + LOG.warn("The given partNames does not have any name."); + return null; + } + List<ColumnStatistics> allStats = getPartitionColumnStatisticsInternal( + catName, dbName, tableName, partNames, colNames, true, true); if (writeIdList != null) { - if (partNames == null && partNames.isEmpty()) { - LOG.warn("The given partNames does not have any name."); - return null; - } - // TODO## this is not correct; stats updater patch will fix it to return stats for valid partitions, - // and no stats for invalid. Remove this comment when merging that patch. - // Loop through the given "partNames" list - // checking isolation-level-compliance of each partition column stats.
- for(String partName : partNames) { - MPartition mpart = getMPartition(catName, dbName, tableName, Warehouse.getPartValuesFromPartName(partName)); - if (!isCurrentStatsValidForTheQuery(mpart, txnId, writeIdList)) { - LOG.debug("The current metastore transactional partition column statistics for {}.{}.{} " - + "(write ID {}) are not valid for current query ({} {})", dbName, tableName, - mpart.getPartitionName(), mpart.getWriteId(), txnId, writeIdList); - return Lists.newArrayList(); + // TODO## this could be improved to get partitions in bulk + for (ColumnStatistics cs : allStats) { + MPartition mpart = getMPartition(catName, dbName, tableName, + Warehouse.getPartValuesFromPartName(cs.getStatsDesc().getPartName())); + if (mpart == null + || !isCurrentStatsValidForTheQuery(mpart, txnId, writeIdList, false)) { + if (mpart != null) { + LOG.debug("The current metastore transactional partition column statistics for {}.{}.{} " + + "(write ID {}) are not valid for current query ({} {})", dbName, tableName, + mpart.getPartitionName(), mpart.getWriteId(), txnId, writeIdList); + } + cs.setIsStatsCompliant(false); + } else { + cs.setIsStatsCompliant(true); } } } - return getPartitionColumnStatisticsInternal( - catName, dbName, tableName, partNames, colNames, true, true); + return allStats; } protected List getPartitionColumnStatisticsInternal( @@ -8726,7 +8818,7 @@ public AggrStats get_aggr_stats_for(String catName, String dbName, String tblNam // checking isolation-level-compliance of each partition column stats. for(String partName : partNames) { MPartition mpart = getMPartition(catName, dbName, tblName, Warehouse.getPartValuesFromPartName(partName)); - if (!isCurrentStatsValidForTheQuery(mpart, txnId, writeIdList)) { + if (!isCurrentStatsValidForTheQuery(mpart, txnId, writeIdList, false)) { LOG.debug("The current metastore transactional partition column statistics " + "for " + dbName + "." + tblName + "." + mpart.getPartitionName() + " is not valid " + "for the current query."); @@ -8891,6 +8983,8 @@ public boolean deletePartitionColumnStatistics(String catName, String dbName, St throw new NoSuchObjectException("Table " + tableName + " for which stats deletion is requested doesn't exist"); } + // Note: this does not verify ACID state; called internally when removing cols/etc. + // Also called via an unused metastore API that checks for ACID tables. MPartition mPartition = getMPartition(catName, dbName, tableName, partVals); if (mPartition == null) { throw new NoSuchObjectException("Partition " + partName @@ -8973,6 +9067,8 @@ public boolean deleteTableColumnStatistics(String catName, String dbName, String TableName.getQualified(catName, dbName, tableName) + " for which stats deletion is requested doesn't exist"); } + // Note: this does not verify ACID state; called internally when removing cols/etc. + // Also called via an unused metastore API that checks for ACID tables. query = pm.newQuery(MTableColumnStatistics.class); String filter; String parameters; @@ -12305,10 +12401,10 @@ public int deleteRuntimeStats(int maxRetainSecs) throws MetaException { * @param queryWriteId writeId of the query * @Precondition "tbl" should be retrieved from the TBLS table. 
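+ * @param isCompleteStatsWriter true when the caller writes a complete stats snapshot and may replace stats left by invalid or aborted writes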
*/ - private boolean isCurrentStatsValidForTheQuery( - MTable tbl, long queryTxnId, String queryValidWriteIdList) throws MetaException { - return isCurrentStatsValidForTheQuery(tbl.getDatabase().getName(), tbl.getTableName(), - tbl.getParameters(), tbl.getWriteId(), queryTxnId, queryValidWriteIdList); + private boolean isCurrentStatsValidForTheQuery(MTable tbl, long queryTxnId, String queryValidWriteIdList, + boolean isCompleteStatsWriter) throws MetaException { + return isCurrentStatsValidForTheQuery(conf, tbl.getDatabase().getName(), tbl.getTableName(), + tbl.getParameters(), tbl.getWriteId(), queryTxnId, queryValidWriteIdList, isCompleteStatsWriter); } /** @@ -12325,30 +12421,39 @@ private boolean isCurrentStatsValidForTheQuery( * @param queryValidWriteIdList valid writeId list of the query * @Precondition "part" should be retrieved from the PARTITIONS table. */ - private boolean isCurrentStatsValidForTheQuery( - MPartition part, long queryTxnId, String queryValidWriteIdList) + private boolean isCurrentStatsValidForTheQuery(MPartition part, long queryTxnId, + String queryValidWriteIdList, boolean isCompleteStatsWriter) throws MetaException { - return isCurrentStatsValidForTheQuery(part.getTable().getDatabase().getName(), + return isCurrentStatsValidForTheQuery(conf, part.getTable().getDatabase().getName(), part.getTable().getTableName(), part.getParameters(), part.getWriteId(), - queryTxnId, queryValidWriteIdList); + queryTxnId, queryValidWriteIdList, isCompleteStatsWriter); + } + + private boolean isCurrentStatsValidForTheQuery(Partition part, long partWriteId, long queryTxnId, + String queryValidWriteIdList, boolean isCompleteStatsWriter) + throws MetaException { + return isCurrentStatsValidForTheQuery(conf, part.getDbName(), part.getTableName(), + part.getParameters(), partWriteId, queryTxnId, queryValidWriteIdList, isCompleteStatsWriter); } - private boolean isCurrentStatsValidForTheQuery(String dbName, String tblName, - Map statsParams, long statsWriteId, long queryTxnId, - String queryValidWriteIdList) throws MetaException { + // TODO: move to somewhere else + public static boolean isCurrentStatsValidForTheQuery(Configuration conf, String dbName, + String tblName, Map statsParams, long statsWriteId, long queryTxnId, + String queryValidWriteIdList, boolean isCompleteStatsWriter) throws MetaException { // Note: can be changed to debug/info to verify the calls. - LOG.trace("Called with stats write ID {}; query {}, {}; params {}", - statsWriteId, queryTxnId, queryValidWriteIdList, statsParams); - // if statsWriteIdList is null, + // TODO## change this to debug when merging + LOG.info("isCurrentStatsValidForTheQuery with stats write ID {}; query {}, {}; writer: {} params {}", + statsWriteId, queryTxnId, queryValidWriteIdList, isCompleteStatsWriter, statsParams); // return true since the stats does not seem to be transactional. + // stats write ID 1; query 2, default.stats_part:1:9223372036854775807::; if (statsWriteId < 1) { return true; } // This COLUMN_STATS_ACCURATE(CSA) state checking also includes the case that the stats is // written by an aborted transaction but TXNS has no entry for the transaction - // after compaction. - if (!StatsSetupConst.areBasicStatsUptoDate(statsParams)) { + // after compaction. Don't check for a complete stats writer - it may replace invalid stats. 
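The checks that continue below are built on ValidReaderWriteIdList, constructed from the per-table write-id string quoted in the debug comment above (roughly <table>:<highWatermark>:<minOpenWriteId>:<openIds>:<abortedIds>). A minimal standalone sketch with a made-up list, illustrative only, to make the three outcomes concrete:

import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
import org.apache.hadoop.hive.common.ValidWriteIdList;

public class WriteIdValidityDemo {
  public static void main(String[] args) {
    // Made-up per-table list: high watermark 5, nothing open, write id 3 aborted.
    ValidWriteIdList forQuery =
        new ValidReaderWriteIdList("default.stats_part:5:" + Long.MAX_VALUE + "::3");
    System.out.println(forQuery.isWriteIdValid(4));    // true: visible to the query, so not a concurrent write
    System.out.println(forQuery.isWriteIdValid(6));    // false: above the high watermark, i.e. concurrent
    System.out.println(forQuery.isWriteIdAborted(3));  // true: only an isCompleteStatsWriter caller may replace such stats
  }
}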
+ if (!isCompleteStatsWriter && !StatsSetupConst.areBasicStatsUptoDate(statsParams)) { return false; } @@ -12359,12 +12464,22 @@ private boolean isCurrentStatsValidForTheQuery(String dbName, String tblName, return true; } - ValidWriteIdList list4TheQuery = new ValidReaderWriteIdList(queryValidWriteIdList); - // Just check if the write ID is valid. If it's valid (i.e. we are allowed to see it), - // that means it cannot possibly be a concurrent write. If it's not valid (we are not - // allowed to see it), that means it's either concurrent or aborted, same thing for us. - if (list4TheQuery.isWriteIdValid(statsWriteId)) { - return true; + if (queryValidWriteIdList != null) { // Can be null when stats are being reset to invalid. + ValidWriteIdList list4TheQuery = new ValidReaderWriteIdList(queryValidWriteIdList); + // Just check if the write ID is valid. If it's valid (i.e. we are allowed to see it), + // that means it cannot possibly be a concurrent write. If it's not valid (we are not + // allowed to see it), that means it's either concurrent or aborted, same thing for us. + if (list4TheQuery.isWriteIdValid(statsWriteId)) { + return true; + } + // Updater is also allowed to overwrite stats from aborted txns, as long as they are not concurrent. + if (isCompleteStatsWriter && list4TheQuery.isWriteIdAborted(statsWriteId)) { + return true; + } + } + + if (queryTxnId < 1) { + return false; // The caller is outside of a txn; no need to check the same-txn case. } // This assumes that all writes within the same txn are sequential and can see each other. diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java index 681e1e544a135fc1f57448365e406e7aa2a62ddd..95e84453cd8e57b6ed4d6515cb7928336fea27d7 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -864,7 +864,7 @@ Partition getPartitionWithAuth(String catName, String dbName, String tblName, * @throws InvalidObjectException the stats object is invalid * @throws InvalidInputException unable to record the stats for the table */ - boolean updateTableColumnStatistics(ColumnStatistics colStats) + boolean updateTableColumnStatistics(ColumnStatistics colStats, long txnId, String validWriteIds, long writeId) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException; /** Persists the given column statistics object to the metastore @@ -875,9 +875,10 @@ boolean updateTableColumnStatistics(ColumnStatistics colStats) * @throws MetaException error accessing the RDBMS. 
* @throws InvalidObjectException the stats object is invalid * @throws InvalidInputException unable to record the stats for the table + * @throws TException */ boolean updatePartitionColumnStatistics(ColumnStatistics statsObj, - List partVals) + List partVals, long txnId, String validWriteIds, long writeId) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException; /** diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 8539605e0f5b17dc1b3ba64a0be2982ab24ac8e5..9bee0dbd0f428c3bd5866f10a7a37de6c1046467 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -547,6 +547,7 @@ private void updateTableColStats(RawStore rawStore, String catName, String dbNam if (!table.isSetPartitionKeys()) { List colNames = MetaStoreUtils.getColumnNamesForTable(table); Deadline.startTimer("getTableColumnStatistics"); + // TODO## should this take write ID into account? or at least cache write ID to verify? ColumnStatistics tableColStats = rawStore.getTableColumnStatistics(catName, dbName, tblName, colNames); Deadline.stopTimer(); @@ -1598,9 +1599,9 @@ public Partition getPartitionWithAuth(String catName, String dbName, String tblN } @Override - public boolean updateTableColumnStatistics(ColumnStatistics colStats) + public boolean updateTableColumnStatistics(ColumnStatistics colStats, long txnId, String validWriteIds, long writeId) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { - boolean succ = rawStore.updateTableColumnStatistics(colStats); + boolean succ = rawStore.updateTableColumnStatistics(colStats, txnId, validWriteIds, writeId); if (succ) { String catName = colStats.getStatsDesc().isSetCatName() ? normalizeIdentifier(colStats.getStatsDesc().getCatName()) : @@ -1676,9 +1677,10 @@ public boolean deleteTableColumnStatistics(String catName, String dbName, String } @Override - public boolean updatePartitionColumnStatistics(ColumnStatistics colStats, List partVals) + public boolean updatePartitionColumnStatistics(ColumnStatistics colStats, List partVals, + long txnId, String validWriteIds, long writeId) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { - boolean succ = rawStore.updatePartitionColumnStatistics(colStats, partVals); + boolean succ = rawStore.updatePartitionColumnStatistics(colStats, partVals, txnId, validWriteIds, writeId); if (succ) { String catName = colStats.getStatsDesc().isSetCatName() ? 
normalizeIdentifier(colStats.getStatsDesc().getCatName()) : DEFAULT_CATALOG_NAME; diff --git standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift index 1b4f01a715e4e3e3d78e07a47dabe80e95340cba..a357030a0cd1b774f6edd7450143334389ec6a98 100644 --- standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift +++ standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift @@ -278,6 +278,18 @@ struct GrantRevokePrivilegeResponse { 1: optional bool success; } +struct TruncateTableRequest { + 1: required string dbName, + 2: required string tableName, + 3: optional list partNames, + 4: optional i64 txnId=-1, + 5: optional i64 writeId=-1, + 6: optional string validWriteIdList +} + +struct TruncateTableResponse { +} + struct Role { 1: string roleName, 2: i32 createTime, @@ -571,9 +583,7 @@ struct ColumnStatisticsDesc { struct ColumnStatistics { 1: required ColumnStatisticsDesc statsDesc, 2: required list statsObj, -3: optional i64 txnId=-1, // transaction id of the query that sends this structure TODO## needed? -4: optional string validWriteIdList, // valid write id list for the table for which this struct is being sent -5: optional bool isStatsCompliant // Are the stats isolation-level-compliant with the +3: optional bool isStatsCompliant // Are the stats isolation-level-compliant with the // the calling query? } @@ -591,6 +601,10 @@ struct SetPartitionsStatsRequest { 5: optional string validWriteIdList // valid write id list for the table for which this struct is being sent } +struct SetPartitionsStatsResponse { +1: required bool result; +} + // schema of the table/query results etc. struct Schema { // column names, types, comments @@ -1598,18 +1612,34 @@ struct GetRuntimeStatsRequest { } struct AlterPartitionsRequest { - 1: required string dbName, - 2: required string tableName, - 3: required list partitions, - 4: required EnvironmentContext environmentContext, - 5: optional i64 txnId=-1, - 6: optional i64 writeId=-1, - 7: optional string validWriteIdList + 1: optional string catName, + 2: required string dbName, + 3: required string tableName, + 4: required list partitions, + 5: optional EnvironmentContext environmentContext, + 6: optional i64 txnId=-1, + 7: optional i64 writeId=-1, + 8: optional string validWriteIdList } struct AlterPartitionsResponse { } +struct AlterTableRequest { + 1: optional string catName, + 2: required string dbName, + 3: required string tableName, + 4: required Table table, + 5: optional EnvironmentContext environmentContext, + 6: optional i64 txnId=-1, + 7: optional i64 writeId=-1, + 8: optional string validWriteIdList +// TODO: also add cascade here, out of envCtx +} + +struct AlterTableResponse { +} + // Exceptions. 
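The new request/response structs above carry the transactional context (txnId, writeId, validWriteIdList) as explicit fields rather than passing it through EnvironmentContext properties. A hedged client-side sketch of filling one in and invoking the alter_table_req service call added further down; identifiers other than the declared fields are illustrative, and setter names assume standard Thrift Java codegen:

// Illustrative helper, not part of the patch.
static void alterTableWithTxnContext(ThriftHiveMetastore.Iface client, String db, String tbl,
    Table alteredTable, long txnId, long writeId, String validWriteIdList) throws TException {
  AlterTableRequest req = new AlterTableRequest();
  req.setDbName(db);
  req.setTableName(tbl);
  req.setTable(alteredTable);
  req.setTxnId(txnId);                       // -1 when not running inside a transaction
  req.setWriteId(writeId);                   // write id allocated for this table, -1 if none
  req.setValidWriteIdList(validWriteIdList); // may be null outside a transaction
  client.alter_table_req(req);               // added alongside the existing alter_table calls
}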
exception MetaException { @@ -1754,6 +1784,7 @@ service ThriftHiveMetastore extends fb303.FacebookService throws(1:NoSuchObjectException o1, 2:MetaException o3) void truncate_table(1:string dbName, 2:string tableName, 3:list partNames) throws(1:MetaException o1) + TruncateTableResponse truncate_table_req(1:TruncateTableRequest req) throws(1:MetaException o1) list get_tables(1: string db_name, 2: string pattern) throws (1: MetaException o1) list get_tables_by_type(1: string db_name, 2: string pattern, 3: string tableType) throws (1: MetaException o1) list get_materialized_views_for_rewriting(1: string db_name) throws (1: MetaException o1) @@ -1819,6 +1850,11 @@ service ThriftHiveMetastore extends fb303.FacebookService // alter table not only applies to future partitions but also cascade to existing partitions void alter_table_with_cascade(1:string dbname, 2:string tbl_name, 3:Table new_tbl, 4:bool cascade) throws (1:InvalidOperationException o1, 2:MetaException o2) + AlterTableResponse alter_table_req(1:AlterTableRequest req) + throws (1:InvalidOperationException o1, 2:MetaException o2) + + + // the following applies to only tables that have partitions // * See notes on DDL_TIME Partition add_partition(1:Partition new_part) @@ -1943,7 +1979,7 @@ service ThriftHiveMetastore extends fb303.FacebookService void alter_partitions_with_environment_context(1:string db_name, 2:string tbl_name, 3:list new_parts, 4:EnvironmentContext environment_context) throws (1:InvalidOperationException o1, 2:MetaException o2) - AlterPartitionsResponse alter_partitions_with_environment_context_req(1:AlterPartitionsRequest req) + AlterPartitionsResponse alter_partitions_req(1:AlterPartitionsRequest req) throws (1:InvalidOperationException o1, 2:MetaException o2) void alter_partition_with_environment_context(1:string db_name, @@ -2012,6 +2048,12 @@ service ThriftHiveMetastore extends fb303.FacebookService bool update_partition_column_statistics(1:ColumnStatistics stats_obj) throws (1:NoSuchObjectException o1, 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4) + SetPartitionsStatsResponse update_table_column_statistics_req(1:SetPartitionsStatsRequest req) throws (1:NoSuchObjectException o1, + 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4) + SetPartitionsStatsResponse update_partition_column_statistics_req(1:SetPartitionsStatsRequest req) throws (1:NoSuchObjectException o1, + 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4) + + // get APIs return the column statistics corresponding to db_name, tbl_name, [part_name], col_name if // such statistics exists. 
If the required statistics doesn't exist, get APIs throw NoSuchObjectException // For instance, if get_table_column_statistics is called on a partitioned table for which only diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java index 106d9f21d75d129164c73e0a2788c41e2e69eb16..a5e6918a8225fe95831356b1d8f467debd53c03d 100644 --- standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java @@ -694,18 +694,18 @@ public boolean deletePartitionColumnStatistics(String catName, String dbName, St } @Override - public boolean updateTableColumnStatistics(ColumnStatistics statsObj) + public boolean updateTableColumnStatistics(ColumnStatistics statsObj, long txnId, String validWriteIds, long writeId) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { - return objectStore.updateTableColumnStatistics(statsObj); + return objectStore.updateTableColumnStatistics(statsObj, txnId, validWriteIds, writeId); } @Override public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj, - List partVals) + List partVals, long txnId, String validWriteIds, long writeId) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { - return objectStore.updatePartitionColumnStatistics(statsObj, partVals); + return objectStore.updatePartitionColumnStatistics(statsObj, partVals, txnId, validWriteIds, writeId); } @Override diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java index 2587a98130dd3e2e1f4c7230bdd26b5e369267d6..8270f6a3dd600fbcd273a985c98648f50698a500 100644 --- standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java @@ -743,13 +743,15 @@ public boolean deletePartitionColumnStatistics(String catName, String dbName, St } @Override - public boolean updateTableColumnStatistics(ColumnStatistics statsObj) + public boolean updateTableColumnStatistics(ColumnStatistics statsObj, + long txnId, String validWriteIds, long writeId) throws NoSuchObjectException, MetaException, InvalidObjectException { return false; } @Override - public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj,List partVals) + public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj,List partVals, + long txnId, String validWriteIds, long writeId) throws NoSuchObjectException, MetaException, InvalidObjectException { return false; } diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java index 6ef416f101f40f64f60e9c7c29d42da485756d0c..868a546bc638b968cf5f48fa47a6c47b0d455007 100644 --- 
standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java @@ -1635,13 +1635,7 @@ public void alter_partitions(String dbName, String tblName, List newP req.setTableName(tblName); req.setPartitions(newParts); req.setEnvironmentContext(environmentContext); - // TODO: this is ugly... account for ability to pass via EC for the old API. - if (environmentContext != null && environmentContext.isSetProperties() - && environmentContext.getProperties().containsKey(StatsSetupConst.VALID_WRITE_IDS)) { - req.setTxnId(Long.parseLong(environmentContext.getProperties().get(StatsSetupConst.TXN_ID))); - req.setValidWriteIdList(environmentContext.getProperties().get(StatsSetupConst.VALID_WRITE_IDS)); - } - client.alter_partitions_with_environment_context_req(req); + client.alter_partitions_req(req); } @Override @@ -1656,7 +1650,7 @@ public void alter_partitions(String dbName, String tblName, List newP req.setEnvironmentContext(environmentContext); req.setTxnId(txnId); req.setValidWriteIdList(writeIdList); - client.alter_partitions_with_environment_context_req(req); + client.alter_partitions_req(req); } @Override @@ -3532,4 +3526,24 @@ public void addRuntimeStat(RuntimeStat stat) throws TException { throw new UnsupportedOperationException(); } + @Override + public void alter_table(String catName, String databaseName, String tblName, Table table, + EnvironmentContext environmentContext, long txnId, String validWriteIdList) + throws InvalidOperationException, MetaException, TException { + throw new UnsupportedOperationException(); + } + + @Override + public void alter_partition(String dbName, String tblName, Partition newPart, + EnvironmentContext environmentContext, long txnId, String writeIdList) + throws InvalidOperationException, MetaException, TException { + throw new UnsupportedOperationException(); + } + + @Override + public void truncateTable(String dbName, String tableName, + List partNames, long txnId, String validWriteIds, long writeId) + throws TException { + throw new UnsupportedOperationException(); + } } diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestHiveAlterHandler.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestHiveAlterHandler.java index d9dd954f7e791008fec9a460712802541e1e3109..533cabb5a3c21102f9daf1eb3a6bdb7244b2291b 100644 --- standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestHiveAlterHandler.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestHiveAlterHandler.java @@ -60,7 +60,7 @@ public void testAlterTableAddColNotUpdateStats() throws MetaException, InvalidOb getDefaultCatalog(conf), oldTable.getDbName(), oldTable.getTableName(), Arrays.asList("col1", "col2", "col3")); HiveAlterHandler handler = new HiveAlterHandler(); handler.setConf(conf); - handler.alterTableUpdateTableColumnStats(msdb, oldTable, newTable, null); + handler.alterTableUpdateTableColumnStats(msdb, oldTable, newTable, null, -1, null); } @Test @@ -85,7 +85,7 @@ public void testAlterTableDelColUpdateStats() throws MetaException, InvalidObjec RawStore msdb = Mockito.mock(RawStore.class); HiveAlterHandler handler = new HiveAlterHandler(); handler.setConf(conf); - handler.alterTableUpdateTableColumnStats(msdb, oldTable, newTable, null); + 
handler.alterTableUpdateTableColumnStats(msdb, oldTable, newTable, null, -1, null); Mockito.verify(msdb, Mockito.times(1)).getTableColumnStatistics( getDefaultCatalog(conf), oldTable.getDbName(), oldTable.getTableName(), Arrays.asList("col1", "col2", "col3", "col4") ); @@ -115,7 +115,7 @@ public void testAlterTableChangePosNotUpdateStats() throws MetaException, Invali getDefaultCatalog(conf), oldTable.getDbName(), oldTable.getTableName(), Arrays.asList("col1", "col2", "col3", "col4")); HiveAlterHandler handler = new HiveAlterHandler(); handler.setConf(conf); - handler.alterTableUpdateTableColumnStats(msdb, oldTable, newTable, null); + handler.alterTableUpdateTableColumnStats(msdb, oldTable, newTable, null, -1, null); } } diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java index cb32236d548ecff58fd190f1f4df8da45a42fa1a..75ab80b439cdd42fc3dfc1aa5c9f8cb692316b97 100644 --- standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java @@ -244,15 +244,13 @@ private static void partitionTester(HiveMetaStoreClient client, Configuration co assertNotNull("Unable to create partition " + part4, retp4); Partition part_get = client.getPartition(dbName, tblName, part.getValues()); - if(isThriftClient) { - // since we are using thrift, 'part' will not have the create time and - // last DDL time set since it does not get updated in the add_partition() - // call - likewise part2 and part3 - set it correctly so that equals check - // doesn't fail - adjust(client, part, dbName, tblName); - adjust(client, part2, dbName, tblName); - adjust(client, part3, dbName, tblName); - } + // since we are using thrift, 'part' will not have the create time and + // last DDL time set since it does not get updated in the add_partition() + // call - likewise part2 and part3 - set it correctly so that equals check + // doesn't fail + adjust(client, part, dbName, tblName, isThriftClient); + adjust(client, part2, dbName, tblName, isThriftClient); + adjust(client, part3, dbName, tblName, isThriftClient); assertTrue("Partitions are not same", part.equals(part_get)); // check null cols schemas for a partition @@ -383,12 +381,10 @@ private static void partitionTester(HiveMetaStoreClient client, Configuration co Partition mpart3 = makePartitionObject(dbName, tblName, mvals3, tbl, "/mpart3"); client.add_partitions(Arrays.asList(mpart1,mpart2,mpart3)); - if(isThriftClient) { - // do DDL time munging if thrift mode - adjust(client, mpart1, dbName, tblName); - adjust(client, mpart2, dbName, tblName); - adjust(client, mpart3, dbName, tblName); - } + // do DDL time munging if thrift mode + adjust(client, mpart1, dbName, tblName, isThriftClient); + adjust(client, mpart2, dbName, tblName, isThriftClient); + adjust(client, mpart3, dbName, tblName, isThriftClient); verifyPartitionsPublished(client, dbName, tblName, Arrays.asList(mvals1.get(0)), Arrays.asList(mpart1,mpart2,mpart3)); @@ -418,10 +414,8 @@ private static void partitionTester(HiveMetaStoreClient client, Configuration co // add_partitions(5) : ok client.add_partitions(Arrays.asList(mpart5)); - if(isThriftClient) { - // do DDL time munging if thrift mode - adjust(client, mpart5, dbName, tblName); - } + // do DDL time munging if thrift mode + adjust(client, 
mpart5, dbName, tblName, isThriftClient); verifyPartitionsPublished(client, dbName, tblName, Arrays.asList(mvals1.get(0)), @@ -1976,12 +1970,17 @@ public void testGetConfigValue() { } private static void adjust(HiveMetaStoreClient client, Partition part, - String dbName, String tblName) throws TException { + String dbName, String tblName, boolean isThriftClient) throws TException { Partition part_get = client.getPartition(dbName, tblName, part.getValues()); - part.setCreateTime(part_get.getCreateTime()); - part.putToParameters(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.DDL_TIME, Long.toString(part_get.getCreateTime())); + if (isThriftClient) { + part.setCreateTime(part_get.getCreateTime()); + part.putToParameters(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.DDL_TIME, Long.toString(part_get.getCreateTime())); + } + part.setWriteId(part_get.getWriteId()); } + + private static void silentDropDatabase(String dbName) throws TException { try { for (String tableName : client.getTables(dbName, "*")) { diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreEventListener.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreEventListener.java index fb4a761c284ea5ae87cff0ce83101ca8ac9e3e0e..fe64a91b56ecd2b08e7c2e6f8fdfae838d07990f 100644 --- standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreEventListener.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreEventListener.java @@ -244,6 +244,7 @@ public void testListener() throws Exception { Assert.assertTrue(partEvent.getStatus()); Partition part = msc.getPartition("hive2038", "tmptbl", "b=2011"); Partition partAdded = partEvent.getPartitionIterator().next(); + partAdded.setWriteId(part.getWriteId()); validateAddPartition(part, partAdded); validateTableInAddPartition(tbl, partEvent.getTable()); validateAddPartition(part, prePartEvent.getPartitions().get(0)); diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java index c40d45d7fa1bed5c1dc054bc95043b45080f325b..995271a6ee6b19b67c7e04c9f5fd2cd7a3df6bda 100644 --- standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java @@ -559,7 +559,7 @@ private void createPartitionedTable(boolean withPrivileges, boolean withStatisti ColumnStatisticsObj partStats = new ColumnStatisticsObj("test_part_col", "int", data); statsObjList.add(partStats); - objectStore.updatePartitionColumnStatistics(stats, part.getValues()); + objectStore.updatePartitionColumnStatistics(stats, part.getValues(), -1, null, -1); } } if (withPrivileges) { diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java index 717c5ee848b158f28b3233c976d2f4891e7a1650..01a8f816ebe550b00a4defd1652f127205bfd3eb 100644 --- standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java @@ -175,7 +175,7 
@@ public void testPartitionOps() throws Exception { data.setLongStats(dcsd); obj.setStatsData(data); cs.addToStatsObj(obj); - store.updatePartitionColumnStatistics(cs, partVal); + store.updatePartitionColumnStatistics(cs, partVal, -1, null, -1); } diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java index 7cf5c267bbe5e7ffcc36e0055979d201d454bd27..e4854f90b070b89ce19a2b5a324dfbce0e857b26 100644 --- standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java @@ -455,7 +455,7 @@ public void testTableColStatsOps() throws Exception { stats.setStatsObj(colStatObjs); // Save to DB - objectStore.updateTableColumnStatistics(stats); + objectStore.updateTableColumnStatistics(stats, -1, null, -1); // Prewarm CachedStore CachedStore.setCachePrewarmedState(false); @@ -720,8 +720,8 @@ public void testAggrStatsRepeatedRead() throws Exception { stats.setStatsDesc(statsDesc); stats.setStatsObj(colStatObjs); - cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1); - cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2); + cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1, -1, null, -1); + cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2, -1, null, -1); List colNames = new ArrayList<>(); colNames.add(colName); @@ -790,10 +790,10 @@ public void testPartitionAggrStats() throws Exception { stats.setStatsDesc(statsDesc); stats.setStatsObj(colStatObjs); - cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1); + cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1, -1, null, -1); longStats.setNumDVs(40); - cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2); + cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2, -1, null, -1); List colNames = new ArrayList<>(); colNames.add(colName); @@ -871,7 +871,7 @@ public void testPartitionAggrStatsBitVector() throws Exception { stats.setStatsDesc(statsDesc); stats.setStatsObj(colStatObjs); - cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1); + cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1, -1, null, -1); longStats.setNumDVs(40); hll = HyperLogLog.builder().build(); @@ -881,7 +881,7 @@ public void testPartitionAggrStatsBitVector() throws Exception { hll.addLong(5); longStats.setBitVectors(hll.serialize()); - cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2); + cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2, -1, null, -1); List colNames = new ArrayList<>(); colNames.add(colName); diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestAlterPartitions.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestAlterPartitions.java index 9b9b101675bdebe6d8f832f73bae54353d3887d1..2ec20c2afb4294a9062d9f56fd4d5c3237dccf40 100644 --- standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestAlterPartitions.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestAlterPartitions.java @@ -410,11 +410,15 @@ public 
void testAlterPartitionNoDbName() throws Exception { client.alter_partition("", TABLE_NAME, partitions.get(3)); } - @Test(expected = MetaException.class) + @Test public void testAlterPartitionNullDbName() throws Exception { createTable4PartColsParts(client); List partitions = client.listPartitions(DB_NAME, TABLE_NAME, (short)-1); - client.alter_partition(null, TABLE_NAME, partitions.get(3)); + try { + client.alter_partition(null, TABLE_NAME, partitions.get(3)); + Assert.fail("Expected exception"); + } catch (MetaException | TProtocolException ex) { + } } @Test(expected = InvalidOperationException.class) @@ -424,11 +428,15 @@ public void testAlterPartitionNoTblName() throws Exception { client.alter_partition(DB_NAME, "", partitions.get(3)); } - @Test(expected = MetaException.class) + @Test public void testAlterPartitionNullTblName() throws Exception { createTable4PartColsParts(client); List partitions = client.listPartitions(DB_NAME, TABLE_NAME, (short)-1); - client.alter_partition(DB_NAME, null, partitions.get(3)); + try { + client.alter_partition(DB_NAME, null, partitions.get(3)); + Assert.fail("Expected exception"); + } catch (MetaException | TProtocolException ex) { + } } @Test @@ -536,11 +544,15 @@ public void testAlterPartitionWithEnvironmentCtxNoDbName() throws Exception { client.alter_partition("", TABLE_NAME, partitions.get(3), new EnvironmentContext()); } - @Test(expected = MetaException.class) + @Test public void testAlterPartitionWithEnvironmentCtxNullDbName() throws Exception { createTable4PartColsParts(client); List partitions = client.listPartitions(DB_NAME, TABLE_NAME, (short)-1); - client.alter_partition(null, TABLE_NAME, partitions.get(3), new EnvironmentContext()); + try { + client.alter_partition(null, TABLE_NAME, partitions.get(3), new EnvironmentContext()); + Assert.fail("Expected exception"); + } catch (MetaException | TProtocolException ex) { + } } @Test(expected = InvalidOperationException.class) @@ -550,11 +562,15 @@ public void testAlterPartitionWithEnvironmentCtxNoTblName() throws Exception { client.alter_partition(DB_NAME, "", partitions.get(3), new EnvironmentContext()); } - @Test(expected = MetaException.class) + @Test public void testAlterPartitionWithEnvironmentCtxNullTblName() throws Exception { createTable4PartColsParts(client); List partitions = client.listPartitions(DB_NAME, TABLE_NAME, (short)-1); - client.alter_partition(DB_NAME, null, partitions.get(3), new EnvironmentContext()); + try { + client.alter_partition(DB_NAME, null, partitions.get(3), new EnvironmentContext()); + Assert.fail("Expected exception"); + } catch (MetaException | TProtocolException ex) { + } } @Test @@ -680,11 +696,15 @@ public void testAlterPartitionsNoDbName() throws Exception { client.alter_partitions("", TABLE_NAME, Lists.newArrayList(part)); } - @Test(expected = MetaException.class) + @Test public void testAlterPartitionsNullDbName() throws Exception { createTable4PartColsParts(client); Partition part = client.listPartitions(DB_NAME, TABLE_NAME, (short)-1).get(0); - client.alter_partitions(null, TABLE_NAME, Lists.newArrayList(part)); + try { + client.alter_partitions(null, TABLE_NAME, Lists.newArrayList(part)); + Assert.fail("Expected exception"); + } catch (MetaException | TProtocolException ex) { + } } @Test(expected = InvalidOperationException.class) @@ -853,11 +873,15 @@ public void testAlterPartitionsWithEnvironmentCtxNoDbName() throws Exception { client.alter_partitions("", TABLE_NAME, Lists.newArrayList(part), new EnvironmentContext()); } - @Test(expected = 
MetaException.class) + @Test public void testAlterPartitionsWithEnvironmentCtxNullDbName() throws Exception { createTable4PartColsParts(client); Partition part = client.listPartitions(DB_NAME, TABLE_NAME, (short)-1).get(0); - client.alter_partitions(null, TABLE_NAME, Lists.newArrayList(part), new EnvironmentContext()); + try { + client.alter_partitions(null, TABLE_NAME, Lists.newArrayList(part), new EnvironmentContext()); + Assert.fail("Expected exception"); + } catch (MetaException | TProtocolException ex) { + } } @Test(expected = InvalidOperationException.class) diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestAppendPartitions.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestAppendPartitions.java index 8ce8531e9a15bcebdd017018f986d0a4c161330c..462584ac662597d116a6b305fbe957a9d44022d1 100644 --- standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestAppendPartitions.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestAppendPartitions.java @@ -123,6 +123,7 @@ public void testAppendPartition() throws Exception { Assert.assertNotNull(appendedPart); Partition partition = client.getPartition(table.getDbName(), table.getTableName(), partitionValues); + appendedPart.setWriteId(partition.getWriteId()); Assert.assertEquals(partition, appendedPart); verifyPartition(partition, table, partitionValues, "year=2017/month=may"); verifyPartitionNames(table, Lists.newArrayList("year=2017/month=march", "year=2017/month=april", @@ -141,6 +142,7 @@ public void testAppendPartitionToExternalTable() throws Exception { Assert.assertNotNull(appendedPart); Partition partition = client.getPartition(table.getDbName(), table.getTableName(), partitionValues); + appendedPart.setWriteId(partition.getWriteId()); Assert.assertEquals(partition, appendedPart); verifyPartition(partition, table, partitionValues, "year=2017/month=may"); verifyPartitionNames(table, Lists.newArrayList("year=2017/month=may")); @@ -294,6 +296,7 @@ public void testAppendPart() throws Exception { Assert.assertNotNull(appendedPart); Partition partition = client.getPartition(table.getDbName(), table.getTableName(), getPartitionValues(partitionName)); + appendedPart.setWriteId(partition.getWriteId()); Assert.assertEquals(partition, appendedPart); verifyPartition(partition, table, getPartitionValues(partitionName), partitionName); verifyPartitionNames(table, Lists.newArrayList("year=2017/month=march", "year=2017/month=april", @@ -312,6 +315,7 @@ public void testAppendPartToExternalTable() throws Exception { Assert.assertNotNull(appendedPart); Partition partition = client.getPartition(table.getDbName(), table.getTableName(), getPartitionValues(partitionName)); + appendedPart.setWriteId(partition.getWriteId()); Assert.assertEquals(partition, appendedPart); verifyPartition(partition, table, getPartitionValues(partitionName), partitionName); verifyPartitionNames(table, Lists.newArrayList(partitionName)); @@ -475,12 +479,14 @@ public void otherCatalog() throws TException { Assert.assertEquals("a1", created.getValues().get(0)); Partition fetched = client.getPartition(catName, dbName, tableName, Collections.singletonList("a1")); + created.setWriteId(fetched.getWriteId()); Assert.assertEquals(created, fetched); created = client.appendPartition(catName, dbName, tableName, "partcol=a2"); Assert.assertEquals(1, created.getValuesSize()); Assert.assertEquals("a2", 
created.getValues().get(0)); fetched = client.getPartition(catName, dbName, tableName, Collections.singletonList("a2")); + created.setWriteId(fetched.getWriteId()); Assert.assertEquals(created, fetched); } diff --git standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java index efa3e7c8b4116ffdc789ac685c1ff641c51d2466..c1674bf06ef18eaada0b4b2de6ba8a605959aff8 100644 --- standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java +++ standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java @@ -237,6 +237,7 @@ public void testCreateGetDeleteTable() throws Exception { // Reset the parameters, so we can compare table.setParameters(createdTable.getParameters()); table.setCreationMetadata(createdTable.getCreationMetadata()); + table.setWriteId(createdTable.getWriteId()); Assert.assertEquals("create/get table data", table, createdTable); // Check that the directory is created @@ -703,6 +704,7 @@ public void testAlterTable() throws Exception { // Some of the data is set on the server side, so reset those newTable.setCreateTime(alteredTable.getCreateTime()); newTable.setCreationMetadata(alteredTable.getCreationMetadata()); + newTable.setWriteId(alteredTable.getWriteId()); Assert.assertEquals("The table data should be the same", newTable, alteredTable); } @@ -916,13 +918,18 @@ public void testAlterTableNullDatabaseInNew() throws Exception { client.alter_table(originalTable.getDbName(), originalTable.getTableName(), newTable); } - @Test(expected = MetaException.class) + @Test public void testAlterTableNullTableNameInNew() throws Exception { Table originalTable = testTables[0]; Table newTable = originalTable.deepCopy(); newTable.setTableName(null); - client.alter_table(originalTable.getDbName(), originalTable.getTableName(), newTable); + try { + client.alter_table(originalTable.getDbName(), originalTable.getTableName(), newTable); + Assert.fail("Expected exception"); + } catch (MetaException | TProtocolException ex) { + // Expected. + } } @Test(expected = InvalidOperationException.class) @@ -951,20 +958,28 @@ public void testAlterTableNullStorageDescriptorInNew() throws Exception { client.alter_table(originalTable.getDbName(), originalTable.getTableName(), newTable); } - @Test(expected = MetaException.class) + @Test public void testAlterTableNullDatabase() throws Exception { Table originalTable = testTables[0]; Table newTable = originalTable.deepCopy(); - - client.alter_table(null, originalTable.getTableName(), newTable); + try { + client.alter_table(null, originalTable.getTableName(), newTable); + Assert.fail("Expected exception"); + } catch (MetaException | TProtocolException ex) { + } } - @Test(expected = MetaException.class) + @Test public void testAlterTableNullTableName() throws Exception { Table originalTable = testTables[0]; Table newTable = originalTable.deepCopy(); - client.alter_table(originalTable.getDbName(), null, newTable); + try { + client.alter_table(originalTable.getDbName(), null, newTable); + Assert.fail("Expected exception"); + } catch (MetaException | TProtocolException ex) { + // Expected. 
+ } } @Test @@ -977,7 +992,7 @@ public void testAlterTableNullNewTable() throws Exception { Assert.fail("Expected a NullPointerException or TTransportException to be thrown"); } catch (NullPointerException exception) { // Expected exception - Embedded MetaStore - } catch (TTransportException exception) { + } catch (TProtocolException exception) { // Expected exception - Remote MetaStore } }
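A closing note on the relaxed negative tests: with an embedded metastore a null database or table name still surfaces as a server-side MetaException, while a remote client can now fail earlier, when the null name is serialized through the Thrift layer, with TProtocolException; the updated tests therefore accept either. A hypothetical helper, not in the patch, capturing that pattern:

// Hypothetical test utility; names and packaging are illustrative.
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.thrift.protocol.TProtocolException;
import org.junit.Assert;

public final class NullNameAssertions {
  @FunctionalInterface
  public interface MetastoreCall {
    void run() throws Exception;
  }

  public static void assertRejectsNullName(MetastoreCall call) {
    try {
      call.run();
      Assert.fail("Expected exception");
    } catch (MetaException | TProtocolException expected) {
      // Either failure mode is acceptable: embedded vs. remote metastore.
    } catch (Exception other) {
      throw new AssertionError("Unexpected exception: " + other, other);
    }
  }
}
// Usage in a test: assertRejectsNullName(() -> client.alter_table(null, TABLE_NAME, newTable));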