diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 4ee8b9d0e3..37b0dcb8ef 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -2425,6 +2425,42 @@ public static TableSnapshot getTableSnapshot(Configuration conf,
         validWriteIdList != null ? validWriteIdList.toString() : null);
   }
 
+  /**
+   * Called by Hive.java for all DDL (write) operations. Advances the write id for the table
+   * via the transaction manager and stores the resulting write-id list in the config. The new
+   * write id is marked as committed in the config immediately, because all DDL is
+   * auto-committed and there is no chance to roll back.
+   */
+  public static ValidWriteIdList advanceWriteId(HiveConf conf, Table tbl) throws LockException {
+    if (!isTransactionalTable(tbl)) {
+      return null;
+    }
+    HiveTxnManager txnMgr = SessionState.get().getTxnMgr();
+    long writeId = txnMgr.getTableWriteId(tbl.getDbName(), tbl.getTableName());
+    List<String> txnTables = new ArrayList<>();
+    String fullTableName = getFullTableName(tbl.getDbName(), tbl.getTableName());
+    txnTables.add(fullTableName);
+    ValidTxnWriteIdList txnWriteIds;
+    if (conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY) != null) {
+      txnWriteIds = new ValidTxnWriteIdList(conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
+    } else {
+      String txnString;
+      if (conf.get(ValidTxnList.VALID_TXNS_KEY) != null) {
+        txnString = conf.get(ValidTxnList.VALID_TXNS_KEY);
+      } else {
+        ValidTxnList txnIds = txnMgr.getValidTxns();
+        txnString = txnIds.toString();
+      }
+      txnWriteIds = txnMgr.getValidWriteIds(txnTables, txnString);
+    }
+    ValidWriteIdList writeIds = txnWriteIds.getTableValidWriteIdList(fullTableName);
+    if (writeIds != null) {
+      writeIds.locallyCommitWriteId(writeId);
+      conf.set(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY, txnWriteIds.toString());
+    }
+    return writeIds;
+  }
+
   /**
    * Returns ValidWriteIdList for the table with the given "dbName" and "tableName".
    * This is called when HiveConf has no list for the table.
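Note on the call pattern: every DDL path touched in Hive.java below wires advanceWriteId in the same way (open a transaction if none is active, advance the table's write id, run the metastore call, close the transaction in a finally block), so the shape is summarized once here. The sketch below is illustrative only and is not part of the patch; the class name DdlWriteIdSketch and the Runnable placeholder for the metastore call are invented for the example, while the calls it uses (AcidUtils.isTransactionalTable, AcidUtils.advanceWriteId, SessionState.get().getTxnMgr()) are the ones the patch itself relies on.

// Illustrative sketch, not part of the patch: the open/advance/close pattern that the
// Hive.java DDL methods below follow for transactional tables.
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.session.SessionState;

public class DdlWriteIdSketch {
  public void runDdl(HiveConf conf, Table tbl, Runnable metastoreDdlCall) throws Exception {
    boolean txnOpened = false;
    try {
      if (AcidUtils.isTransactionalTable(tbl)) {
        HiveTxnManager txnMgr = SessionState.get().getTxnMgr();
        if (!txnMgr.isTxnOpen()) {
          // Open a transaction just for this auto-committed DDL statement.
          txnMgr.openTxn(new Context(conf), SessionState.getUserFromAuthenticator());
          txnOpened = true;
        }
        // Allocate the next write id for the table and record it as committed in conf.
        AcidUtils.advanceWriteId(conf, tbl);
      }
      metastoreDdlCall.run(); // placeholder for the actual getMSC() DDL call
    } finally {
      if (txnOpened && SessionState.get().getTxnMgr().isTxnOpen()) {
        // Close the txn opened above; the patch commits or rolls back depending on the call site.
        SessionState.get().getTxnMgr().commitTxn();
      }
    }
  }
}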
diff --git ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java
index 0383881acc..cf86db4740 100644
--- ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java
+++ ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DummyTxnManager.java
@@ -55,6 +55,9 @@
 
   private HiveLockManagerCtx lockManagerCtx;
 
+  private long txnId = 0;
+  private int numTxn = 0;
+
   @Override
   public long openTxn(Context ctx, String user, TxnType txnType) throws LockException {
     // No-op
@@ -73,11 +76,11 @@ public long openTxn(Context ctx, String user) throws LockException {
   @Override
   public boolean isTxnOpen() {
-    return false;
+    return numTxn != 0;
   }
   @Override
   public long getCurrentTxnId() {
-    return 0L;
+    return txnId;
   }
 
   @Override
   public int getStmtIdAndIncrement() {
@@ -236,7 +239,7 @@ public void releaseLocks(List<HiveLock> hiveLocks) throws LockException {
 
   @Override
   public void commitTxn() throws LockException {
-    // No-op
+    numTxn--;
   }
 
   @Override
@@ -246,7 +249,7 @@ public void replCommitTxn(CommitTxnRequest rqst) throws LockException {
 
   @Override
   public void rollbackTxn() throws LockException {
-    // No-op
+    numTxn--;
  }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index a2dbeebc7c..a19ea4ba8a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -178,12 +178,14 @@
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.AbstractFileMergeOperator;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.FunctionUtils;
 import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.AcidUtils.TableSnapshot;
 import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
@@ -193,6 +195,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAugmentMaterializationRule;
 import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
+import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType;
@@ -392,7 +395,12 @@ private static Hive getInternal(HiveConf c, boolean needsRefresh, boolean isFast
       }
       db = create(c, doRegisterAllFns);
     }
-    if (c != null) {
+    if (c != null && db.conf != null && db.conf != c) {
+      if (db.conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY) != null) {
+        c.set(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY, db.conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY));
+      } else {
+        c.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
+      }
       db.conf = c;
     }
     return db;
   }
@@ -776,6 +784,7 @@ public void alterTable(String catName, String dbName, String tblName, Table newT
       EnvironmentContext environmentContext, boolean transactional, long replWriteId)
           throws HiveException {
+    boolean txnOpened = false;
     if (catName ==
null) { catName = getDefaultCatalog(conf); } @@ -795,7 +804,13 @@ public void alterTable(String catName, String dbName, String tblName, Table newT // Take a table snapshot and set it to newTbl. AcidUtils.TableSnapshot tableSnapshot = null; if (transactional) { - if (replWriteId > 0) { + if (AcidUtils.isTransactionalTable(newTbl) && !inReplication(newTbl) && replWriteId > 0) { + txnOpened = openTxnIfNeeded(); + // Advance writeId for ddl on transactional table + AcidUtils.advanceWriteId(conf, newTbl); + } + + if (inReplication(newTbl) && replWriteId > 0) { // We need a valid writeId list for a transactional table modification. During // replication we do not have a valid writeId list which was used to modify the table // on the source. But we know for sure that the writeId associated with it was valid @@ -824,6 +839,12 @@ public void alterTable(String catName, String dbName, String tblName, Table newT throw new HiveException("Unable to alter table. " + e.getMessage(), e); } catch (TException e) { throw new HiveException("Unable to alter table. " + e.getMessage(), e); + } finally { + if (txnOpened) { + if (SessionState.get().getTxnMgr().isTxnOpen()) { + SessionState.get().getTxnMgr().commitTxn(); + } + } } } @@ -875,6 +896,7 @@ public void alterPartition(String tblName, Partition newPart, public void alterPartition(String catName, String dbName, String tblName, Partition newPart, EnvironmentContext environmentContext, boolean transactional) throws InvalidOperationException, HiveException { + boolean txnOpened = false; try { if (catName == null) { catName = getDefaultCatalog(conf); @@ -888,8 +910,14 @@ public void alterPartition(String catName, String dbName, String tblName, Partit if (environmentContext == null) { environmentContext = new EnvironmentContext(); } + AcidUtils.TableSnapshot tableSnapshot = null; if (transactional) { + if (AcidUtils.isTransactionalTable(newPart.getTable()) && !inReplication(newPart.getTable())) { + txnOpened = openTxnIfNeeded(); + // Advance writeId for ddl on transactional table + AcidUtils.advanceWriteId(conf, newPart.getTable()); + } tableSnapshot = AcidUtils.getTableSnapshot(conf, newPart.getTable(), true); if (tableSnapshot != null) { newPart.getTPartition().setWriteId(tableSnapshot.getWriteId()); @@ -905,6 +933,12 @@ public void alterPartition(String catName, String dbName, String tblName, Partit throw new HiveException("Unable to alter partition. " + e.getMessage(), e); } catch (TException e) { throw new HiveException("Unable to alter partition. 
" + e.getMessage(), e); + } finally { + if (txnOpened) { + if (SessionState.get().getTxnMgr().isTxnOpen()) { + SessionState.get().getTxnMgr().commitTxn(); + } + } } } @@ -932,12 +966,18 @@ private void validatePartition(Partition newPart) throws HiveException { public void alterPartitions(String tblName, List newParts, EnvironmentContext environmentContext, boolean transactional) throws InvalidOperationException, HiveException { + boolean txnOpened = false; String[] names = Utilities.getDbTableName(tblName); List newTParts = new ArrayList(); try { AcidUtils.TableSnapshot tableSnapshot = null; if (transactional) { + if (AcidUtils.isTransactionalTable(newParts.get(0).getTable()) && !inReplication(newParts.get(0).getTable())) { + // Advance writeId for ddl on transactional table + txnOpened = openTxnIfNeeded(); + AcidUtils.advanceWriteId(conf, newParts.get(0).getTable()); + } tableSnapshot = AcidUtils.getTableSnapshot(conf, newParts.get(0).getTable(), true); } // Remove the DDL time so that it gets refreshed @@ -959,6 +999,12 @@ public void alterPartitions(String tblName, List newParts, throw new HiveException("Unable to alter partition. " + e.getMessage(), e); } catch (TException e) { throw new HiveException("Unable to alter partition. " + e.getMessage(), e); + } finally { + if (txnOpened) { + if (SessionState.get().getTxnMgr().isTxnOpen()) { + SessionState.get().getTxnMgr().commitTxn(); + } + } } } /** @@ -975,6 +1021,7 @@ public void alterPartitions(String tblName, List newParts, public void renamePartition(Table tbl, Map oldPartSpec, Partition newPart, long replWriteId) throws HiveException { + boolean txnOpened = false; try { Map newPartSpec = newPart.getSpec(); if (oldPartSpec.keySet().size() != tbl.getPartCols().size() @@ -997,8 +1044,13 @@ public void renamePartition(Table tbl, Map oldPartSpec, Partitio } String validWriteIds = null; if (AcidUtils.isTransactionalTable(tbl)) { + if (!inReplication(tbl)) { + // Advance writeId for ddl on transactional table + txnOpened = openTxnIfNeeded(); + AcidUtils.advanceWriteId(conf, tbl); + } TableSnapshot tableSnapshot; - if (replWriteId > 0) { + if (inReplication(tbl)) { // We need a valid writeId list for a transactional table modification. During // replication we do not have a valid writeId list which was used to modify the table // on the source. But we know for sure that the writeId associated with it was valid @@ -1028,6 +1080,12 @@ public void renamePartition(Table tbl, Map oldPartSpec, Partitio throw new HiveException("Unable to rename partition. " + e.getMessage(), e); } catch (TException e) { throw new HiveException("Unable to rename partition. " + e.getMessage(), e); + } finally { + if (txnOpened) { + if (SessionState.get().getTxnMgr().isTxnOpen()) { + SessionState.get().getTxnMgr().commitTxn(); + } + } } } @@ -1087,6 +1145,7 @@ public void createTable(Table tbl, boolean ifNotExists, List defaultConstraints, List checkConstraints) throws HiveException { + boolean txnOpened = false; try { if (org.apache.commons.lang3.StringUtils.isBlank(tbl.getDbName())) { tbl.setDbName(SessionState.get().getCurrentDatabase()); @@ -1111,6 +1170,11 @@ public void createTable(Table tbl, boolean ifNotExists, tTbl.setPrivileges(principalPrivs); } } + if (AcidUtils.isTransactionalTable(tbl) && !inReplication(tbl)) { + txnOpened = openTxnIfNeeded(); + // Advance writeId for ddl on transactional table + AcidUtils.advanceWriteId(conf, tbl); + } // Set table snapshot to api.Table to make it persistent. 
A transactional table being // replicated may have a valid write Id copied from the source. Use that instead of // crafting one on the replica. @@ -1136,6 +1200,12 @@ public void createTable(Table tbl, boolean ifNotExists, } } catch (Exception e) { throw new HiveException(e); + } finally { + if (txnOpened) { + if (SessionState.get().getTxnMgr().isTxnOpen()) { + SessionState.get().getTxnMgr().commitTxn(); + } + } } } @@ -1230,7 +1300,18 @@ public void dropTable(String dbName, String tableName, boolean deleteData, */ public void dropTable(String dbName, String tableName, boolean deleteData, boolean ignoreUnknownTab, boolean ifPurge) throws HiveException { + boolean txnOpened = false; try { + Table tbl = null; + try { + tbl = getTable(dbName, tableName); + } catch (InvalidTableException e) { + } + if (tbl != null && AcidUtils.isTransactionalTable(tbl) && !inReplication(tbl)) { + txnOpened = openTxnIfNeeded(); + // Advance writeId for ddl on transactional table + AcidUtils.advanceWriteId(conf, tbl); + } getMSC().dropTable(dbName, tableName, deleteData, ignoreUnknownTab, ifPurge); } catch (NoSuchObjectException e) { if (!ignoreUnknownTab) { @@ -1238,6 +1319,12 @@ public void dropTable(String dbName, String tableName, boolean deleteData, } } catch (Exception e) { throw new HiveException(e); + } finally { + if (txnOpened) { + if (SessionState.get().getTxnMgr().isTxnOpen()) { + SessionState.get().getTxnMgr().commitTxn(); + } + } } } @@ -1251,10 +1338,19 @@ public void dropTable(String dbName, String tableName, boolean deleteData, * @throws HiveException */ public void truncateTable(String dbDotTableName, Map partSpec, Long writeId) throws HiveException { + boolean txnOpened = false; try { Table table = getTable(dbDotTableName, true); + AcidUtils.TableSnapshot snapshot = null; if (AcidUtils.isTransactionalTable(table)) { + + if (!inReplication(table)) { + txnOpened = openTxnIfNeeded(); + // Advance writeId for ddl on transactional table + AcidUtils.advanceWriteId(conf, table); + } + if (writeId <= 0) { snapshot = AcidUtils.getTableSnapshot(conf, table, true); } else { @@ -1355,7 +1451,7 @@ public Table getTable(TableName tableName) throws HiveException { */ public Table getTable(final String dbName, final String tableName, boolean throwException) throws HiveException { - return this.getTable(dbName, tableName, throwException, false); + return this.getTable(dbName, tableName, throwException, true); } /** @@ -3537,8 +3633,14 @@ public boolean dropPartition(String dbName, String tableName, List parti public List dropPartitions(String dbName, String tableName, List> partitionExpressions, PartitionDropOptions dropOptions) throws HiveException { + boolean txnOpened = false; try { Table table = getTable(dbName, tableName); + if (AcidUtils.isTransactionalTable(table) && !inReplication(table)) { + txnOpened = openTxnIfNeeded(); + // Advance writeId for ddl on transactional table + AcidUtils.advanceWriteId(conf, table); + } List partitions = getMSC().dropPartitions(dbName, tableName, partitionExpressions, dropOptions); return convertFromMetastore(table, partitions); @@ -3546,6 +3648,12 @@ public boolean dropPartition(String dbName, String tableName, List parti throw new HiveException("Partition or table doesn't exist.", e); } catch (Exception e) { throw new HiveException(e.getMessage(), e); + } finally { + if (txnOpened) { + if (SessionState.get().getTxnMgr().isTxnOpen()) { + SessionState.get().getTxnMgr().commitTxn(); + } + } } } @@ -5057,7 +5165,20 @@ public static boolean isHadoop1() { public List 
exchangeTablePartitions(Map partitionSpecs, String sourceDb, String sourceTable, String destDb, String destinationTableName) throws HiveException { + boolean txnOpened = false; try { + Table srcTbl = getTable(sourceDb, sourceTable); + if (AcidUtils.isTransactionalTable(srcTbl) && !inReplication(srcTbl)) { + txnOpened = openTxnIfNeeded(); + // Advance writeId for ddl on transactional table + AcidUtils.advanceWriteId(conf, srcTbl); + } + Table descTbl = getTable(destDb, destinationTableName); + if (AcidUtils.isTransactionalTable(descTbl) && !inReplication(descTbl)) { + txnOpened = openTxnIfNeeded(); + // Advance writeId for ddl on transactional table + AcidUtils.advanceWriteId(conf, descTbl); + } List partitions = getMSC().exchange_partitions(partitionSpecs, sourceDb, sourceTable, destDb, destinationTableName); @@ -5066,6 +5187,12 @@ public static boolean isHadoop1() { } catch (Exception ex) { LOG.error(StringUtils.stringifyException(ex)); throw new HiveException(ex); + } finally { + if (txnOpened) { + if (SessionState.get().getTxnMgr().isTxnOpen()) { + SessionState.get().getTxnMgr().rollbackTxn(); + } + } } } @@ -5183,6 +5310,7 @@ public synchronized IMetaStoreClient getMSC( metaStoreClient = HiveMetaStoreClient.newSynchronizedClient(metaStoreClient); } } + //metaStoreClient.setValidWriteIdList(conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY)); return metaStoreClient; } @@ -5560,12 +5688,25 @@ public void cacheFileMetadata( public void dropConstraint(String dbName, String tableName, String constraintName) throws HiveException, NoSuchObjectException { + boolean txnOpened = false; try { + Table tbl = getTable(dbName, tableName); + if (AcidUtils.isTransactionalTable(tbl) && !inReplication(tbl)) { + txnOpened = openTxnIfNeeded(); + // Advance writeId for ddl on transactional table + AcidUtils.advanceWriteId(conf, tbl); + } getMSC().dropConstraint(dbName, tableName, constraintName); } catch (NoSuchObjectException e) { throw e; } catch (Exception e) { throw new HiveException(e); + } finally { + if (txnOpened) { + if (SessionState.get().getTxnMgr().isTxnOpen()) { + SessionState.get().getTxnMgr().rollbackTxn(); + } + } } } @@ -5891,55 +6032,133 @@ public CheckConstraint getCheckConstraints(String dbName, String tblName) public void addPrimaryKey(List primaryKeyCols) throws HiveException, NoSuchObjectException { + boolean txnOpened = false; try { + Table tbl = getTable(primaryKeyCols.get(0).getTable_db(), primaryKeyCols.get(0).getTable_name()); + if (AcidUtils.isTransactionalTable(tbl) && !inReplication(tbl)) { + txnOpened = openTxnIfNeeded(); + // Advance writeId for ddl on transactional table + AcidUtils.advanceWriteId(conf, tbl); + } getMSC().addPrimaryKey(primaryKeyCols); } catch (Exception e) { throw new HiveException(e); + } finally { + if (txnOpened) { + if (SessionState.get().getTxnMgr().isTxnOpen()) { + SessionState.get().getTxnMgr().rollbackTxn(); + } + } } } public void addForeignKey(List foreignKeyCols) throws HiveException, NoSuchObjectException { + boolean txnOpened = false; try { + Table tbl = getTable(foreignKeyCols.get(0).getFktable_db(), foreignKeyCols.get(0).getFktable_name()); + if (AcidUtils.isTransactionalTable(tbl) && !inReplication(tbl)) { + txnOpened = openTxnIfNeeded(); + // Advance writeId for ddl on transactional table + AcidUtils.advanceWriteId(conf, tbl); + } getMSC().addForeignKey(foreignKeyCols); } catch (Exception e) { throw new HiveException(e); + } finally { + if (txnOpened) { + if (SessionState.get().getTxnMgr().isTxnOpen()) { + 
+          SessionState.get().getTxnMgr().rollbackTxn();
+        }
+      }
     }
   }
 
   public void addUniqueConstraint(List<SQLUniqueConstraint> uniqueConstraintCols)
     throws HiveException, NoSuchObjectException {
+    boolean txnOpened = false;
     try {
+      Table tbl = getTable(uniqueConstraintCols.get(0).getTable_db(), uniqueConstraintCols.get(0).getTable_name());
+      if (AcidUtils.isTransactionalTable(tbl) && !inReplication(tbl)) {
+        txnOpened = openTxnIfNeeded();
+        // Advance writeId for ddl on transactional table
+        AcidUtils.advanceWriteId(conf, tbl);
+      }
       getMSC().addUniqueConstraint(uniqueConstraintCols);
     } catch (Exception e) {
       throw new HiveException(e);
+    } finally {
+      if (txnOpened) {
+        if (SessionState.get().getTxnMgr().isTxnOpen()) {
+          SessionState.get().getTxnMgr().rollbackTxn();
+        }
+      }
     }
   }
 
   public void addNotNullConstraint(List<SQLNotNullConstraint> notNullConstraintCols)
     throws HiveException, NoSuchObjectException {
+    boolean txnOpened = false;
     try {
+      Table tbl = getTable(notNullConstraintCols.get(0).getTable_db(), notNullConstraintCols.get(0).getTable_name());
+      if (AcidUtils.isTransactionalTable(tbl) && !inReplication(tbl)) {
+        txnOpened = openTxnIfNeeded();
+        // Advance writeId for ddl on transactional table
+        AcidUtils.advanceWriteId(conf, tbl);
+      }
       getMSC().addNotNullConstraint(notNullConstraintCols);
     } catch (Exception e) {
       throw new HiveException(e);
+    } finally {
+      if (txnOpened) {
+        if (SessionState.get().getTxnMgr().isTxnOpen()) {
+          SessionState.get().getTxnMgr().rollbackTxn();
+        }
+      }
     }
   }
 
   public void addDefaultConstraint(List<SQLDefaultConstraint> defaultConstraints)
     throws HiveException, NoSuchObjectException {
+    boolean txnOpened = false;
     try {
+      Table tbl = getTable(defaultConstraints.get(0).getTable_db(), defaultConstraints.get(0).getTable_name());
+      if (AcidUtils.isTransactionalTable(tbl) && !inReplication(tbl)) {
+        txnOpened = openTxnIfNeeded();
+        // Advance writeId for ddl on transactional table
+        AcidUtils.advanceWriteId(conf, tbl);
+      }
       getMSC().addDefaultConstraint(defaultConstraints);
     } catch (Exception e) {
       throw new HiveException(e);
+    } finally {
+      if (txnOpened) {
+        if (SessionState.get().getTxnMgr().isTxnOpen()) {
+          SessionState.get().getTxnMgr().rollbackTxn();
+        }
+      }
     }
   }
 
   public void addCheckConstraint(List<SQLCheckConstraint> checkConstraints)
     throws HiveException, NoSuchObjectException {
+    boolean txnOpened = false;
     try {
+      Table tbl = getTable(checkConstraints.get(0).getTable_db(), checkConstraints.get(0).getTable_name());
+      if (AcidUtils.isTransactionalTable(tbl) && !inReplication(tbl)) {
+        txnOpened = openTxnIfNeeded();
+        // Advance writeId for ddl on transactional table
+        AcidUtils.advanceWriteId(conf, tbl);
+      }
       getMSC().addCheckConstraint(checkConstraints);
     } catch (Exception e) {
       throw new HiveException(e);
+    } finally {
+      if (txnOpened) {
+        if (SessionState.get().getTxnMgr().isTxnOpen()) {
+          SessionState.get().getTxnMgr().rollbackTxn();
+        }
+      }
     }
   }
 
@@ -6151,4 +6370,36 @@ public StorageHandlerInfo getStorageHandlerInfo(Table table)
       throw new HiveException(e);
     }
   }
+
+  private boolean openTxnIfNeeded() throws HiveException {
+    try {
+      if (SessionState.get().getTxnMgr() == null) {
+        SessionState.get().initTxnMgr(conf);
+      }
+      HiveTxnManager txnMgr = SessionState.get().getTxnMgr();
+      if (!txnMgr.isTxnOpen()) {
+        Context ctx = new Context(conf);
+        txnMgr.openTxn(ctx, SessionState.getUserFromAuthenticator());
+        return true;
+      }
+      return false;
+    } catch (Exception e) {
+      throw new HiveException(e);
+    }
+  }
+
+  public void clearValidWriteIdList() {
+    if (metaStoreClient != null) {
+      metaStoreClient.clearValidWriteIdList();
+    }
+  }
+
+  boolean inReplication(Table tbl) {
+    if (tbl.getParameters().get(ReplicationSpec.KEY.CURR_STATE_ID.toString()) != null) {
+      return true;
+    } else {
+      return false;
+    }
+  }
 }
+
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java
index 0b19f178b5..9da501180e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java
@@ -409,7 +409,14 @@ private boolean isTargetTable(Entity entity, Table targetTable) {
      * is this the right way to compare? Should it just compare paths?
      * equals() impl looks heavy weight
      */
-    return targetTable.equals(entity.getTable());
+    long targetWriteId = targetTable.getTTable().getWriteId();
+    long entityWriteId = entity.getTable().getTTable().getWriteId();
+    targetTable.getTTable().setWriteId(0L);
+    entity.getTable().getTTable().setWriteId(0L);
+    boolean result = targetTable.equals(entity.getTable());
+    targetTable.getTTable().setWriteId(targetWriteId);
+    entity.getTable().getTTable().setWriteId(entityWriteId);
+    return result;
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index de746a8d11..59a1d7aa05 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -67,13 +67,8 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hdfs.DFSUtilClient;
-import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.*;
 import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
-import org.apache.hadoop.hive.common.StringInternUtils;
-import org.apache.hadoop.hive.common.TableName;
-import org.apache.hadoop.hive.common.ValidTxnList;
-import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
 import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
 import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -306,7 +301,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
-
   public static final String DUMMY_DATABASE = "_dummy_database";
   public static final String DUMMY_TABLE = "_dummy_table";
   public static final String SUBQUERY_TAG_1 = "-subquery1";
@@ -710,21 +704,21 @@ protected boolean isInsertInto(QBParseInfo qbp, String dest) {
    * e.g. VALUES(1,3..)
*/ private boolean isValueClause(ASTNode select) { - if(select == null) { + if (select == null) { return false; } - if(select.getChildCount() == 1) { - ASTNode selectExpr = (ASTNode)select.getChild(0); - if(selectExpr.getChildCount() == 1 ) { - ASTNode selectChildExpr = (ASTNode)selectExpr.getChild(0); - if(selectChildExpr.getType() == HiveParser.TOK_FUNCTION) { - ASTNode inline = (ASTNode)selectChildExpr.getChild(0); - ASTNode func = (ASTNode)selectChildExpr.getChild(1); - if(inline.getText().equals(GenericUDTFInline.class.getAnnotation(Description.class).name()) + if (select.getChildCount() == 1) { + ASTNode selectExpr = (ASTNode) select.getChild(0); + if (selectExpr.getChildCount() == 1) { + ASTNode selectChildExpr = (ASTNode) selectExpr.getChild(0); + if (selectChildExpr.getType() == HiveParser.TOK_FUNCTION) { + ASTNode inline = (ASTNode) selectChildExpr.getChild(0); + ASTNode func = (ASTNode) selectChildExpr.getChild(1); + if (inline.getText().equals(GenericUDTFInline.class.getAnnotation(Description.class).name()) && func.getType() == HiveParser.TOK_FUNCTION) { - ASTNode arrayNode = (ASTNode)func.getChild(0); - ASTNode funcNode= (ASTNode)func.getChild(1); - if(arrayNode.getText().equals(GenericUDFArray.class.getAnnotation(Description.class).name() ) + ASTNode arrayNode = (ASTNode) func.getChild(0); + ASTNode funcNode = (ASTNode) func.getChild(1); + if (arrayNode.getText().equals(GenericUDFArray.class.getAnnotation(Description.class).name()) && funcNode.getType() == HiveParser.TOK_FUNCTION) { return true; } @@ -743,16 +737,15 @@ private boolean isValueClause(ASTNode select) { * @return List of default constraints (including NULL if there is no default) * @throws SemanticException */ - private List getDefaultConstraints(Table tbl, List targetSchema) throws SemanticException{ + private List getDefaultConstraints(Table tbl, List targetSchema) throws SemanticException { Map colNameToDefaultVal = getColNameToDefaultValueMap(tbl); List defaultConstraints = new ArrayList<>(); - if(targetSchema != null) { + if (targetSchema != null) { for (String colName : targetSchema) { defaultConstraints.add(colNameToDefaultVal.get(colName)); } - } - else { - for(FieldSchema fs:tbl.getCols()) { + } else { + for (FieldSchema fs : tbl.getCols()) { defaultConstraints.add(colNameToDefaultVal.get(fs.getName())); } } @@ -781,10 +774,9 @@ private boolean isValueClause(ASTNode select) { */ private ASTNode getNodeReplacementforDefault(String newValue) throws SemanticException { ASTNode newNode = null; - if(newValue== null) { + if (newValue == null) { newNode = ASTBuilder.construct(HiveParser.TOK_NULL, "TOK_NULL").node(); - } - else { + } else { try { newNode = new ParseDriver().parseExpression(newValue); } catch(Exception e) { @@ -833,18 +825,19 @@ private void replaceDefaultKeywordForUpdate(ASTNode selectExprs, Table targetTab * @param targetSchema this is target schema/column schema if specified in query * @throws SemanticException */ - private void replaceDefaultKeyword(ASTNode valueArrClause, Table targetTable, List targetSchema) throws SemanticException{ + private void replaceDefaultKeyword(ASTNode valueArrClause, Table targetTable, List targetSchema) + throws SemanticException { List defaultConstraints = null; - for(int i=1; i aggregations, List wdwFns, - ASTNode wndParent) throws SemanticException { + private void doPhase1GetAllAggregations(ASTNode expressionTree, Map aggregations, + List wdwFns, ASTNode wndParent) throws SemanticException { int exprTokenType = expressionTree.getToken().getType(); - 
if(exprTokenType == HiveParser.TOK_SUBQUERY_EXPR) { + if (exprTokenType == HiveParser.TOK_SUBQUERY_EXPR) { //since now we have scalar subqueries we can get subquery expression in having // we don't want to include aggregate from within subquery return; @@ -908,8 +899,7 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree, boolean parentIsWindowSpec = wndParent != null; - if (exprTokenType == HiveParser.TOK_FUNCTION - || exprTokenType == HiveParser.TOK_FUNCTIONDI + if (exprTokenType == HiveParser.TOK_FUNCTION || exprTokenType == HiveParser.TOK_FUNCTIONDI || exprTokenType == HiveParser.TOK_FUNCTIONSTAR) { assert (expressionTree.getChildCount() != 0); Tree lastChild = expressionTree.getChild(expressionTree.getChildCount() - 1); @@ -918,7 +908,7 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree, // Further, we will examine its children AST nodes to check whether // there are aggregation functions within wdwFns.add(expressionTree); - for(Node child : expressionTree.getChildren()) { + for (Node child : expressionTree.getChildren()) { doPhase1GetAllAggregations((ASTNode) child, aggregations, wdwFns, expressionTree); } return; @@ -926,24 +916,22 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree, transformWithinGroup(expressionTree, lastChild); } if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { - String functionName = unescapeIdentifier(expressionTree.getChild(0) - .getText()); + String functionName = unescapeIdentifier(expressionTree.getChild(0).getText()); // Validate the function name if (FunctionRegistry.getFunctionInfo(functionName) == null) { throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName)); } - if(FunctionRegistry.impliesOrder(functionName) && !parentIsWindowSpec) { + if (FunctionRegistry.impliesOrder(functionName) && !parentIsWindowSpec) { throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName)); } if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) { - if(containsLeadLagUDF(expressionTree) && !parentIsWindowSpec) { + if (containsLeadLagUDF(expressionTree) && !parentIsWindowSpec) { throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName)); } aggregations.put(expressionTree.toStringTree(), expressionTree); FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName); if (!fi.isNative()) { - unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree - .getChild(0)); + unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree.getChild(0)); } return; } @@ -982,9 +970,9 @@ private void transformWithinGroup(ASTNode expressionTree, Tree withinGroupNode) ASTNode sortKey = (ASTNode) tabSortColNameNode.getChild(0).getChild(0); expressionTree.addChild(sortKey); expressionTree.addChild(ASTBuilder.createAST(HiveParser.NumberLiteral, - Integer.toString(DirectionUtils.tokenToCode(tabSortColNameNode.getType())))); + Integer.toString(DirectionUtils.tokenToCode(tabSortColNameNode.getType())))); expressionTree.addChild(ASTBuilder.createAST(HiveParser.NumberLiteral, - Integer.toString(NullOrdering.fromToken(nullsNode.getType()).getCode()))); + Integer.toString(NullOrdering.fromToken(nullsNode.getType()).getCode()))); } } @@ -1043,7 +1031,7 @@ protected void setAST(ASTNode newAST) { aliasIndex = index; } } - return new int[] {aliasIndex, propsIndex, tsampleIndex, ssampleIndex}; + return new int[] { aliasIndex, propsIndex, tsampleIndex, ssampleIndex }; } private String findSimpleTableName(ASTNode tabref, int aliasIndex) throws 
SemanticException { @@ -1053,12 +1041,12 @@ private String findSimpleTableName(ASTNode tabref, int aliasIndex) throws Semant String alias; if (aliasIndex != 0) { alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText()); - } - else { + } else { alias = getUnescapedUnqualifiedTableName(tableTree); } return alias; } + /** * Goes though the tabref tree and finds the alias for the table. Once found, * it records the table name-> alias association in aliasToTabs. It also makes @@ -1094,9 +1082,8 @@ private String processTable(QB qb, ASTNode tabref) throws SemanticException { // If the alias is already there then we have a conflict if (qb.exists(alias)) { - throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(), - tabref.getChild(aliasIndex))); + throw new SemanticException( + ASTErrorUtils.getMsg(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(), tabref.getChild(aliasIndex))); } if (tsampleIndex >= 0) { ASTNode sampleClause = (ASTNode) tabref.getChild(tsampleIndex); @@ -1113,15 +1100,12 @@ private String processTable(QB qb, ASTNode tabref) throws SemanticException { (ASTNode) tabref.getChild(0), ErrorMsg.SAMPLE_RESTRICTION.getMsg())); } - TableSample tabSample = new TableSample( - unescapeIdentifier(sampleClause.getChild(0).getText()), - unescapeIdentifier(sampleClause.getChild(1).getText()), - sampleCols); + TableSample tabSample = new TableSample(unescapeIdentifier(sampleClause.getChild(0).getText()), + unescapeIdentifier(sampleClause.getChild(1).getText()), sampleCols); qb.getParseInfo().setTabSample(alias, tabSample); if (unparseTranslator.isEnabled()) { for (ASTNode sampleCol : sampleCols) { - unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol - .getChild(0)); + unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol.getChild(0)); } } } else if (ssampleIndex >= 0) { @@ -1131,14 +1115,13 @@ private String processTable(QB qb, ASTNode tabref) throws SemanticException { Tree numerator = sampleClause.getChild(1); String value = unescapeIdentifier(numerator.getText()); - SplitSample sample; if (type.getType() == HiveParser.TOK_PERCENT) { assertCombineInputFormat(numerator, "Percentage"); double percent = Double.valueOf(value); - if (percent < 0 || percent > 100) { - throw new SemanticException(generateErrorMessage((ASTNode) numerator, - "Sampling percentage should be between 0 and 100")); + if (percent < 0 || percent > 100) { + throw new SemanticException( + generateErrorMessage((ASTNode) numerator, "Sampling percentage should be between 0 and 100")); } int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM); sample = new SplitSample(percent, seedNum); @@ -1188,12 +1171,11 @@ private String processTable(QB qb, ASTNode tabref) throws SemanticException { } private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException { - String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ? - HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT): - HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT); + String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ? 
HiveConf + .getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT) : HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT); if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) { - throw new SemanticException(generateErrorMessage((ASTNode) numerator, - message + " sampling is not supported in " + inputFormat)); + throw new SemanticException( + generateErrorMessage((ASTNode) numerator, message + " sampling is not supported in " + inputFormat)); } } @@ -1201,8 +1183,7 @@ private String processSubQuery(QB qb, ASTNode subq) throws SemanticException { // This is a subquery and must have an alias if (subq.getChildCount() != 2) { - throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(), subq)); + throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(), subq)); } ASTNode subqref = (ASTNode) subq.getChild(0); String alias = unescapeIdentifier(subq.getChild(1).getText()); @@ -1214,9 +1195,7 @@ private String processSubQuery(QB qb, ASTNode subq) throws SemanticException { // If the alias is already there then we have a conflict if (qb.exists(alias)) { - throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(), - subq.getChild(1))); + throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(), subq.getChild(1))); } // Insert this map into the stats qb.setSubqAlias(alias, qbexpr); @@ -1235,7 +1214,7 @@ private void processCTE(QB qb, ASTNode ctes) throws SemanticException { int numCTEs = ctes.getChildCount(); - for(int i=0; i 0 && ast.getChild(0) instanceof ASTNode) { ASTNode ch = (ASTNode) ast.getChild(0); - if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0 - && ch.getChild(0) instanceof ASTNode) { + if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0 && ch + .getChild(0) instanceof ASTNode) { ch = (ASTNode) ch.getChild(0); isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE; } else { if (ast.getToken().getType() == HiveParser.TOK_DESTINATION && ast.getChild(0).getType() == HiveParser.TOK_TAB) { - String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), - SessionState.get().getCurrentDatabase()); + String fullTableName = + getUnescapedName((ASTNode) ast.getChild(0).getChild(0), SessionState.get().getCurrentDatabase()); qbp.getInsertOverwriteTables().put(fullTableName.toLowerCase(), ast); qbp.setDestToOpType(ctx_1.dest, true); } @@ -1655,8 +1625,7 @@ boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) // is there a insert in the subquery if (qbp.getIsSubQ() && !isTmpFileDest) { - throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(), ast)); + throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(), ast)); } qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0)); @@ -1689,8 +1658,7 @@ boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) case HiveParser.TOK_FROM: int child_count = ast.getChildCount(); if (child_count != 1) { - throw new SemanticException(generateErrorMessage(ast, - "Multiple Children " + child_count)); + throw new SemanticException(generateErrorMessage(ast, "Multiple Children " + child_count)); } if (!qbp.getIsSubQ()) { @@ -1710,7 +1678,7 @@ boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) } else if (isJoinToken(frm)) { processJoin(qb, frm); qbp.setJoinExpr(frm); - }else if(frm.getToken().getType() == 
HiveParser.TOK_PTBLFUNCTION){ + } else if (frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) { queryProperties.setHasPTF(true); processPTF(qb, frm); } @@ -1818,7 +1786,6 @@ boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase(); - qb.setTabAlias(table_name, table_name); qb.addAlias(table_name); qb.getParseInfo().setIsAnalyzeCommand(true); @@ -1887,7 +1854,7 @@ boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) } catch (HiveException e) { LOG.info("Error while getting metadata : ", e); } - validatePartSpec(table, partition, (ASTNode)tab, conf, false); + validatePartSpec(table, partition, (ASTNode) tab, conf, false); } skipRecursion = false; break; @@ -1901,7 +1868,7 @@ boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) processCTE(qb, ast); break; case HiveParser.QUERY_HINT: - processQueryHint(ast, qbp, 0); + processQueryHint(ast, qbp, 0); default: skipRecursion = false; break; @@ -1919,7 +1886,7 @@ boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) return phase1Result; } - private int processQueryHint(ASTNode ast, QBParseInfo qbp, int posn) throws SemanticException{ + private int processQueryHint(ASTNode ast, QBParseInfo qbp, int posn) throws SemanticException { ParseDriver pd = new ParseDriver(); String queryHintStr = ast.getText(); LOG.debug("QUERY HINT: {} ", queryHintStr); @@ -1927,7 +1894,7 @@ private int processQueryHint(ASTNode ast, QBParseInfo qbp, int posn) throws Sema ASTNode hintNode = pd.parseHint(queryHintStr); qbp.setHints(hintNode); } catch (ParseException e) { - throw new SemanticException("failed to parse query hint: "+e.getMessage(), e); + throw new SemanticException("failed to parse query hint: " + e.getMessage(), e); } return posn + 1; } @@ -1939,20 +1906,22 @@ private int processQueryHint(ASTNode ast, QBParseInfo qbp, int posn) throws Sema * @throws SemanticException */ private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException { - ASTNode tabColName = (ASTNode)ast.getChild(1); - if(ast.getType() == HiveParser.TOK_INSERT_INTO && tabColName != null && tabColName.getType() == HiveParser.TOK_TABCOLNAME) { + ASTNode tabColName = (ASTNode) ast.getChild(1); + if (ast.getType() == HiveParser.TOK_INSERT_INTO && tabColName != null + && tabColName.getType() == HiveParser.TOK_TABCOLNAME) { //we have "insert into foo(a,b)..."; parser will enforce that 1+ columns are listed if TOK_TABCOLNAME is present List targetColNames = new ArrayList(); - for(Node col : tabColName.getChildren()) { - assert ((ASTNode)col).getType() == HiveParser.Identifier : - "expected token " + HiveParser.Identifier + " found " + ((ASTNode)col).getType(); - targetColNames.add(((ASTNode)col).getText().toLowerCase()); - } - String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), - SessionState.get().getCurrentDatabase()); + for (Node col : tabColName.getChildren()) { + assert + ((ASTNode) col).getType() == HiveParser.Identifier : + "expected token " + HiveParser.Identifier + " found " + ((ASTNode) col).getType(); + targetColNames.add(((ASTNode) col).getText().toLowerCase()); + } + String fullTableName = + getUnescapedName((ASTNode) ast.getChild(0).getChild(0), SessionState.get().getCurrentDatabase()); qbp.setDestSchemaForClause(ctx_1.dest, targetColNames); Set targetColumns = new HashSet<>(targetColNames); - 
if(targetColNames.size() != targetColumns.size()) { + if (targetColNames.size() != targetColumns.size()) { throw new SemanticException(generateErrorMessage(tabColName, "Duplicate column name detected in " + fullTableName + " table schema specification")); } @@ -1963,15 +1932,14 @@ private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase LOG.error("Error processing HiveParser.TOK_DESTINATION: " + ex.getMessage(), ex); throw new SemanticException(ex); } - if(targetTable == null) { - throw new SemanticException(generateErrorMessage(ast, - "Unable to access metadata for table " + fullTableName)); + if (targetTable == null) { + throw new SemanticException(generateErrorMessage(ast, "Unable to access metadata for table " + fullTableName)); } - for(FieldSchema f : targetTable.getCols()) { + for (FieldSchema f : targetTable.getCols()) { //parser only allows foo(a,b), not foo(foo.a, foo.b) targetColumns.remove(f.getName()); } - if(!targetColumns.isEmpty()) {//here we need to see if remaining columns are dynamic partition columns + if (!targetColumns.isEmpty()) {//here we need to see if remaining columns are dynamic partition columns /* We just checked the user specified schema columns among regular table column and found some which are not 'regular'. Now check is they are dynamic partition columns For dynamic partitioning, @@ -1988,34 +1956,36 @@ private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase (TOK_TABCOLNAME d a) )*/ List dynamicPartitionColumns = new ArrayList(); - if(ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) { - ASTNode tokTab = (ASTNode)ast.getChild(0); - ASTNode tokPartSpec = (ASTNode)tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC); - if(tokPartSpec != null) { - for(Node n : tokPartSpec.getChildren()) { + if (ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) { + ASTNode tokTab = (ASTNode) ast.getChild(0); + ASTNode tokPartSpec = (ASTNode) tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC); + if (tokPartSpec != null) { + for (Node n : tokPartSpec.getChildren()) { ASTNode tokPartVal = null; - if(n instanceof ASTNode) { - tokPartVal = (ASTNode)n; + if (n instanceof ASTNode) { + tokPartVal = (ASTNode) n; } - if(tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL && tokPartVal.getChildCount() == 1) { - assert tokPartVal.getChild(0).getType() == HiveParser.Identifier : + if (tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL + && tokPartVal.getChildCount() == 1) { + assert + tokPartVal.getChild(0).getType() == HiveParser.Identifier : "Expected column name; found tokType=" + tokPartVal.getType(); dynamicPartitionColumns.add(tokPartVal.getChild(0).getText()); } } - for(String colName : dynamicPartitionColumns) { + for (String colName : dynamicPartitionColumns) { targetColumns.remove(colName); } - } else { + } else { // partition spec is not specified but column schema can have partitions specified - for(FieldSchema f : targetTable.getPartCols()) { + for (FieldSchema f : targetTable.getPartCols()) { //parser only allows foo(a,b), not foo(foo.a, foo.b) targetColumns.remove(f.getName()); } } } - if(!targetColumns.isEmpty()) { + if (!targetColumns.isEmpty()) { //Found some columns in user specified schema which are neither regular not dynamic partition columns throw new SemanticException(generateErrorMessage(tabColName, "'" + (targetColumns.size() == 1 ? 
targetColumns.iterator().next() : targetColumns) + @@ -2041,7 +2011,7 @@ private void getMaterializationMetadata(QB qb) throws SemanticException { // org.apache.commons.lang3.StringUtils LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); if (e instanceof SemanticException) { - throw (SemanticException)e; + throw (SemanticException) e; } throw new SemanticException(e.getMessage(), e); } @@ -2099,14 +2069,13 @@ private void getMetaData(QB qb, boolean enableMaterialization) throws SemanticEx getMetaData(qb, null); } catch (HiveException e) { if (e instanceof SemanticException) { - throw (SemanticException)e; + throw (SemanticException) e; } throw new SemanticException(e.getMessage(), e); } } - private void getMetaData(QBExpr qbexpr, ReadEntity parentInput) - throws HiveException { + private void getMetaData(QBExpr qbexpr, ReadEntity parentInput) throws HiveException { if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { getMetaData(qbexpr.getQB(), parentInput); } else { @@ -2115,9 +2084,7 @@ private void getMetaData(QBExpr qbexpr, ReadEntity parentInput) } } - @SuppressWarnings("nls") - private void getMetaData(QB qb, ReadEntity parentInput) - throws HiveException { + @SuppressWarnings("nls") private void getMetaData(QB qb, ReadEntity parentInput) throws HiveException { LOG.info("Get metadata for source tables"); // Go over the tables and populate the related structures. @@ -2129,8 +2096,7 @@ private void getMetaData(QB qb, ReadEntity parentInput) // For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T // keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1. // This is needed for tracking the dependencies for inputs, along with their parents. - Map> aliasToViewInfo = - new HashMap>(); + Map> aliasToViewInfo = new HashMap>(); /* * used to capture view to SQ conversions. This is used to check for @@ -2148,8 +2114,7 @@ private void getMetaData(QB qb, ReadEntity parentInput) Table newTab = tab.makeCopy(); tab = newTab; } - if (tab == null || - tab.getDbName().equals(SessionState.get().getCurrentDatabase())) { + if (tab == null || tab.getDbName().equals(SessionState.get().getCurrentDatabase())) { Table materializedTab = ctx.getMaterializedTable(cteName); if (materializedTab == null) { // we first look for this alias from CTE, and then from catalog. @@ -2168,7 +2133,7 @@ private void getMetaData(QB qb, ReadEntity parentInput) } if (tab == null) { - if(tabName.equals(DUMMY_DATABASE + "." + DUMMY_TABLE)) { + if (tabName.equals(DUMMY_DATABASE + "." 
+ DUMMY_TABLE)) { continue; } ASTNode src = qb.getParseInfo().getSrcForAlias(alias); @@ -2276,8 +2241,8 @@ private void getMetaData(QB qb, ReadEntity parentInput) switch (ast.getToken().getType()) { case HiveParser.TOK_TAB: { TableSpec ts = new TableSpec(db, conf, ast); - if (ts.tableHandle.isView() || - (mvRebuildMode == MaterializationRebuildMode.NONE && ts.tableHandle.isMaterializedView())) { + if (ts.tableHandle.isView() || (mvRebuildMode == MaterializationRebuildMode.NONE && ts.tableHandle + .isMaterializedView())) { throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg()); } @@ -2289,13 +2254,13 @@ private void getMetaData(QB qb, ReadEntity parentInput) ast, "The class is " + outputFormatClass.toString())); } - boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(ts.tableHandle.getDbName(), - ts.tableHandle.getTableName()); + boolean isTableWrittenTo = + qb.getParseInfo().isInsertIntoTable(ts.tableHandle.getDbName(), ts.tableHandle.getTableName()); isTableWrittenTo |= (qb.getParseInfo().getInsertOverwriteTables(). get(getUnescapedName((ASTNode) ast.getChild(0), ts.tableHandle.getDbName()).toLowerCase()) != null); assert isTableWrittenTo : - "Inconsistent data structure detected: we are writing to " + ts.tableHandle + " in " + - name + " but it's not in isInsertIntoTable() or getInsertOverwriteTables()"; + "Inconsistent data structure detected: we are writing to " + ts.tableHandle + " in " + name + + " but it's not in isInsertIntoTable() or getInsertOverwriteTables()"; // Disallow update and delete on non-acid tables boolean isFullAcid = AcidUtils.isFullAcidTable(ts.tableHandle); if ((updating(name) || deleting(name)) && !isFullAcid) { @@ -2396,7 +2361,7 @@ private void getMetaData(QB qb, ReadEntity parentInput) CreateTableDesc directoryDesc = new CreateTableDesc(); boolean directoryDescIsSet = false; int numCh = ast.getChildCount(); - for (int num = 1; num < numCh ; num++){ + for (int num = 1; num < numCh; num++) { ASTNode child = (ASTNode) ast.getChild(num); if (child != null) { if (storageFormat.fillStorageFormat(child)) { @@ -2415,7 +2380,7 @@ private void getMetaData(QB qb, ReadEntity parentInput) directoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim); directoryDesc.setFieldEscape(rowFormatParams.fieldEscape); directoryDesc.setNullFormat(rowFormatParams.nullFormat); - directoryDescIsSet=true; + directoryDescIsSet = true; break; case HiveParser.TOK_TABLESERIALIZER: ASTNode serdeChild = (ASTNode) child.getChild(0); @@ -2430,14 +2395,13 @@ private void getMetaData(QB qb, ReadEntity parentInput) } } } - if (directoryDescIsSet){ + if (directoryDescIsSet) { qb.setDirectoryDesc(directoryDesc); } break; } default: - throw new SemanticException(generateErrorMessage(ast, - "Unknown Token Type " + ast.getToken().getType())); + throw new SemanticException(generateErrorMessage(ast, "Unknown Token Type " + ast.getToken().getType())); } } } @@ -2451,7 +2415,8 @@ private void getMetaData(QB qb, ReadEntity parentInput) private boolean isPathEncrypted(Path path) throws HiveException { try { - HadoopShims.HdfsEncryptionShim hdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim(path.getFileSystem(conf)); + HadoopShims.HdfsEncryptionShim hdfsEncryptionShim = + SessionState.get().getHdfsEncryptionShim(path.getFileSystem(conf)); if (hdfsEncryptionShim != null) { if (hdfsEncryptionShim.isPathEncrypted(path)) { return true; @@ -2591,13 +2556,12 @@ private Path getStagingDirectoryPathname(QB qb) throws HiveException { return stagingPath; } - private void 
replaceViewReferenceWithDefinition(QB qb, Table tab, - String tab_name, String alias) throws SemanticException { + private void replaceViewReferenceWithDefinition(QB qb, Table tab, String tab_name, String alias) + throws SemanticException { ASTNode viewTree; - final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getTableName(), - tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias( - alias)); + final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getTableName(), tab.getViewExpandedText(), alias, + qb.getParseInfo().getSrcForAlias(alias)); try { // Reparse text, passing null for context to avoid clobbering // the top-level token stream. @@ -2605,24 +2569,19 @@ private void replaceViewReferenceWithDefinition(QB qb, Table tab, String viewText = tab.getViewExpandedText(); TableMask viewMask = new TableMask(this, conf, false); viewTree = ParseUtils.parse(viewText, ctx, tab.getCompleteName()); - if (!unparseTranslator.isEnabled() && - (viewMask.isEnabled() && analyzeRewrite == null)) { - viewTree = rewriteASTWithMaskAndFilter(viewMask, viewTree, - ctx.getViewTokenRewriteStream(viewFullyQualifiedName), - ctx, db, tabNameToTabObject); + if (!unparseTranslator.isEnabled() && (viewMask.isEnabled() && analyzeRewrite == null)) { + viewTree = + rewriteASTWithMaskAndFilter(viewMask, viewTree, ctx.getViewTokenRewriteStream(viewFullyQualifiedName), ctx, + db, tabNameToTabObject); } SemanticDispatcher nodeOriginDispatcher = new SemanticDispatcher() { - @Override - public Object dispatch(Node nd, java.util.Stack stack, - Object... nodeOutputs) { + @Override public Object dispatch(Node nd, java.util.Stack stack, Object... nodeOutputs) { ((ASTNode) nd).setOrigin(viewOrigin); return null; } }; - SemanticGraphWalker nodeOriginTagger = new DefaultGraphWalker( - nodeOriginDispatcher); - nodeOriginTagger.startWalking(java.util.Collections - . singleton(viewTree), null); + SemanticGraphWalker nodeOriginTagger = new DefaultGraphWalker(nodeOriginDispatcher); + nodeOriginTagger.startWalking(java.util.Collections.singleton(viewTree), null); } catch (ParseException e) { // A user could encounter this if a stored view definition contains // an old SQL construct which has been eliminated in a later Hive @@ -2640,9 +2599,9 @@ public Object dispatch(Node nd, java.util.Stack stack, // if it is inside a view, skip checking; // if authorization flag is not enabled, skip checking. // if HIVE_STATS_COLLECT_SCANCOLS is enabled, check. 
- if ((!this.skipAuthorization() && !qb.isInsideView() && HiveConf.getBoolVar(conf, - HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) - || HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) { + if ((!this.skipAuthorization() && !qb.isInsideView() && HiveConf + .getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) || HiveConf + .getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) { qb.rewriteViewToSubq(alias, tab_name, qbexpr, tab); } else { qb.rewriteViewToSubq(alias, tab_name, qbexpr, null); @@ -2676,7 +2635,7 @@ private String findAlias(ASTNode columnRef, String colName = unescapeIdentifier(columnRef.getChild(0).getText() .toLowerCase()); String tabAlias = null; - if ( aliasToOpInfo != null ) { + if (aliasToOpInfo != null) { for (Map.Entry opEntry : aliasToOpInfo.entrySet()) { Operator op = opEntry.getValue(); RowResolver rr = opParseCtx.get(op).getRowResolver(); @@ -2691,23 +2650,18 @@ private String findAlias(ASTNode columnRef, } } } - if ( tabAlias == null ) { - throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_TABLE_ALIAS.getMsg(), columnRef.getChild(0))); + if (tabAlias == null) { + throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(), columnRef.getChild(0))); } return tabAlias; } - @SuppressWarnings("nls") - void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn, - List leftAliases, List rightAliases, - List fields, - Map aliasToOpInfo) throws SemanticException { + @SuppressWarnings("nls") void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn, List leftAliases, + List rightAliases, List fields, Map aliasToOpInfo) throws SemanticException { // String[] allAliases = joinTree.getAllAliases(); switch (condn.getToken().getType()) { case HiveParser.TOK_TABLE_OR_COL: - String tableOrCol = unescapeIdentifier(condn.getChild(0).getText() - .toLowerCase()); + String tableOrCol = unescapeIdentifier(condn.getChild(0).getText().toLowerCase()); unparseTranslator.addIdentifierTranslation((ASTNode) condn.getChild(0)); if (isPresent(joinTree.getLeftAliases(), tableOrCol)) { if (!leftAliases.contains(tableOrCol)) { @@ -2723,7 +2677,7 @@ void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn, if (!leftAliases.contains(tableOrCol)) { leftAliases.add(tableOrCol); } - } else { + } else { if (!rightAliases.contains(tableOrCol)) { rightAliases.add(tableOrCol); } @@ -2771,16 +2725,16 @@ void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn, case HiveParser.TOK_FUNCTION: // check all the arguments for (int i = 1; i < condn.getChildCount(); i++) { - parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i), - leftAliases, rightAliases, null, aliasToOpInfo); + parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i), leftAliases, rightAliases, null, + aliasToOpInfo); } break; default: // This is an operator - so check whether it is unary or binary operator if (condn.getChildCount() == 1) { - parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), - leftAliases, rightAliases, null, aliasToOpInfo); + parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), leftAliases, rightAliases, null, + aliasToOpInfo); } else if (condn.getChildCount() == 2) { List fields1 = null; @@ -2811,8 +2765,7 @@ void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn, leftAliases, rightAliases, fields1, aliasToOpInfo); } } else { - throw new SemanticException(condn.toStringTree() + " encountered with " - + condn.getChildCount() + " children"); + 
throw new SemanticException(condn.toStringTree() + " encountered with " + condn.getChildCount() + " children"); } break; } @@ -2925,7 +2878,7 @@ void applyEqualityPredicateToQBJoinTree(QBJoinTree joinTree, if (leftTreeLeftSrc.size() == 1) { leftTree.setLeftAlias(leftTreeLeftSrc.get(0)); } - if ( pushedDown) { + if (pushedDown) { return; } } // leftTree != null @@ -3115,15 +3068,13 @@ private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, joinTree.addPostJoinFilter(joinCond); } else { if (!leftAliasNull) { - if (type.equals(JoinType.LEFTOUTER) - || type.equals(JoinType.FULLOUTER)) { + if (type.equals(JoinType.LEFTOUTER) || type.equals(JoinType.FULLOUTER)) { joinTree.getFilters().get(0).add(joinCond); } else { joinTree.getFiltersForPushing().get(0).add(joinCond); } } else { - if (type.equals(JoinType.RIGHTOUTER) - || type.equals(JoinType.FULLOUTER)) { + if (type.equals(JoinType.RIGHTOUTER) || type.equals(JoinType.FULLOUTER)) { joinTree.getFilters().get(1).add(joinCond); } else { joinTree.getFiltersForPushing().get(1).add(joinCond); @@ -3154,7 +3105,7 @@ private void extractJoinCondsFromWhereClause(QBJoinTree joinTree, ASTNode predic List leftCondAl2 = new ArrayList(); try { parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2, null, aliasToOpInfo); - } catch(SemanticException se) { + } catch (SemanticException se) { // suppress here; if it is a real issue will get caught in where clause handling. return; } @@ -3163,21 +3114,20 @@ private void extractJoinCondsFromWhereClause(QBJoinTree joinTree, ASTNode predic List rightCondAl1 = new ArrayList(); List rightCondAl2 = new ArrayList(); try { - parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, - rightCondAl2, null, aliasToOpInfo); - } catch(SemanticException se) { + parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1, rightCondAl2, null, aliasToOpInfo); + } catch (SemanticException se) { // suppress here; if it is a real issue will get caught in where clause handling. return; } - if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0)) - || ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) { + if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0)) || ((rightCondAl1.size() != 0) && ( + rightCondAl2.size() != 0))) { // this is not a join condition. return; } - if (((leftCondAl1.size() == 0) && (leftCondAl2.size() == 0)) - || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) { + if (((leftCondAl1.size() == 0) && (leftCondAl2.size() == 0)) || ((rightCondAl1.size() == 0) && ( + rightCondAl2.size() == 0))) { // this is not a join condition. Will get handled by predicate pushdown. 
return; } @@ -3249,7 +3199,7 @@ private Operator genHavingPlan(String dest, QB qb, Operator input, } protected ASTNode rewriteGroupingFunctionAST(final List grpByAstExprs, ASTNode targetNode, - final boolean noneSet) { + final boolean noneSet) { TreeVisitorAction action = new TreeVisitorAction() { @@ -3300,7 +3250,7 @@ public Object post(Object t) { // Create and add AST node with position of grouping function input // in group by clause ASTNode childN = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral, - String.valueOf(IntMath.mod(-j-1, grpByAstExprs.size())) + "L"); + String.valueOf(IntMath.mod(-j - 1, grpByAstExprs.size())) + "L"); newRoot.addChild(childN); break; } @@ -3371,7 +3321,7 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, */ List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); - if ( subQueriesInOriginalTree.size() > 0 ) { + if (subQueriesInOriginalTree.size() > 0) { /* * Restriction.9.m :: disallow nested SubQuery expressions. @@ -3385,7 +3335,7 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, /* * Restriction.8.m :: We allow only 1 SubQuery expression per Query. */ - if (subQueriesInOriginalTree.size() > 1 ) { + if (subQueriesInOriginalTree.size() > 1) { throw new SemanticException(ASTErrorUtils.getMsg( ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(), @@ -3398,7 +3348,7 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); - for(int i=0; i < subQueries.size(); i++) { + for (int i = 0; i < subQueries.size(); i++) { ASTNode subQueryAST = subQueries.get(i); ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); @@ -3408,14 +3358,14 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, originalSubQueryAST, ctx); - if ( !forHavingClause ) { + if (!forHavingClause) { qb.setWhereClauseSubQueryPredicate(subQuery); } else { qb.setHavingClauseSubQueryPredicate(subQuery); } String havingInputAlias = null; - if ( forHavingClause ) { + if (forHavingClause) { havingInputAlias = "gby_sq" + sqIdx; aliasToOpInfo.put(havingInputAlias, input); } @@ -3445,7 +3395,7 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, * If this is a Not In SubQuery Predicate then Join in the Null Check SubQuery. * See QBSubQuery.NotInCheck for details on why and how this is constructed. 
*/ - if ( subQuery.getNotInCheck() != null ) { + if (subQuery.getNotInCheck() != null) { QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck(); notInCheck.setSQRR(sqRR); QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true); @@ -3457,7 +3407,7 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, pushJoinFilters(qb, joinTree_nic, aliasToOpInfo, false); input = genJoinOperator(qbSQ_nic, joinTree_nic, aliasToOpInfo, input); inputRR = opParseCtx.get(input).getRowResolver(); - if ( forHavingClause ) { + if (forHavingClause) { aliasToOpInfo.put(havingInputAlias, input); } } @@ -3466,9 +3416,7 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, * Gen Join between outer Operator and SQ op */ subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); - QBJoinTree joinTree = genSQJoinTree(qb, subQuery, - input, - aliasToOpInfo); + QBJoinTree joinTree = genSQJoinTree(qb, subQuery, input, aliasToOpInfo); /* * push filters only for this QBJoinTree. Child QBJoinTrees have already been handled. */ @@ -3525,8 +3473,8 @@ private Operator genFilterPlan(QB qb, ASTNode condn, Operator input, boolean use * for inner joins push a 'is not null predicate' to the join sources for * every non nullSafe predicate. */ - private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, - QBJoinTree joinTree, ExprNodeDesc[] joinKeys) throws SemanticException { + private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, QBJoinTree joinTree, + ExprNodeDesc[] joinKeys) throws SemanticException { if (qb == null || joinTree == null) { return input; @@ -3541,13 +3489,13 @@ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, } Multimap hashes = ArrayListMultimap.create(); if (input instanceof FilterOperator) { - ExprNodeDescUtils.getExprNodeColumnDesc(Arrays.asList(((FilterDesc)input.getConf()).getPredicate()), hashes); + ExprNodeDescUtils.getExprNodeColumnDesc(Arrays.asList(((FilterDesc) input.getConf()).getPredicate()), hashes); } ExprNodeDesc filterPred = null; List nullSafes = joinTree.getNullSafes(); for (int i = 0; i < joinKeys.length; i++) { - if (nullSafes.get(i) || (joinKeys[i] instanceof ExprNodeColumnDesc && - ((ExprNodeColumnDesc)joinKeys[i]).getIsPartitionColOrVirtualCol())) { + if (nullSafes.get(i) || (joinKeys[i] instanceof ExprNodeColumnDesc && ((ExprNodeColumnDesc) joinKeys[i]) + .getIsPartitionColOrVirtualCol())) { // no need to generate is not null predicate for partitioning or // virtual column, since those columns can never be null. 
continue; @@ -3611,11 +3559,10 @@ Integer genExprNodeDescRegex(String colRegex, String tabAlias, ASTNode sel, } @SuppressWarnings("nls") - // TODO: make aliases unique, otherwise needless rewriting takes place - Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, - List> colList, Set excludeCols, RowResolver input, - RowResolver colSrcRR, Integer pos, RowResolver output, List aliases, - boolean ensureUniqueCols) throws SemanticException { + // TODO: make aliases unique, otherwise needless rewriting takes place + Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, List> colList, + Set excludeCols, RowResolver input, RowResolver colSrcRR, Integer pos, RowResolver output, + List aliases, boolean ensureUniqueCols) throws SemanticException { if (colSrcRR == null) { colSrcRR = input; @@ -3720,7 +3667,8 @@ Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, // from the input schema for (Map.Entry entry : fMap.entrySet()) { ColumnInfo colInfo = entry.getValue(); - if (colSrcRR.getNamedJoinInfo() != null && colSrcRR.getNamedJoinInfo().getNamedColumns().contains(colInfo.getAlias())) { + if (colSrcRR.getNamedJoinInfo() != null && colSrcRR.getNamedJoinInfo().getNamedColumns() + .contains(colInfo.getAlias())) { // we already added this column in select list. continue; } @@ -4192,8 +4140,8 @@ private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) return Pair.of(groupByExprs, groupingSets); } - private List getGroupingSets(List groupByExpr, QBParseInfo parseInfo, - String dest) throws SemanticException { + private List getGroupingSets(List groupByExpr, QBParseInfo parseInfo, String dest) + throws SemanticException { Map exprPos = new HashMap(); for (int i = 0; i < groupByExpr.size(); ++i) { ASTNode node = groupByExpr.get(i); @@ -4213,9 +4161,8 @@ private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) String treeAsString = child.getChild(j).toStringTree(); Integer pos = exprPos.get(treeAsString); if (pos == null) { - throw new SemanticException( - generateErrorMessage((ASTNode) child.getChild(j), - ErrorMsg.HIVE_GROUPING_SETS_EXPR_NOT_IN_GROUPBY.getErrorCodedMsg())); + throw new SemanticException(generateErrorMessage((ASTNode) child.getChild(j), + ErrorMsg.HIVE_GROUPING_SETS_EXPR_NOT_IN_GROUPBY.getErrorCodedMsg())); } bitmap = unsetBit(bitmap, groupByExpr.size() - pos - 1); @@ -4402,8 +4349,7 @@ boolean isRegex(String pattern, HiveConf conf) { return false; } for (int i = 0; i < pattern.length(); i++) { - if (!Character.isLetterOrDigit(pattern.charAt(i)) - && pattern.charAt(i) != '_') { + if (!Character.isLetterOrDigit(pattern.charAt(i)) && pattern.charAt(i) != '_') { return true; } } @@ -4506,11 +4452,9 @@ boolean isRegex(String pattern, HiveConf conf) { break; case HiveParser.TOK_TABALIAS: assert (selExprChild.getChildCount() == 1); - udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0) - .getText()); + udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0).getText()); qb.addAlias(udtfTableAlias); - unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild - .getChild(0)); + unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild.getChild(0)); break; default: assert (false); @@ -4694,7 +4638,7 @@ private RowResolver getColForInsertStmtSpec(Map targetCol2 } for (int i = 0; i < targetTableColNames.size(); i++) { String f = targetTableColNames.get(i); - if(targetCol2Projection.containsKey(f)) { + if (targetCol2Projection.containsKey(f)) { //put existing column in new list to make sure it is in 
the right position newColList.add(targetCol2Projection.get(f)); ColumnInfo ci = targetCol2ColumnInfo.get(f); @@ -4703,9 +4647,9 @@ private RowResolver getColForInsertStmtSpec(Map targetCol2 } else { //add new 'synthetic' columns for projections not provided by Select - assert(colNameToDefaultVal != null); + assert (colNameToDefaultVal != null); ExprNodeDesc exp = null; - if(colNameToDefaultVal.containsKey(f)) { + if (colNameToDefaultVal.containsKey(f)) { // make an expression for default value String defaultValue = colNameToDefaultVal.get(f); ParseDriver parseDriver = new ParseDriver(); @@ -4754,11 +4698,11 @@ RowResolver handleInsertStatementSpec(List col_list, String dest, ASTNode selExprList) throws SemanticException { //(z,x) List targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);//specified in the query - if(targetTableSchema == null) { + if (targetTableSchema == null) { //no insert schema was specified return outputRR; } - if(targetTableSchema.size() != col_list.size()) { + if (targetTableSchema.size() != col_list.size()) { Table target = qb.getMetaData().getDestTableForAlias(dest); Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null; throw new SemanticException(generateErrorMessage(selExprList, @@ -4771,7 +4715,7 @@ RowResolver handleInsertStatementSpec(List col_list, String dest, //e.g. map z->ColumnInfo for a Map targetCol2ColumnInfo = new HashMap(); int colListPos = 0; - for(String targetCol : targetTableSchema) { + for (String targetCol : targetTableSchema) { targetCol2ColumnInfo.put(targetCol, outputRR.getColumnInfos().get(colListPos)); targetCol2Projection.put(targetCol, col_list.get(colListPos++)); } @@ -4786,15 +4730,15 @@ RowResolver handleInsertStatementSpec(List col_list, String dest, List targetTableCols = target != null ? target.getCols() : partition.getCols(); List targetTableColNames = new ArrayList(); List targetTableColTypes = new ArrayList(); - for(FieldSchema fs : targetTableCols) { + for (FieldSchema fs : targetTableCols) { targetTableColNames.add(fs.getName()); targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType())); } Map partSpec = qb.getMetaData().getPartSpecForAlias(dest); - if(partSpec != null) { + if (partSpec != null) { //find dynamic partition columns //relies on consistent order via LinkedHashMap - for(Map.Entry partKeyVal : partSpec.entrySet()) { + for (Map.Entry partKeyVal : partSpec.entrySet()) { if (partKeyVal.getValue() == null) { targetTableColNames.add(partKeyVal.getKey());//these must be after non-partition cols targetTableColTypes.add(TypeInfoFactory.stringTypeInfo); @@ -5169,22 +5113,18 @@ private void addGroupingSetKey(List groupByKeys, // This function is called for ReduceSink to add the additional grouping keys introduced by // GroupBy1 into the reduce keys. 
private void processGroupingSetReduceSinkOperator(RowResolver reduceSinkInputRowResolver, - RowResolver reduceSinkOutputRowResolver, - List reduceKeys, - List outputKeyColumnNames, - Map colExprMap) throws SemanticException { + RowResolver reduceSinkOutputRowResolver, List reduceKeys, List outputKeyColumnNames, + Map colExprMap) throws SemanticException { // add a key for reduce sink String groupingSetColumnName = reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName(); - ExprNodeDesc inputExpr = new ExprNodeColumnDesc(VirtualColumn.GROUPINGID.getTypeInfo(), - groupingSetColumnName, null, false); + ExprNodeDesc inputExpr = + new ExprNodeColumnDesc(VirtualColumn.GROUPINGID.getTypeInfo(), groupingSetColumnName, null, false); reduceKeys.add(inputExpr); outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); - String field = Utilities.ReduceField.KEY.toString() + "." - + getColumnInternalName(reduceKeys.size() - 1); - ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get( - reduceKeys.size() - 1).getTypeInfo(), null, true); + String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(reduceKeys.size() - 1).getTypeInfo(), null, true); reduceSinkOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), colInfo); colExprMap.put(colInfo.getInternalName(), inputExpr); } @@ -5231,8 +5171,7 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr); if (exprInfo == null) { - throw new SemanticException(ASTErrorUtils.getMsg( - ErrorMsg.INVALID_COLUMN.getMsg(), grpbyExpr)); + throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_COLUMN.getMsg(), grpbyExpr)); } groupByKeys.add(new ExprNodeColumnDesc(exprInfo)); @@ -5263,8 +5202,7 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, groupByOutputRowResolver, outputColumnNames, colExprMap); - } - else { + } else { // The grouping set has not yet been processed. 
Create a new grouping key // Consider the query: select a,b, count(1) from T group by a,b with cube; // where it is being executed in 2 map-reduce jobs @@ -5539,17 +5477,14 @@ private Operator genGroupByPlanMapGroupByOperator(QB qb, GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator( aggName, aggParameters, value, isDistinct, isAllColumns); assert (genericUDAFEvaluator != null); - GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, - aggParameters); - aggregations.add(new AggregationDesc(aggName.toLowerCase(), - udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, - amode)); - String field = getColumnInternalName(groupByKeys.size() - + aggregations.size() - 1); + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); + aggregations.add( + new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, + amode)); + String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1); outputColumnNames.add(field); if (groupByOutputRowResolver.getExpression(value) == null) { - groupByOutputRowResolver.putExpression(value, new ColumnInfo( - field, udaf.returnType, "", false)); + groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false)); } // Save the evaluator so that it can be used by the next-stage // GroupByOperators @@ -5597,8 +5532,7 @@ private ReduceSinkOperator genGroupByPlanReduceSinkOperator(QB qb, boolean mapAggrDone, boolean groupingSetsPresent) throws SemanticException { - RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo) - .getRowResolver(); + RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver(); QBParseInfo parseInfo = qb.getParseInfo(); RowResolver reduceSinkOutputRowResolver = new RowResolver(); reduceSinkOutputRowResolver.setIsExprResolver(true); @@ -5630,16 +5564,16 @@ private ReduceSinkOperator genGroupByPlanReduceSinkOperator(QB qb, } } - List> distinctColIndices = getDistinctColIndicesForReduceSink(parseInfo, dest, - reduceKeys, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, - colExprMap); + List> distinctColIndices = + getDistinctColIndicesForReduceSink(parseInfo, dest, reduceKeys, reduceSinkInputRowResolver, + reduceSinkOutputRowResolver, outputKeyColumnNames, colExprMap); List reduceValues = new ArrayList(); Map aggregationTrees = parseInfo.getAggregationExprsForClause(dest); if (!mapAggrDone) { - getReduceValuesForReduceSinkNoMapAgg(parseInfo, dest, reduceSinkInputRowResolver, - reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues, colExprMap); + getReduceValuesForReduceSinkNoMapAgg(parseInfo, dest, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, + outputValueColumnNames, reduceValues, colExprMap); } else { // Put partial aggregation results in reduceValues int inputField = reduceKeys.size() + numOfColsRmedFromkey; @@ -6187,8 +6121,7 @@ private Operator genGroupByPlan1ReduceMultiGBY(List dests, QB qb, Operat List whereExpressions = new ArrayList(); for (String dest : dests) { - Pair, List> grpByExprsGroupingSets = - getGroupByGroupingSetsForClause(parseInfo, dest); + Pair, List> grpByExprsGroupingSets = getGroupByGroupingSetsForClause(parseInfo, dest); List groupingSets = grpByExprsGroupingSets.getRight(); if (!groupingSets.isEmpty()) { @@ -6540,8 +6473,7 @@ private Operator genGroupByPlanMapAggrNoSkew(String dest, QB qb, groupingSets, groupingSetsPresent && 
!groupingSetsNeedAdditionalMRJob); - groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get( - inputOperatorInfo).getRowResolver().getTableNames()); + groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get(inputOperatorInfo).getRowResolver().getTableNames()); int numReducers = -1; // Optimize the scenario when there are no grouping keys - only 1 reducer is @@ -6581,13 +6513,9 @@ private Operator genGroupByPlanMapAggrNoSkew(String dest, QB qb, // invoked on the reducer. In case of non-distincts, partial results are // used, and merge is invoked // on the reducer. - return genGroupByPlanGroupByOperator1(parseInfo, dest, - reduceSinkOperatorInfo, GroupByDesc.Mode.MERGEPARTIAL, - genericUDAFEvaluators, - groupingSets, groupingSetsPresent, groupingSetsNeedAdditionalMRJob); - } - else - { + return genGroupByPlanGroupByOperator1(parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.MERGEPARTIAL, + genericUDAFEvaluators, groupingSets, groupingSetsPresent, groupingSetsNeedAdditionalMRJob); + } else { // Add 'n' rows corresponding to the grouping sets. For each row, create 'n' rows, // one for each grouping set key. Since map-side aggregation has already been performed, // the number of rows would have been reduced. Moreover, the rows corresponding to the @@ -6724,10 +6652,9 @@ private Operator genGroupByPlanMapAggr2MR(String dest, QB qb, groupingSetsPresent); // ////// Generate GroupbyOperator for a partial aggregation - Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo, - dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS, - genericUDAFEvaluators, - groupingSets, groupingSetsPresent, false); + Operator groupByOperatorInfo2 = + genGroupByPlanGroupByOperator1(parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS, + genericUDAFEvaluators, groupingSets, groupingSetsPresent, false); int numReducers = -1; if (grpByExprs.isEmpty()) { @@ -6759,13 +6686,12 @@ private Operator genGroupByPlanMapAggr2MR(String dest, QB qb, true, groupingSetsPresent); - return genGroupByPlanGroupByOperator2MR(parseInfo, dest, - reduceSinkOperatorInfo, genericUDAFEvaluators, false); + return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo, genericUDAFEvaluators, false); } } private int getReducersBucketing(int totalFiles, int maxReducers) { - int numFiles = (int)Math.ceil((double)totalFiles / (double)maxReducers); + int numFiles = (int) Math.ceil((double) totalFiles / (double) maxReducers); while (true) { if (totalFiles % numFiles == 0) { return totalFiles / numFiles; @@ -6874,21 +6800,20 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb, partnCols = getPartitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, false); } } else { - if(updating(dest) || deleting(dest)) { + if (updating(dest) || deleting(dest)) { partnCols = getPartitionColsFromBucketColsForUpdateDelete(input, true); enforceBucketing = true; } } - if ((dest_tab.getSortCols() != null) && - (dest_tab.getSortCols().size() > 0)) { + if ((dest_tab.getSortCols() != null) && (dest_tab.getSortCols().size() > 0)) { sortCols = getSortCols(dest, qb, dest_tab, table_desc, input); sortOrders = getSortOrders(dest_tab); if (!enforceBucketing) { throw new SemanticException(ErrorMsg.TBL_SORTED_NOT_BUCKETED.getErrorCodedMsg(dest_tab.getCompleteName())); } - } else if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SORT_WHEN_BUCKETING) && - enforceBucketing && !updating(dest) && !deleting(dest)) { + } else if (HiveConf.getBoolVar(conf, 
HiveConf.ConfVars.HIVE_SORT_WHEN_BUCKETING) && enforceBucketing && !updating( + dest) && !deleting(dest)) { sortCols = new ArrayList<>(); for (ExprNodeDesc expr : partnCols) { sortCols.add(expr.clone()); @@ -6900,8 +6825,8 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb, } if (enforceBucketing) { - Operation acidOp = AcidUtils.isFullAcidTable(dest_tab) ? getAcidType(table_desc.getOutputFileFormatClass(), - dest, AcidUtils.isInsertOnlyTable(dest_tab)) : Operation.NOT_ACID; + Operation acidOp = AcidUtils.isFullAcidTable(dest_tab) ? getAcidType(table_desc.getOutputFileFormatClass(), dest, + AcidUtils.isInsertOnlyTable(dest_tab)) : Operation.NOT_ACID; int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS); if (conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS) > 0) { maxReducers = conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS); @@ -6916,14 +6841,12 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb, totalFiles = numBuckets; if (totalFiles % maxReducers == 0) { numFiles = totalFiles / maxReducers; - } - else { + } else { // find the number of reducers such that it is a divisor of totalFiles maxReducers = getReducersBucketing(totalFiles, maxReducers); numFiles = totalFiles / maxReducers; } - } - else if (acidOp == Operation.NOT_ACID || acidOp == Operation.INSERT) { + } else if (acidOp == Operation.NOT_ACID || acidOp == Operation.INSERT) { maxReducers = numBuckets; } @@ -7062,8 +6985,8 @@ private Operator genMaterializedViewDataOrgPlan(List sortColInfos, L rsRR.addMappingOnly(nm2[0], nm2[1], colInfo); } } - Operator result = putOpInsertMap(OperatorFactory.getAndMakeChild( - rsConf, new RowSchema(rsSignature), input), rsRR); + Operator result = + putOpInsertMap(OperatorFactory.getAndMakeChild(rsConf, new RowSchema(rsSignature), input), rsRR); result.setColumnExprMap(colExprMap); // Create SEL operator @@ -7084,10 +7007,12 @@ private Operator genMaterializedViewDataOrgPlan(List sortColInfos, L String colName = colInfo.getInternalName(); ExprNodeDesc exprNodeDesc; if (keys.contains(colName)) { - exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(), ReduceField.KEY.toString() + "." + colName, null, false); + exprNodeDesc = + new ExprNodeColumnDesc(colInfo.getType(), ReduceField.KEY.toString() + "." + colName, null, false); columnExprs.add(exprNodeDesc); } else { - exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(), ReduceField.VALUE.toString() + "." + colName, null, false); + exprNodeDesc = + new ExprNodeColumnDesc(colInfo.getType(), ReduceField.VALUE.toString() + "." 
+ colName, null, false); columnExprs.add(exprNodeDesc); } colNames.add(colName); @@ -7104,18 +7029,18 @@ private void setStatsForNonNativeTable(String dbName, String tableName) throws S TableName qTableName = HiveTableName.ofNullable(tableName, dbName); Map mapProp = new HashMap<>(); mapProp.put(StatsSetupConst.COLUMN_STATS_ACCURATE, null); - AlterTableUnsetPropertiesDesc alterTblDesc = new AlterTableUnsetPropertiesDesc(qTableName, null, null, false, - mapProp, false, null); + AlterTableUnsetPropertiesDesc alterTblDesc = + new AlterTableUnsetPropertiesDesc(qTableName, null, null, false, mapProp, false, null); this.rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc))); } private boolean mergeCardinalityViolationBranch(final Operator input) { - if(input instanceof SelectOperator) { - SelectOperator selectOp = (SelectOperator)input; - if(selectOp.getConf().getColList().size() == 1) { + if (input instanceof SelectOperator) { + SelectOperator selectOp = (SelectOperator) input; + if (selectOp.getConf().getColList().size() == 1) { ExprNodeDesc colExpr = selectOp.getConf().getColList().get(0); - if(colExpr instanceof ExprNodeGenericFuncDesc) { - ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc)colExpr ; + if (colExpr instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) colExpr; return func.getGenericUDF() instanceof GenericUDFCardinalityViolation; } } @@ -7235,8 +7160,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) dpCtx = qbm.getDPCtx(dest); if (dpCtx == null) { destinationTable.validatePartColumnNames(partSpec, false); - dpCtx = new DynamicPartitionCtx(partSpec, - conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), + dpCtx = new DynamicPartitionCtx(partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE)); qbm.setDPCtx(dest, dpCtx); } @@ -7489,7 +7413,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES .getMsg(destinationTable.getTableName() + "@" + destinationPartition.getName())); } - break; + break; } case QBMetaData.DEST_LOCAL_FILE: isLocal = true; @@ -7651,7 +7575,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) Class serdeClass = LazySimpleSerDe.class; if (fileFormat.equals(PlanUtils.LLAP_OUTPUT_FORMAT_KEY)) { boolean useArrow = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_OUTPUT_FORMAT_ARROW); - if(useArrow) { + if (useArrow) { serdeClass = ArrowColumnarBatchSerDe.class; } else { serdeClass = LazyBinarySerDe2.class; @@ -7753,7 +7677,6 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throw new SemanticException("Unknown destination type: " + destType); } - inputRR = opParseCtx.get(input).getRowResolver(); List vecCol = new ArrayList(); @@ -8039,8 +7962,7 @@ private FileSinkDesc createFileSinkDesc(String dest, TableDesc table_desc, //INSERT [OVERWRITE] path String destTableFullName = dest_tab.getCompleteName().replace('@', '.'); Map iowMap = qb.getParseInfo().getInsertOverwriteTables(); - if (iowMap.containsKey(destTableFullName) && - qb.getParseInfo().isDestToOpTypeInsertOverwrite(dest)) { + if (iowMap.containsKey(destTableFullName) && qb.getParseInfo().isDestToOpTypeInsertOverwrite(dest)) { isInsertOverwrite = true; } break; @@ -8061,9 +7983,9 @@ private FileSinkDesc createFileSinkDesc(String dest, TableDesc table_desc, 
fileSinkDesc.setHiveServerQuery(isHiveServerQuery); // If this is an insert, update, or delete on an ACID table then mark that so the // FileSinkOperator knows how to properly write to it. - boolean isDestInsertOnly = (dest_part != null && dest_part.getTable() != null && - AcidUtils.isInsertOnlyTable(dest_part.getTable().getParameters())) - || (table_desc != null && AcidUtils.isInsertOnlyTable(table_desc.getProperties())); + boolean isDestInsertOnly = (dest_part != null && dest_part.getTable() != null && AcidUtils + .isInsertOnlyTable(dest_part.getTable().getParameters())) || (table_desc != null && AcidUtils + .isInsertOnlyTable(table_desc.getProperties())); if (isDestInsertOnly) { fileSinkDesc.setWriteType(Operation.INSERT); @@ -8071,8 +7993,8 @@ private FileSinkDesc createFileSinkDesc(String dest, TableDesc table_desc, } if (destTableIsAcid) { - AcidUtils.Operation wt = updating(dest) ? AcidUtils.Operation.UPDATE : - (deleting(dest) ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT); + AcidUtils.Operation wt = updating(dest) ? AcidUtils.Operation.UPDATE : (deleting( + dest) ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT); fileSinkDesc.setWriteType(wt); acidFileSinks.add(fileSinkDesc); } @@ -8228,11 +8150,9 @@ private void checkImmutableTable(QB qb, Table dest_tab, Path dest_path, boolean } try { FileSystem fs = dest_path.getFileSystem(conf); - if (! org.apache.hadoop.hive.metastore.utils.FileUtils.isDirEmpty(fs,dest_path)){ - LOG.warn("Attempted write into an immutable table : " - + dest_tab.getTableName() + " : " + dest_path); - throw new SemanticException( - ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName())); + if (!org.apache.hadoop.hive.metastore.utils.FileUtils.isDirEmpty(fs, dest_path)) { + LOG.warn("Attempted write into an immutable table : " + dest_tab.getTableName() + " : " + dest_path); + throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName())); } } catch (IOException ioe) { LOG.warn("Error while trying to determine if immutable table " @@ -8249,21 +8169,20 @@ private DynamicPartitionCtx checkDynPart(QB qb, QBMetaData qbm, Table dest_tab, return null; // table is not partitioned } if (partSpec == null || partSpec.size() == 0) { // user did NOT specify partition - throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), - ErrorMsg.NEED_PARTITION_ERROR.getMsg())); + throw new SemanticException( + generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.NEED_PARTITION_ERROR.getMsg())); } DynamicPartitionCtx dpCtx = qbm.getDPCtx(dest); if (dpCtx == null) { dest_tab.validatePartColumnNames(partSpec, false); - dpCtx = new DynamicPartitionCtx(partSpec, - conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), + dpCtx = new DynamicPartitionCtx(partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE)); qbm.setDPCtx(dest, dpCtx); } if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // allow DP - throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), - ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg())); + throw new SemanticException( + generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg())); } if ((dest_tab.getNumBuckets() > 0)) { dpCtx.setNumBuckets(dest_tab.getNumBuckets()); @@ -8273,18 +8192,17 @@ private DynamicPartitionCtx checkDynPart(QB qb, QBMetaData qbm, Table dest_tab, private 
void createPreInsertDesc(Table table, boolean overwrite) { PreInsertTableDesc preInsertTableDesc = new PreInsertTableDesc(table, overwrite); - this.rootTasks - .add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), preInsertTableDesc))); + this.rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), preInsertTableDesc))); } - private void genAutoColumnStatsGatheringPipeline(Table table, Map partSpec, Operator curr, boolean isInsertInto, boolean useTableValueConstructor) throws SemanticException { LOG.info("Generate an operator pipeline to autogather column stats for table " + table.getTableName() + " in query " + ctx.getCmd()); ColumnStatsAutoGatherContext columnStatsAutoGatherContext = null; - columnStatsAutoGatherContext = new ColumnStatsAutoGatherContext(this, conf, curr, table, partSpec, isInsertInto, ctx); + columnStatsAutoGatherContext = + new ColumnStatsAutoGatherContext(this, conf, curr, table, partSpec, isInsertInto, ctx); if (useTableValueConstructor) { // Table does not exist, use table value constructor to simulate columnStatsAutoGatherContext.insertTableValuesAnalyzePipeline(); @@ -8411,30 +8329,28 @@ private Operator genConversionSelectOperator(String dest, QB qb, Operator input, // create ExprNodeDesc ColumnInfo inputColumn = rowFields.get(dpColIdx + rowFieldsOffset); TypeInfo inputTypeInfo = inputColumn.getType(); - ExprNodeDesc column = - new ExprNodeColumnDesc(inputTypeInfo, inputColumn.getInternalName(), "", true); + ExprNodeDesc column = new ExprNodeColumnDesc(inputTypeInfo, inputColumn.getInternalName(), "", true); // Cast input column to destination column type if necessary. if (conf.getBoolVar(DYNAMICPARTITIONCONVERT)) { if (parts != null && !parts.isEmpty()) { String destPartitionName = dpCtx.getDPColNames().get(dpColIdx); - FieldSchema destPartitionFieldSchema = parts.stream() - .filter(dynamicPartition -> dynamicPartition.getName().equals(destPartitionName)) - .findFirst().orElse(null); + FieldSchema destPartitionFieldSchema = + parts.stream().filter(dynamicPartition -> dynamicPartition.getName().equals(destPartitionName)) + .findFirst().orElse(null); if (destPartitionFieldSchema == null) { - throw new IllegalStateException("Partition schema for dynamic partition " + - destPartitionName + " not found in DynamicPartitionCtx."); + throw new IllegalStateException("Partition schema for dynamic partition " + destPartitionName + + " not found in DynamicPartitionCtx."); } String partitionType = destPartitionFieldSchema.getType(); if (partitionType == null) { - throw new IllegalStateException("Couldn't get FieldSchema for partition" + - destPartitionFieldSchema.getName()); + throw new IllegalStateException( + "Couldn't get FieldSchema for partition" + destPartitionFieldSchema.getName()); } - PrimitiveTypeInfo partitionTypeInfo = - TypeInfoFactory.getPrimitiveTypeInfo(partitionType); + PrimitiveTypeInfo partitionTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(partitionType); if (!partitionTypeInfo.equals(inputTypeInfo)) { - column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() - .createConversionCast(column, partitionTypeInfo); + column = + ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(column, partitionTypeInfo); converted = true; } } else { @@ -8937,11 +8853,11 @@ private Operator genReduceSinkPlan(Operator input, List partiti new RowSchema(rsRR.getColumnInfos()), input), rsRR); List keyColNames = rsdesc.getOutputKeyColumnNames(); - for (int i = 0 ; i < keyColNames.size(); i++) { + for (int i = 0; i < keyColNames.size(); 
i++) { colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), sortCols.get(i)); } List valueColNames = rsdesc.getOutputValueColumnNames(); - for (int i = 0 ; i < valueColNames.size(); i++) { + for (int i = 0; i < valueColNames.size(); i++) { colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), valueCols.get(i)); } interim.setColumnExprMap(colExprMap); @@ -9252,14 +9168,13 @@ private Operator genJoinReduceSinkChild(ExprNodeDesc[] joinKeys, reduceKeys.size(), numReds, AcidUtils.Operation.NOT_ACID, defaultNullOrder); ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( - OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(outputRR.getColumnInfos()), - child), outputRR); + OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(outputRR.getColumnInfos()), child), outputRR); List keyColNames = rsDesc.getOutputKeyColumnNames(); - for (int i = 0 ; i < keyColNames.size(); i++) { + for (int i = 0; i < keyColNames.size(); i++) { colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i)); } List valColNames = rsDesc.getOutputValueColumnNames(); - for (int i = 0 ; i < valColNames.size(); i++) { + for (int i = 0; i < valColNames.size(); i++) { colExprMap.put(Utilities.ReduceField.VALUE + "." + valColNames.get(i), reduceValues.get(i)); } @@ -9279,7 +9194,7 @@ private Operator genJoinOperator(QB qb, QBJoinTree joinTree, joinSrcOp = genJoinOperator(qb, leftChild, map, null); } - if ( joinSrcOp != null ) { + if (joinSrcOp != null) { List filter = joinTree.getFiltersForPushing().get(0); for (ASTNode cond : filter) { joinSrcOp = genFilterPlan(qb, cond, joinSrcOp, false); @@ -9325,7 +9240,7 @@ private Operator genJoinOperator(QB qb, QBJoinTree joinTree, for (int i = 0; i < srcOps.length; i++) { // generate a ReduceSink operator for the join - String[] srcs = baseSrc[i] != null ? new String[] {baseSrc[i]} : joinTree.getLeftAliases(); + String[] srcs = baseSrc[i] != null ? new String[] { baseSrc[i] } : joinTree.getLeftAliases(); if (!isCBOExecuted()) { srcOps[i] = genNotNullFilterForJoinSourcePlan(qb, srcOps[i], joinTree, joinKeys[i]); } @@ -9350,7 +9265,7 @@ private Operator genJoinOperator(QB qb, QBJoinTree joinTree, // Safety check for postconditions throw new SemanticException("Post-filtering conditions should have been added to the JOIN operator"); } - for(ASTNode condn : joinTree.getPostJoinFilters()) { + for (ASTNode condn : joinTree.getPostJoinFilters()) { topOp = genFilterPlan(qb, condn, topOp, false); } } @@ -9519,10 +9434,9 @@ private Operator genMapGroupByForSemijoin(List fields, Operator inpu } // Add implicit type conversion if necessary for (int i = 0; i < keys.length; i++) { - if (TypeInfoUtils.isConversionRequiredForComparison( - keys[i][k].getTypeInfo(), commonType)) { + if (TypeInfoUtils.isConversionRequiredForComparison(keys[i][k].getTypeInfo(), commonType)) { keys[i][k] = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() - .createConversionCast(keys[i][k], (PrimitiveTypeInfo)commonType); + .createConversionCast(keys[i][k], (PrimitiveTypeInfo) commonType); } else { // For the case no implicit type conversion, e.g., varchar(5) and varchar(10), // pick the common type for all the keys since during run-time, same key type is assumed. 
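
A minimal, self-contained sketch of the join-key type normalization that the hunk above only re-wraps: when the two sides of a join key carry different primitive types, a common comparison type is chosen and the mismatched side is wrapped in a conversion cast, so every join input emits the same key type at run time. MiniType, Expr and JoinKeyNormalizationSketch below are hypothetical stand-ins, not Hive classes; the actual code path goes through TypeInfoUtils.isConversionRequiredForComparison and createConversionCast, as shown in the surrounding diff.

import java.util.List;

// Hypothetical mini model: MiniType stands in for Hive's primitive TypeInfo and
// Expr for an ExprNodeDesc that carries an expression string plus its type.
public class JoinKeyNormalizationSketch {

  // Ordered roughly by "width", so the wider type can serve as the common one.
  enum MiniType { INT, BIGINT, DOUBLE, STRING }

  record Expr(String text, MiniType type) { }

  // Common comparison type for two key expressions: here simply the wider of the two.
  static MiniType commonComparisonType(MiniType a, MiniType b) {
    return a.ordinal() >= b.ordinal() ? a : b;
  }

  // Wrap the expression in a cast only when its type differs from the target,
  // analogous to createConversionCast being applied only when
  // isConversionRequiredForComparison returns true in the hunk above.
  static Expr castIfNeeded(Expr e, MiniType target) {
    return e.type() == target ? e : new Expr("CAST(" + e.text() + " AS " + target + ")", target);
  }

  public static void main(String[] args) {
    // The k-th key of a two-way join: INT on the left input, BIGINT on the right.
    List<Expr> keys = List.of(new Expr("t1.id", MiniType.INT), new Expr("t2.id", MiniType.BIGINT));
    MiniType common = commonComparisonType(keys.get(0).type(), keys.get(1).type());
    // Normalize both sides so each join input emits the same key type at run time.
    for (Expr key : keys) {
      System.out.println(castIfNeeded(key, common));
    }
    // Prints Expr[text=CAST(t1.id AS BIGINT), type=BIGINT] then Expr[text=t2.id, type=BIGINT].
  }
}

As the trailing comment in the hunk notes, the real implementation also reuses the chosen common type for a given key position across all join inputs, even when no cast is strictly required.
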
@@ -9578,25 +9492,22 @@ private void pushJoinFilters(QB qb, QBJoinTree joinTree, private List getMapSideJoinTables(QB qb) { List cols = new ArrayList(); - ASTNode hints = qb.getParseInfo().getHints(); for (int pos = 0; pos < hints.getChildCount(); pos++) { ASTNode hint = (ASTNode) hints.getChild(pos); if (((ASTNode) hint.getChild(0)).getToken().getType() == HintParser.TOK_MAPJOIN) { // the user has specified to ignore mapjoin hint - if (!conf.getBoolVar(HiveConf.ConfVars.HIVEIGNOREMAPJOINHINT) - && !conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { + if (!conf.getBoolVar(HiveConf.ConfVars.HIVEIGNOREMAPJOINHINT) && !conf + .getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { ASTNode hintTblNames = (ASTNode) hint.getChild(1); int numCh = hintTblNames.getChildCount(); for (int tblPos = 0; tblPos < numCh; tblPos++) { - String tblName = ((ASTNode) hintTblNames.getChild(tblPos)).getText() - .toLowerCase(); + String tblName = ((ASTNode) hintTblNames.getChild(tblPos)).getText().toLowerCase(); if (!cols.contains(tblName)) { cols.add(tblName); } } - } - else { + } else { queryProperties.setMapJoinRemoved(true); } } @@ -9642,9 +9553,8 @@ private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree, String tableName = getUnescapedUnqualifiedTableName((ASTNode) child.getChild(0)); - String alias = child.getChildCount() == 1 ? tableName - : unescapeIdentifier(child.getChild(child.getChildCount() - 1) - .getText().toLowerCase()); + String alias = child.getChildCount() == 1 ? tableName : unescapeIdentifier( + child.getChild(child.getChildCount() - 1).getText().toLowerCase()); if (i == 0) { leftAliases.add(alias); @@ -9902,8 +9812,7 @@ private QBJoinTree genJoinTree(QB qb, ASTNode joinParseTree, children[0] = alias; joinTree.setBaseSrc(children); joinTree.setId(qb.getId()); - joinTree.getAliasToOpInfo().put( - getModifiedAlias(qb, alias), aliasToOpInfo.get(alias)); + joinTree.getAliasToOpInfo().put(getModifiedAlias(qb, alias), aliasToOpInfo.get(alias)); } else if (isJoinLeftToken) { QBJoinTree leftTree = genJoinTree(qb, left, aliasToOpInfo); joinTree.setJoinSrc(leftTree); @@ -9930,8 +9839,7 @@ private QBJoinTree genJoinTree(QB qb, ASTNode joinParseTree, children[1] = alias; joinTree.setBaseSrc(children); joinTree.setId(qb.getId()); - joinTree.getAliasToOpInfo().put( - getModifiedAlias(qb, alias), aliasToOpInfo.get(alias)); + joinTree.getAliasToOpInfo().put(getModifiedAlias(qb, alias), aliasToOpInfo.get(alias)); // remember rhs table for semijoin if (!joinTree.getNoSemiJoin()) { joinTree.addRHSSemijoin(alias); @@ -10075,7 +9983,7 @@ private void parseStreamTables(QBJoinTree joinTree, QB qb) { } } int curIdx = 0; - while(curIdx < args.getChildCount()) { + while (curIdx < args.getChildCount()) { curIdx = parseSingleSemiJoinHint(args, curIdx, result); } } @@ -10191,7 +10099,7 @@ private void mergeJoins(QBJoinTree node, QBJoinTree target, int pos, int[] tgtTo for (int i = 0; i < nodeRightAliases.length; i++) { List nodeConds = node.getExpressions().get(i + 1); List reordereNodeConds = new ArrayList(); - for(int k=0; k < tgtToNodeExprMap.length; k++) { + for (int k = 0; k < tgtToNodeExprMap.length; k++) { reordereNodeConds.add(nodeConds.get(tgtToNodeExprMap[k])); } expr.add(reordereNodeConds); @@ -10253,10 +10161,10 @@ private void mergeJoins(QBJoinTree node, QBJoinTree target, int pos, int[] tgtTo * - if many: add as a filter from merging trees. 
*/ - for(ASTNode nodeFilter : node.getFiltersForPushing().get(0) ) { + for (ASTNode nodeFilter : node.getFiltersForPushing().get(0)) { int fPos = ParseUtils.checkJoinFilterRefersOneAlias(target.getBaseSrc(), nodeFilter); - if ( fPos != - 1 ) { + if (fPos != -1) { filter.get(fPos).add(nodeFilter); } else { target.addPostJoinFilter(nodeFilter); @@ -10304,7 +10212,7 @@ private void mergeJoins(QBJoinTree node, QBJoinTree target, int pos, int[] tgtTo if (node.getPostJoinFilters().size() != 0) { // Safety check: if we are merging join operators and there are post-filtering // conditions, they cannot be outer joins - assert node.getNoOuterJoin() ; + assert node.getNoOuterJoin(); assert target.getPostJoinFilters().size() == 0 || target.getNoOuterJoin(); for (ASTNode exprPostFilter : node.getPostJoinFilters()) { target.addPostJoinFilter(exprPostFilter); @@ -10336,7 +10244,7 @@ private void mergeJoins(QBJoinTree node, QBJoinTree target, int pos, int[] tgtTo } } - if ( targetCondn == null || (nodeCondn.size() != targetCondn.size())) { + if (targetCondn == null || (nodeCondn.size() != targetCondn.size())) { return Pair.of(-1, null); } @@ -10350,22 +10258,22 @@ private void mergeJoins(QBJoinTree node, QBJoinTree target, int pos, int[] tgtTo int[] tgtToNodeExprMap = new int[targetCondn.size()]; boolean[] nodeFiltersMapped = new boolean[nodeCondn.size()]; int i, j; - for(i=0; i inner most(n) List trees = new ArrayList(); - for (;tree != null; tree = tree.getJoinSrc()) { + for (; tree != null; tree = tree.getJoinSrc()) { trees.add(tree); } @@ -10440,7 +10348,7 @@ private void mergeJoinTree(QB qb) { if (prevType != null && prevType != currType) { break; } - if(!shouldMerge(node, target)) { + if (!shouldMerge(node, target)) { // Outer joins with post-filtering conditions cannot be merged break; } @@ -10554,8 +10462,7 @@ private Operator genSelectAllDesc(Operator input) { return commonGroupByDestGroups; } - List> inputOperators = - new ArrayList>(ks.size()); + List> inputOperators = new ArrayList>(ks.size()); // We will try to combine multiple clauses into a smaller number with compatible keys. 
List> newSprayKeyLists = new ArrayList>(ks.size()); List> newDistinctKeyLists = new ArrayList>(ks.size()); @@ -10742,11 +10649,9 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT // There is no group by in that representative query) or // The data is skewed or // The conf variable used to control combining group bys into a single reducer is false - if (commonGroupByDestGroup.size() == 1 || - (qbp.getAggregationExprsForClause(firstDest).size() == 0 && - getGroupByForClause(qbp, firstDest).size() == 0) || - conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) || - !conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEREDUCER)) { + if (commonGroupByDestGroup.size() == 1 || (qbp.getAggregationExprsForClause(firstDest).size() == 0 + && getGroupByForClause(qbp, firstDest).size() == 0) || conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) + || !conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEREDUCER)) { // Go over all the destination tables for (String dest : commonGroupByDestGroup) { @@ -10759,13 +10664,12 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT // Preserve operator before the GBY - we'll use it to resolve '*' Operator gbySource = curr; - if ((qbp.getAggregationExprsForClause(dest).size() != 0 - || getGroupByForClause(qbp, dest).size() > 0) - && (qbp.getSelForClause(dest).getToken().getType() != HiveParser.TOK_SELECTDI - || qbp.getWindowingExprsForClause(dest) == null)) { + if ((qbp.getAggregationExprsForClause(dest).size() != 0 || getGroupByForClause(qbp, dest).size() > 0) && ( + qbp.getSelForClause(dest).getToken().getType() != HiveParser.TOK_SELECTDI + || qbp.getWindowingExprsForClause(dest) == null)) { // multiple distincts is not supported with skew in data - if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) && - qbp.getDistinctFuncExprsForClause(dest).size() > 1) { + if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) + && qbp.getDistinctFuncExprsForClause(dest).size() > 1) { throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS. getMsg()); } @@ -10780,8 +10684,8 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer because it will match // SEL%SEL% rule. 
ASTNode selExprList = qbp.getSelForClause(dest); - if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI - && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) { + if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI && selExprList.getChildCount() == 1 + && selExprList.getChild(0).getChildCount() == 1) { ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0); if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) { curr = genSelectPlan(dest, qb, curr, curr); @@ -10802,8 +10706,9 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT } } if (LOG.isDebugEnabled()) { - LOG.debug("RR before GB " + opParseCtx.get(gbySource).getRowResolver() - + " after GB " + opParseCtx.get(curr).getRowResolver()); + LOG.debug( + "RR before GB " + opParseCtx.get(gbySource).getRowResolver() + " after GB " + opParseCtx.get(curr) + .getRowResolver()); } curr = genPostGroupByBodyPlan(curr, dest, qb, aliasToOpInfo, gbySource); @@ -10820,9 +10725,8 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT } private Map> createInputForDests(QB qb, - Operator input, Set dests) throws SemanticException { - Map> inputs = - new HashMap>(); + Operator input, Set dests) throws SemanticException { + Map> inputs = new HashMap>(); for (String dest : dests) { inputs.put(dest, genLateralViewPlanForDest(dest, qb, input)); } @@ -10843,7 +10747,7 @@ private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb, curr = genHavingPlan(dest, qb, curr, aliasToOpInfo); } - if(queryProperties.hasWindowing() && qb.getWindowingSpec(dest) != null) { + if (queryProperties.hasWindowing() && qb.getWindowingSpec(dest) != null) { curr = genWindowingPlan(qb, qb.getWindowingSpec(dest), curr); // GBy for DISTINCT after windowing if ((qbp.getAggregationExprsForClause(dest).size() != 0 @@ -10907,7 +10811,6 @@ private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb, curr = genReduceSinkPlan(dest, qb, curr, numReducers, hasOrderBy); } - if (qbp.getIsSubQ()) { if (limit != null) { // In case of order by, only 1 reducer is used, so no need of @@ -11013,20 +10916,18 @@ private Operator genUnionPlan(String unionalias, String leftalias, leftOp.getParentOperators().get(0) instanceof UnionOperator && ((SelectOperator)leftOp).isIdentitySelect()) ) { - if(!(leftOp instanceof UnionOperator)) { + if (!(leftOp instanceof UnionOperator)) { Operator oldChild = leftOp; leftOp = (Operator) leftOp.getParentOperators().get(0); leftOp.removeChildAndAdoptItsChildren(oldChild); } // make left a child of right - List> child = - new ArrayList>(); + List> child = new ArrayList>(); child.add(leftOp); rightOp.setChildOperators(child); - List> parent = leftOp - .getParentOperators(); + List> parent = leftOp.getParentOperators(); parent.add(rightOp); UnionDesc uDesc = ((UnionOperator) leftOp).getConf(); @@ -11041,20 +10942,18 @@ private Operator genUnionPlan(String unionalias, String leftalias, rightOp.getParentOperators().get(0) instanceof UnionOperator && ((SelectOperator)rightOp).isIdentitySelect()) ) { - if(!(rightOp instanceof UnionOperator)) { + if (!(rightOp instanceof UnionOperator)) { Operator oldChild = rightOp; rightOp = (Operator) rightOp.getParentOperators().get(0); rightOp.removeChildAndAdoptItsChildren(oldChild); } // make right a child of left - List> child = - new ArrayList>(); + List> child = new ArrayList>(); child.add(rightOp); leftOp.setChildOperators(child); - List> parent = rightOp - .getParentOperators(); + List> parent = 
rightOp.getParentOperators(); parent.add(leftOp); UnionDesc uDesc = ((UnionOperator) rightOp).getConf(); uDesc.setNumInputs(uDesc.getNumInputs() + 1); @@ -11228,8 +11127,7 @@ protected String getAliasId(String alias, QB qb) { return (qb.getId() == null ? alias : qb.getId() + ":" + alias).toLowerCase(); } - @SuppressWarnings("nls") - private Operator genTablePlan(String alias, QB qb) throws SemanticException { + @SuppressWarnings("nls") private Operator genTablePlan(String alias, QB qb) throws SemanticException { String alias_id = getAliasId(alias, qb); Table tab = qb.getMetaData().getSrcForAlias(alias); @@ -11385,26 +11283,21 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { // input pruning is enough; add the filter for the optimizer to use it // later LOG.info("No need for sample filter"); - ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, - colsEqual, alias, rwsch, null, - tab.getBucketingVersion()); - FilterDesc filterDesc = new FilterDesc( - samplePredicate, true, new SampleDesc(ts.getNumerator(), - ts.getDenominator(), tabBucketCols, true)); + ExprNodeDesc samplePredicate = + genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, null, tab.getBucketingVersion()); + FilterDesc filterDesc = new FilterDesc(samplePredicate, true, + new SampleDesc(ts.getNumerator(), ts.getDenominator(), tabBucketCols, true)); filterDesc.setGenerated(true); - op = OperatorFactory.getAndMakeChild(filterDesc, - new RowSchema(rwsch.getColumnInfos()), top); + op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top); } else { // need to add filter // create tableOp to be filterDesc and set as child to 'top' LOG.info("Need sample filter"); - ExprNodeDesc samplePredicate = genSamplePredicate(ts, tabBucketCols, - colsEqual, alias, rwsch, null, - tab.getBucketingVersion()); + ExprNodeDesc samplePredicate = + genSamplePredicate(ts, tabBucketCols, colsEqual, alias, rwsch, null, tab.getBucketingVersion()); FilterDesc filterDesc = new FilterDesc(samplePredicate, true); filterDesc.setGenerated(true); - op = OperatorFactory.getAndMakeChild(filterDesc, - new RowSchema(rwsch.getColumnInfos()), top); + op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top); } } else { boolean testMode = conf.getBoolVar(ConfVars.HIVETESTMODE); @@ -11430,15 +11323,12 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { TableSample tsSample = new TableSample(1, numBuckets); tsSample.setInputPruning(true); qb.getParseInfo().setTabSample(alias, tsSample); - ExprNodeDesc samplePred = genSamplePredicate(tsSample, tab - .getBucketCols(), true, alias, rwsch, null, - tab.getBucketingVersion()); + ExprNodeDesc samplePred = + genSamplePredicate(tsSample, tab.getBucketCols(), true, alias, rwsch, null, tab.getBucketingVersion()); FilterDesc filterDesc = new FilterDesc(samplePred, true, - new SampleDesc(tsSample.getNumerator(), tsSample - .getDenominator(), tab.getBucketCols(), true)); + new SampleDesc(tsSample.getNumerator(), tsSample.getDenominator(), tab.getBucketCols(), true)); filterDesc.setGenerated(true); - op = OperatorFactory.getAndMakeChild(filterDesc, - new RowSchema(rwsch.getColumnInfos()), top); + op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top); LOG.info("No need for sample filter"); } else { // The table is not bucketed, add a dummy filter :: rand() @@ -11448,14 +11338,12 @@ private Operator genTablePlan(String alias, QB qb) throws 
SemanticException { qb.getParseInfo().setTabSample(alias, tsSample); LOG.info("Need sample filter"); ExprNodeDesc randFunc = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() - .getFuncExprNodeDesc("rand", - new ExprNodeConstantDesc(Integer.valueOf(460476415))); - ExprNodeDesc samplePred = genSamplePredicate(tsSample, null, false, - alias, rwsch, randFunc, tab.getBucketingVersion()); + .getFuncExprNodeDesc("rand", new ExprNodeConstantDesc(Integer.valueOf(460476415))); + ExprNodeDesc samplePred = + genSamplePredicate(tsSample, null, false, alias, rwsch, randFunc, tab.getBucketingVersion()); FilterDesc filterDesc = new FilterDesc(samplePred, true); filterDesc.setGenerated(true); - op = OperatorFactory.getAndMakeChild(filterDesc, - new RowSchema(rwsch.getColumnInfos()), top); + op = OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(rwsch.getColumnInfos()), top); } } } @@ -11563,8 +11451,8 @@ private Operator genPlan(QB parent, QBExpr qbexpr) throws SemanticException { Operator qbexpr1Ops = genPlan(parent, qbexpr.getQBExpr1()); Operator qbexpr2Ops = genPlan(parent, qbexpr.getQBExpr2()); - return genUnionPlan(qbexpr.getAlias(), qbexpr.getQBExpr1().getAlias(), - qbexpr1Ops, qbexpr.getQBExpr2().getAlias(), qbexpr2Ops); + return genUnionPlan(qbexpr.getAlias(), qbexpr.getQBExpr1().getAlias(), qbexpr1Ops, qbexpr.getQBExpr2().getAlias(), + qbexpr2Ops); } return null; } @@ -11608,7 +11496,7 @@ private Operator genPlan(QB qb, boolean skipAmbiguityCheck) // Recurse over all the source tables for (String alias : qb.getTabAliases()) { - if(alias.equals(DUMMY_TABLE)) { + if (alias.equals(DUMMY_TABLE)) { continue; } Operator op = genTablePlan(alias, qb); @@ -11627,24 +11515,23 @@ private Operator genPlan(QB qb, boolean skipAmbiguityCheck) Operator srcOpInfo = null; Operator lastPTFOp = null; - if(queryProperties.hasPTF()){ + if (queryProperties.hasPTF()) { //After processing subqueries and source tables, process // partitioned table functions Map ptfNodeToSpec = qb.getPTFNodeToSpec(); - if ( ptfNodeToSpec != null ) { - for(Entry entry : ptfNodeToSpec.entrySet()) { + if (ptfNodeToSpec != null) { + for (Entry entry : ptfNodeToSpec.entrySet()) { ASTNode ast = entry.getKey(); PTFInvocationSpec spec = entry.getValue(); String inputAlias = spec.getQueryInputName(); Operator inOp = aliasToOpInfo.get(inputAlias); - if ( inOp == null ) { - throw new SemanticException(generateErrorMessage(ast, - "Cannot resolve input Operator for PTF invocation")); + if (inOp == null) { + throw new SemanticException(generateErrorMessage(ast, "Cannot resolve input Operator for PTF invocation")); } lastPTFOp = genPTFPlan(spec, inOp); String ptfAlias = spec.getFunction().getAlias(); - if ( ptfAlias != null ) { + if (ptfAlias != null) { aliasToOpInfo.put(ptfAlias, lastPTFOp); } } @@ -11656,7 +11543,6 @@ private Operator genPlan(QB qb, boolean skipAmbiguityCheck) // appropriate operators to the TS genLateralViewPlans(aliasToOpInfo, qb); - // process join if (qb.getParseInfo().getJoinExpr() != null) { ASTNode joinExpr = qb.getParseInfo().getJoinExpr(); @@ -11672,13 +11558,11 @@ private Operator genPlan(QB qb, boolean skipAmbiguityCheck) * as Join conditions */ Set dests = qb.getParseInfo().getClauseNames(); - if ( dests.size() == 1 && joinTree.getNoOuterJoin()) { + if (dests.size() == 1 && joinTree.getNoOuterJoin()) { String dest = dests.iterator().next(); ASTNode whereClause = qb.getParseInfo().getWhrForClause(dest); - if ( whereClause != null ) { - extractJoinCondsFromWhereClause(joinTree, - (ASTNode) whereClause.getChild(0), - 
aliasToOpInfo ); + if (whereClause != null) { + extractJoinCondsFromWhereClause(joinTree, (ASTNode) whereClause.getChild(0), aliasToOpInfo); } } @@ -11853,8 +11737,7 @@ private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) int allColumns = allPathRR.getColumnInfos().size(); // Get the UDTF Path QB blankQb = new QB(null, null, false); - Operator udtfPath = genSelectPlan(null, (ASTNode) lateralViewTree - .getChild(0), blankQb, lvForward, null, + Operator udtfPath = genSelectPlan(null, (ASTNode) lateralViewTree.getChild(0), blankQb, lvForward, null, lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER); // add udtf aliases to QB for (String udtfAlias : blankQb.getAliases()) { @@ -11870,7 +11753,6 @@ private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) RowResolver lateralViewRR = new RowResolver(); List outputInternalColNames = new ArrayList(); - // For PPD, we need a column to expression map so that during the walk, // the processor knows how to transform the internal col names. // Following steps are dependant on the fact that we called @@ -12135,7 +12017,8 @@ private void walkASTMarkTABREF(TableMask tableMask, ASTNode ast, Set cte } basicInfos.put(new HivePrivilegeObject(table.getDbName(), table.getTableName(), colNames), - new MaskAndFilterInfo(colTypes, additionalTabInfo.toString(), alias, astNode, table.isView(), table.isNonNative())); + new MaskAndFilterInfo(colTypes, additionalTabInfo.toString(), alias, astNode, table.isView(), + table.isNonNative())); } } if (astNode.getChildCount() > 0 && !IGNORED_TOKENS.contains(astNode.getToken().getType())) { @@ -12274,25 +12157,23 @@ boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) throws Sema viewsExpanded.add(createVwDesc.getViewName()); } - switch(ast.getToken().getType()) { + switch (ast.getToken().getType()) { case HiveParser.TOK_SET_AUTOCOMMIT: assert ast.getChildCount() == 1; - if(ast.getChild(0).getType() == HiveParser.TOK_TRUE) { + if (ast.getChild(0).getType() == HiveParser.TOK_TRUE) { setAutoCommitValue(true); - } - else if(ast.getChild(0).getType() == HiveParser.TOK_FALSE) { + } else if (ast.getChild(0).getType() == HiveParser.TOK_FALSE) { setAutoCommitValue(false); - } - else { + } else { assert false : "Unexpected child of TOK_SET_AUTOCOMMIT: " + ast.getChild(0).getType(); } //fall through case HiveParser.TOK_START_TRANSACTION: case HiveParser.TOK_COMMIT: case HiveParser.TOK_ROLLBACK: - if(!(conf.getBoolVar(ConfVars.HIVE_IN_TEST) || conf.getBoolVar(ConfVars.HIVE_IN_TEZ_TEST))) { - throw new IllegalStateException(HiveOperation.operationForToken(ast.getToken().getType()) + - " is not supported yet."); + if (!(conf.getBoolVar(ConfVars.HIVE_IN_TEST) || conf.getBoolVar(ConfVars.HIVE_IN_TEZ_TEST))) { + throw new IllegalStateException( + HiveOperation.operationForToken(ast.getToken().getType()) + " is not supported yet."); } queryState.setCommandType(HiveOperation.operationForToken(ast.getToken().getType())); return false; @@ -12308,9 +12189,55 @@ else if(ast.getChild(0).getType() == HiveParser.TOK_FALSE) { // if phase1Result false return return false; } + + // 5. 
Set write id for HMS client + if (getTxnMgr().supportsAcid() && conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY) == null) { + + ValidTxnWriteIdList txnWriteIds = null; + + if (conf.get(ValidTxnWriteIdList.COMPACTOR_VALID_TABLES_WRITEIDS_KEY) != null) { + txnWriteIds = new ValidTxnWriteIdList(conf.getLong(ValidTxnList.COMPACTOR_VALID_TXNS_ID_KEY, 0)); + txnWriteIds.addTableValidWriteIdList( + new ValidReaderWriteIdList(conf.get(ValidTxnWriteIdList.COMPACTOR_VALID_TABLES_WRITEIDS_KEY))); + } else { + List tabNames = new ArrayList<>(); + for (String tabName : collectTables(qb)) { + String fullName = TableName + .fromString(tabName, SessionState.get().getCurrentCatalog(), SessionState.get().getCurrentDatabase()) + .getDbTable(); + tabNames.add(fullName); + } + + if (!tabNames.isEmpty()) { + String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); + + try { + if ((txnString == null) || (txnString.isEmpty())) { + txnString = getTxnMgr().getValidTxns().toString(); + conf.set(ValidTxnList.VALID_TXNS_KEY, txnString); + } + + txnWriteIds = getTxnMgr().getValidWriteIds(tabNames, txnString); + } catch (LockException e) { + throw new SemanticException("Failed to fetch write Id from TxnManager", e); + } + } + } + + if (txnWriteIds != null) { + conf.set(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY, txnWriteIds.toString()); + try { + db.getMSC().setValidWriteIdList(txnWriteIds.toString()); + Hive.get().getMSC().setValidWriteIdList(txnWriteIds.toString()); + } catch (HiveException | MetaException e) { + throw new SemanticException("Failed to set write Id for HMS client", e); + } + } + } + LOG.info("Completed phase 1 of Semantic Analysis"); - // 5. Resolve Parse Tree + // 6. Resolve Parse Tree // Materialization is allowed if it is not a view definition getMetaData(qb, createVwDesc == null); LOG.info("Completed getting MetaData in Semantic Analysis"); @@ -12318,6 +12245,42 @@ else if(ast.getChild(0).getType() == HiveParser.TOK_FALSE) { return true; } + private Set collectTables(QBExpr qbExpr) { + Set result = new HashSet<>(); + if (qbExpr.getQB() != null) { + result.addAll(collectTables(qbExpr.getQB())); + } else { + if (qbExpr.getQBExpr1() != null) { + result.addAll(collectTables(qbExpr.getQBExpr1())); + } + if (qbExpr.getQBExpr2() != null) { + result.addAll(collectTables(qbExpr.getQBExpr2())); + } + } + return result; + } + + private Set collectTables(QB qb) { + Set result = new HashSet<>(); + for (String alias : qb.getTabAliases()) { + result.add(qb.getTabNameForAlias(alias)); + } + for (String alias : qb.getSubqAliases()) { + QBExpr qbExpr = qb.getSubqForAlias(alias); + if (qbExpr.getQB() != null) { + result.addAll(collectTables(qbExpr.getQB())); + } else { + if (qbExpr.getQBExpr1() != null) { + result.addAll(collectTables(qbExpr.getQBExpr1())); + } + if (qbExpr.getQBExpr2() != null) { + result.addAll(collectTables(qbExpr.getQBExpr2())); + } + } + } + return result; + } + void getHintsFromQB(QB qb, List hints) { if (qb.getParseInfo().getHints() != null) { hints.add(qb.getParseInfo().getHints()); @@ -12456,11 +12419,10 @@ void analyzeInternal(ASTNode ast, Supplier pcf) throws SemanticE Operator sinkOp = genOPTree(ast, plannerCtx); boolean usesMasking = false; - if (!unparseTranslator.isEnabled() && - (tableMask.isEnabled() && analyzeRewrite == null)) { + if (!unparseTranslator.isEnabled() && (tableMask.isEnabled() && analyzeRewrite == null)) { // Here we rewrite the * and also the masking table - ASTNode rewrittenAST = rewriteASTWithMaskAndFilter(tableMask, astForMasking, 
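[Editor's aside, not part of the patch] The new phase-1 step above only builds a write-id snapshot when the conf does not already carry one: it collects every table referenced by the query, asks the transaction manager for their valid write ids in a single getValidWriteIds call, caches the serialized list in the conf, and hands the same string to the HMS client. The only recursive piece is the table collection, and the two collectTables helpers added above walk the QB/QBExpr tree: a leaf block contributes its own table aliases plus whatever its sub-queries reference, while a union expression contributes whatever its two children contribute. A self-contained sketch of that traversal follows; QueryBlock and QueryExpr are illustrative stand-ins, not Hive's QB/QBExpr classes.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/** Stand-in for a query block: direct FROM-clause tables plus nested sub-queries. */
final class QueryBlock {
  List<String> tableNames = new ArrayList<>();
  List<QueryExpr> subQueries = new ArrayList<>();
}

/** Stand-in for a query-block expression: either a single block or a union of two children. */
final class QueryExpr {
  QueryBlock block;      // non-null for a leaf
  QueryExpr left, right; // non-null for a union
}

public class TableCollector {
  /** Tables referenced anywhere under a query-block expression. */
  static Set<String> collect(QueryExpr expr) {
    Set<String> result = new HashSet<>();
    if (expr == null) {
      return result;
    }
    if (expr.block != null) {
      result.addAll(collect(expr.block));
    } else {
      result.addAll(collect(expr.left));
      result.addAll(collect(expr.right));
    }
    return result;
  }

  /** Tables referenced by a query block: its own tables plus those of its sub-queries. */
  static Set<String> collect(QueryBlock qb) {
    Set<String> result = new HashSet<>(qb.tableNames);
    for (QueryExpr sub : qb.subQueries) {
      result.addAll(collect(sub));
    }
    return result;
  }
}

The diff resumes below.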
ctx.getTokenRewriteStream(), - ctx, db, tabNameToTabObject); + ASTNode rewrittenAST = rewriteASTWithMaskAndFilter(tableMask, astForMasking, ctx.getTokenRewriteStream(), ctx, db, + tabNameToTabObject); if (astForMasking != rewrittenAST) { usesMasking = true; plannerCtx = pcf.get(); @@ -12564,8 +12526,7 @@ void analyzeInternal(ASTNode ast, Supplier pcf) throws SemanticE pCtx = t.transform(pCtx); } // we just use view name as location. - queryState.getLineageState() - .mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp); + queryState.getLineageState().mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp); } return; } @@ -12650,8 +12611,7 @@ void analyzeInternal(ASTNode ast, Supplier pcf) throws SemanticE QueryResultsCache.QueryInfo queryInfo = createCacheQueryInfoForQuery(lookupInfo); // Specify that the results of this query can be cached. - setCacheUsage(new CacheUsage( - CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS, queryInfo)); + setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS, queryInfo)); } } } @@ -12660,7 +12620,7 @@ void analyzeInternal(ASTNode ast, Supplier pcf) throws SemanticE private void putAccessedColumnsToReadEntity(Set inputs, ColumnAccessInfo columnAccessInfo) { Map> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap(); if (tableToColumnAccessMap != null && !tableToColumnAccessMap.isEmpty()) { - for(ReadEntity entity: inputs) { + for (ReadEntity entity : inputs) { List cols; switch (entity.getType()) { case TABLE: @@ -12797,14 +12757,12 @@ protected void saveViewDefinition() throws SemanticException { // descriptor. List partColNames = createVwDesc.getPartColNames(); if (partColNames.size() > derivedSchema.size()) { - throw new SemanticException( - ErrorMsg.VIEW_PARTITION_MISMATCH.getMsg()); + throw new SemanticException(ErrorMsg.VIEW_PARTITION_MISMATCH.getMsg()); } // Get the partition columns from the end of derivedSchema. - List partitionColumns = derivedSchema.subList( - derivedSchema.size() - partColNames.size(), - derivedSchema.size()); + List partitionColumns = + derivedSchema.subList(derivedSchema.size() - partColNames.size(), derivedSchema.size()); // Verify that the names match the PARTITIONED ON clause. Iterator colNameIter = partColNames.iterator(); @@ -12884,8 +12842,7 @@ protected void saveViewDefinition() throws SemanticException { /** * Generates an expression node descriptor for the expression with TypeCheckCtx. */ - public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) - throws SemanticException { + public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) throws SemanticException { // Since the user didn't supply a customized type-checking context, // use default settings. return genExprNodeDesc(expr, input, true, false); @@ -12970,17 +12927,14 @@ private ExprNodeDesc getExprNodeDescCached(ASTNode expr, RowResolver input) // Create the walker and the rules dispatcher. 
tcCtx.setUnparseTranslator(unparseTranslator); - Map nodeOutputs = - ExprNodeTypeCheck.genExprNode(expr, tcCtx); + Map nodeOutputs = ExprNodeTypeCheck.genExprNode(expr, tcCtx); ExprNodeDesc desc = nodeOutputs.get(expr); if (desc == null) { - String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr - .getChild(0).getText()); + String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()); ColumnInfo colInfo = input.get(null, tableOrCol); String errMsg; - if (colInfo == null && input.getIsExprResolver()){ - errMsg = ASTErrorUtils.getMsg( - ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(), expr); + if (colInfo == null && input.getIsExprResolver()) { + errMsg = ASTErrorUtils.getMsg(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(), expr); } else { errMsg = tcCtx.getError(); } @@ -13101,15 +13055,13 @@ public void validate() throws SemanticException { // Do not check for ACID; it does not create new parts and this is expensive as hell. // TODO: add an API to get table name list for archived parts with a single call; // nobody uses this so we could skip the whole thing. - conflictingArchive = ArchiveUtils - .conflictingArchiveNameOrNull(db, tbl, usedp.getSpec()); + conflictingArchive = ArchiveUtils.conflictingArchiveNameOrNull(db, tbl, usedp.getSpec()); } } catch (HiveException e) { throw new SemanticException(e); } if (conflictingArchive != null) { - String message = String.format("Insert conflict with existing archive: %s", - conflictingArchive); + String message = String.format("Insert conflict with existing archive: %s", conflictingArchive); throw new SemanticException(message); } } else if (type == WriteEntity.Type.TABLE) { @@ -13123,15 +13075,13 @@ public void validate() throws SemanticException { } } - if (type != WriteEntity.Type.TABLE && - type != WriteEntity.Type.PARTITION) { + if (type != WriteEntity.Type.TABLE && type != WriteEntity.Type.PARTITION) { LOG.debug("not validating writeEntity, because entity is neither table nor partition"); continue; } } - boolean reworkMapredWork = HiveConf.getBoolVar(this.conf, - HiveConf.ConfVars.HIVE_REWORK_MAPREDWORK); + boolean reworkMapredWork = HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_REWORK_MAPREDWORK); // validate all tasks for (Task rootTask : rootTasks) { @@ -13139,8 +13089,7 @@ public void validate() throws SemanticException { } } - private void validate(Task task, boolean reworkMapredWork) - throws SemanticException { + private void validate(Task task, boolean reworkMapredWork) throws SemanticException { Utilities.reworkMapRedWork(task, reworkMapredWork, conf); if (task.getChildTasks() == null) { return; @@ -13208,11 +13157,10 @@ private void updateDefaultTblProps(Map source, Map source, Map convertToAcidByDefault( - StorageFormat storageFormat, String qualifiedTableName, List sortCols, - Map retValue) { + private Map convertToAcidByDefault(StorageFormat storageFormat, String qualifiedTableName, + List sortCols, Map retValue) { /*for CTAS, TransactionalValidationListener.makeAcid() runs to late to make table Acid so the initial write ends up running as non-acid...*/ try { - Class inputFormatClass = storageFormat.getInputFormat() == null ? null : - Class.forName(storageFormat.getInputFormat()); - Class outputFormatClass = storageFormat.getOutputFormat() == null ? 
null : - Class.forName(storageFormat.getOutputFormat()); - if (inputFormatClass == null || outputFormatClass == null || - !AcidInputFormat.class.isAssignableFrom(inputFormatClass) || - !AcidOutputFormat.class.isAssignableFrom(outputFormatClass)) { + Class inputFormatClass = + storageFormat.getInputFormat() == null ? null : Class.forName(storageFormat.getInputFormat()); + Class outputFormatClass = + storageFormat.getOutputFormat() == null ? null : Class.forName(storageFormat.getOutputFormat()); + if (inputFormatClass == null || outputFormatClass == null || !AcidInputFormat.class + .isAssignableFrom(inputFormatClass) || !AcidOutputFormat.class.isAssignableFrom(outputFormatClass)) { return retValue; } } catch (ClassNotFoundException e) { - LOG.warn("Could not verify InputFormat=" + storageFormat.getInputFormat() + " or OutputFormat=" + - storageFormat.getOutputFormat() + " for " + qualifiedTableName); + LOG.warn("Could not verify InputFormat=" + storageFormat.getInputFormat() + " or OutputFormat=" + storageFormat + .getOutputFormat() + " for " + qualifiedTableName); return retValue; } if (sortCols != null && !sortCols.isEmpty()) { @@ -13267,27 +13213,27 @@ private void updateDefaultTblProps(Map source, Map partCols, final List defConstraints, - final List notNullConstraints, - final List checkConstraints) { - for(FieldSchema partFS: partCols) { - for(SQLDefaultConstraint dc:defConstraints) { - if(dc.getColumn_name().equals(partFS.getName())) { + final List notNullConstraints, final List checkConstraints) { + for (FieldSchema partFS : partCols) { + for (SQLDefaultConstraint dc : defConstraints) { + if (dc.getColumn_name().equals(partFS.getName())) { return true; } } - for(SQLCheckConstraint cc:checkConstraints) { - if(cc.getColumn_name().equals(partFS.getName())) { + for (SQLCheckConstraint cc : checkConstraints) { + if (cc.getColumn_name().equals(partFS.getName())) { return true; } } - for(SQLNotNullConstraint nc:notNullConstraints) { - if(nc.getColumn_name().equals(partFS.getName()) && nc.isEnable_cstr()) { + for (SQLNotNullConstraint nc : notNullConstraints) { + if (nc.getColumn_name().equals(partFS.getName()) && nc.isEnable_cstr()) { return true; } } } return false; } + /** * Analyze the create table command. If it is a regular create-table or * create-table-like statements, we create a DDLWork and return true. 
If it is @@ -13310,8 +13256,8 @@ ASTNode analyzeCreateTable( List foreignKeys = new ArrayList(); List uniqueConstraints = new ArrayList<>(); List notNullConstraints = new ArrayList<>(); - List defaultConstraints= new ArrayList<>(); - List checkConstraints= new ArrayList<>(); + List defaultConstraints = new ArrayList<>(); + List checkConstraints = new ArrayList<>(); List sortCols = new ArrayList(); int numBuckets = -1; String comment = null; @@ -13372,12 +13318,10 @@ ASTNode analyzeCreateTable( likeTableName = getUnescapedName((ASTNode) child.getChild(0)); if (likeTableName != null) { if (command_type == CTAS) { - throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE - .getMsg()); + throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg()); } if (cols.size() != 0) { - throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE - .getMsg()); + throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE.getMsg()); } } command_type = CTLT; @@ -13454,8 +13398,7 @@ ASTNode analyzeCreateTable( child = (ASTNode) child.getChild(0); storageFormat.setSerde(unescapeSQLString(child.getChild(0).getText())); if (child.getChildCount() == 2) { - readProps((ASTNode) (child.getChild(1).getChild(0)), - storageFormat.getSerdeProps()); + readProps((ASTNode) (child.getChild(1).getChild(0)), storageFormat.getSerdeProps()); } break; case HiveParser.TOK_TABLESKEWED: @@ -13565,7 +13508,7 @@ ASTNode analyzeCreateTable( } tblProps = validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary, isTransactional); - addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, + addDbAndTabToOutputs(new String[] { qualifiedTabName.getDb(), qualifiedTabName.getTable() }, TableType.MANAGED_TABLE, false, tblProps); CreateTableDesc crtTranTblDesc = @@ -13629,7 +13572,8 @@ ASTNode analyzeCreateTable( // dumpTable is only used to check the conflict for non-temporary tables try { Table dumpTable = db.newTable(dbDotTab); - if (null != db.getTable(dumpTable.getDbName(), dumpTable.getTableName(), false) && !ctx.isExplainSkipExecution()) { + if (null != db.getTable(dumpTable.getDbName(), dumpTable.getTableName(), false) && !ctx + .isExplainSkipExecution()) { throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(dbDotTab)); } } catch (HiveException e) { @@ -13643,12 +13587,12 @@ ASTNode analyzeCreateTable( FileStatus locStats = null; try { curFs = locPath.getFileSystem(conf); - if(curFs != null) { + if (curFs != null) { locStats = curFs.getFileStatus(locPath); } if (locStats != null && locStats.isDir()) { FileStatus[] lStats = curFs.listStatus(locPath); - if(lStats != null && lStats.length != 0) { + if (lStats != null && lStats.length != 0) { // Don't throw an exception if the target location only contains the staging-dirs for (FileStatus lStat : lStats) { if (!lStat.getPath().getName().startsWith(HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR))) { @@ -13661,7 +13605,7 @@ ASTNode analyzeCreateTable( //we will create the folder if it does not exist. 
} catch (IOException ioE) { if (LOG.isDebugEnabled()) { - LOG.debug("Exception when validate folder ",ioE); + LOG.debug("Exception when validate folder ", ioE); } } @@ -13671,9 +13615,9 @@ ASTNode analyzeCreateTable( "Partition columns can only declared using their names in CTAS statements"); } - tblProps = validateAndAddDefaultProperties( - tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary, isTransactional); - addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, + tblProps = validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, + isTemporary, isTransactional); + addDbAndTabToOutputs(new String[] { qualifiedTabName.getDb(), qualifiedTabName.getTable() }, TableType.MANAGED_TABLE, isTemporary, tblProps); tableDesc = new CreateTableDesc(qualifiedTabName, isExt, isTemporary, cols, partColNames, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, @@ -14008,7 +13952,7 @@ void processPositionAlias(ASTNode ast) throws SemanticException { while (!stack.isEmpty()) { ASTNode next = stack.pop(); - if (next.getChildCount() == 0) { + if (next.getChildCount() == 0) { continue; } @@ -14085,14 +14029,12 @@ void processPositionAlias(ASTNode ast) throws SemanticException { if (pos > 0 && pos <= selectExpCnt && selectNode.getChild(pos - 1).getChildCount() > 0) { colNode.setChild(0, selectNode.getChild(pos - 1).getChild(0)); } else { - throw new SemanticException( - ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg( - "Position alias: " + pos + " does not exist\n" + - "The Select List is indexed from 1 to " + selectExpCnt)); + throw new SemanticException(ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg( + "Position alias: " + pos + " does not exist\n" + "The Select List is indexed from 1 to " + + selectExpCnt)); } } else { - throw new SemanticException( - ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg()); + throw new SemanticException(ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg()); } } else { //if not using position alias and it is a number. 
warn("Using constant number " + node.getText() + @@ -14105,7 +14047,7 @@ void processPositionAlias(ASTNode ast) throws SemanticException { ArrayList childrenList = next.getChildren(); for (int i = childrenList.size() - 1; i >= 0; i--) { - stack.push((ASTNode)childrenList.get(i)); + stack.push((ASTNode) childrenList.get(i)); } } } @@ -14115,7 +14057,7 @@ void processPositionAlias(ASTNode ast) throws SemanticException { * @param tree * @throws SemanticException */ - protected void processNoScanCommand (ASTNode tree) throws SemanticException { + protected void processNoScanCommand(ASTNode tree) throws SemanticException { // check if it is noscan command checkNoScan(tree); @@ -14182,7 +14124,7 @@ void setQB(QB qb) { this.qb = qb; } -//--------------------------- PTF handling ----------------------------------- + //--------------------------- PTF handling ----------------------------------- /* * - a partitionTableFunctionSource can be a tableReference, a SubQuery or another @@ -14196,8 +14138,7 @@ private PTFInputSpec processPTFSource(QB qb, ASTNode inputNode) throws SemanticE PTFInputSpec qInSpec = null; int type = inputNode.getType(); String alias; - switch(type) - { + switch (type) { case HiveParser.TOK_TABREF: alias = processTable(qb, inputNode); qInSpec = new PTFQueryInputSpec(); @@ -14229,8 +14170,7 @@ private PTFInputSpec processPTFSource(QB qb, ASTNode inputNode) throws SemanticE * - a partitionTableFunctionSource can be a tableReference, a SubQuery or another * PTF invocation. */ - private PartitionedTableFunctionSpec processPTFChain(QB qb, ASTNode ptf) - throws SemanticException{ + private PartitionedTableFunctionSpec processPTFChain(QB qb, ASTNode ptf) throws SemanticException { int child_count = ptf.getChildCount(); if (child_count < 2) { throw new SemanticException(generateErrorMessage(ptf, @@ -14252,7 +14192,7 @@ private PartitionedTableFunctionSpec processPTFChain(QB qb, ASTNode ptf) * alias */ ASTNode secondChild = (ASTNode) ptf.getChild(1); - if ( secondChild.getType() == HiveParser.Identifier ) { + if (secondChild.getType() == HiveParser.Identifier) { ptfSpec.setAlias(secondChild.getText()); inputIdx++; } @@ -14271,8 +14211,7 @@ private PartitionedTableFunctionSpec processPTFChain(QB qb, ASTNode ptf) int pSpecIdx = inputIdx + 1; ASTNode pSpecNode = ptf.getChildCount() > inputIdx ? 
(ASTNode) ptf.getChild(pSpecIdx) : null; - if (pSpecNode != null && pSpecNode.getType() == HiveParser.TOK_PARTITIONINGSPEC) - { + if (pSpecNode != null && pSpecNode.getType() == HiveParser.TOK_PARTITIONINGSPEC) { PartitioningSpec partitioning = processPTFPartitionSpec(pSpecNode); ptfSpec.setPartitioning(partitioning); argStartIdx++; @@ -14281,8 +14220,7 @@ private PartitionedTableFunctionSpec processPTFChain(QB qb, ASTNode ptf) /* * arguments */ - for(int i=argStartIdx; i < ptf.getChildCount(); i++) - { + for (int i = argStartIdx; i < ptf.getChildCount(); i++) { ptfSpec.addArg((ASTNode) ptf.getChild(i)); } return ptfSpec; @@ -14309,14 +14247,14 @@ private void processPTF(QB qb, ASTNode ptf) throws SemanticException{ private void handleQueryWindowClauses(QB qb, Phase1Ctx ctx_1, ASTNode node) throws SemanticException { WindowingSpec spec = qb.getWindowingSpec(ctx_1.dest); - for(Node child : node.getChildren()) { + for (Node child : node.getChildren()) { processQueryWindowClause(spec, (ASTNode) child); } } private PartitionSpec processPartitionSpec(ASTNode node) { PartitionSpec pSpec = new PartitionSpec(); - for(Node child : node.getChildren()) { + for (Node child : node.getChildren()) { PartitionExpression exprSpec = new PartitionExpression(); exprSpec.setExpression((ASTNode) child); pSpec.addExpression(exprSpec); @@ -14327,18 +14265,17 @@ private PartitionSpec processPartitionSpec(ASTNode node) { private OrderSpec processOrderSpec(ASTNode sortNode) { OrderSpec oSpec = new OrderSpec(); int exprCnt = sortNode.getChildCount(); - for(int i=0; i < exprCnt; i++) { + for (int i = 0; i < exprCnt; i++) { OrderExpression exprSpec = new OrderExpression(); ASTNode orderSpec = (ASTNode) sortNode.getChild(i); ASTNode nullOrderSpec = (ASTNode) orderSpec.getChild(0); exprSpec.setExpression((ASTNode) nullOrderSpec.getChild(0)); - if ( orderSpec.getType() == HiveParser.TOK_TABSORTCOLNAMEASC ) { + if (orderSpec.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { exprSpec.setOrder(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.ASC); - } - else { + } else { exprSpec.setOrder(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC); } - if ( nullOrderSpec.getType() == HiveParser.TOK_NULLS_FIRST ) { + if (nullOrderSpec.getType() == HiveParser.TOK_NULLS_FIRST) { exprSpec.setNullOrder(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.NullOrder.NULLS_FIRST); } else { exprSpec.setNullOrder(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.NullOrder.NULLS_LAST); @@ -14348,35 +14285,30 @@ private OrderSpec processOrderSpec(ASTNode sortNode) { return oSpec; } - private PartitioningSpec processPTFPartitionSpec(ASTNode pSpecNode) - { + private PartitioningSpec processPTFPartitionSpec(ASTNode pSpecNode) { PartitioningSpec partitioning = new PartitioningSpec(); ASTNode firstChild = (ASTNode) pSpecNode.getChild(0); int type = firstChild.getType(); - if ( type == HiveParser.TOK_DISTRIBUTEBY || type == HiveParser.TOK_CLUSTERBY ) - { + if (type == HiveParser.TOK_DISTRIBUTEBY || type == HiveParser.TOK_CLUSTERBY) { PartitionSpec pSpec = processPartitionSpec(firstChild); partitioning.setPartSpec(pSpec); ASTNode sortNode = pSpecNode.getChildCount() > 1 ? 
(ASTNode) pSpecNode.getChild(1) : null; - if ( sortNode != null ) - { + if (sortNode != null) { OrderSpec oSpec = processOrderSpec(sortNode); partitioning.setOrderSpec(oSpec); } - } - else if ( type == HiveParser.TOK_SORTBY || type == HiveParser.TOK_ORDERBY ) { + } else if (type == HiveParser.TOK_SORTBY || type == HiveParser.TOK_ORDERBY) { OrderSpec oSpec = processOrderSpec(firstChild); partitioning.setOrderSpec(oSpec); } return partitioning; } - private WindowFunctionSpec processWindowFunction(ASTNode node, ASTNode wsNode) - throws SemanticException { + private WindowFunctionSpec processWindowFunction(ASTNode node, ASTNode wsNode) throws SemanticException { WindowFunctionSpec wfSpec = new WindowFunctionSpec(); - switch(node.getType()) { + switch (node.getType()) { case HiveParser.TOK_FUNCTIONSTAR: wfSpec.setStar(true); break; @@ -14390,12 +14322,12 @@ private WindowFunctionSpec processWindowFunction(ASTNode node, ASTNode wsNode) ASTNode nameNode = (ASTNode) node.getChild(0); wfSpec.setName(nameNode.getText()); - for(int i=1; i < node.getChildCount()-1; i++) { + for (int i = 1; i < node.getChildCount() - 1; i++) { ASTNode child = (ASTNode) node.getChild(i); wfSpec.addArg(child); } - if ( wsNode != null ) { + if (wsNode != null) { wfSpec.setWindowSpec(processWindowSpec(wsNode)); } @@ -14407,32 +14339,28 @@ private boolean containsLeadLagUDF(ASTNode expressionTree) { if (exprTokenType == HiveParser.TOK_FUNCTION) { assert (expressionTree.getChildCount() != 0); if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { - String functionName = unescapeIdentifier(expressionTree.getChild(0) - .getText()); + String functionName = unescapeIdentifier(expressionTree.getChild(0).getText()); functionName = functionName.toLowerCase(); - if ( FunctionRegistry.LAG_FUNC_NAME.equals(functionName) || - FunctionRegistry.LEAD_FUNC_NAME.equals(functionName) - ) { + if (FunctionRegistry.LAG_FUNC_NAME.equals(functionName) || FunctionRegistry.LEAD_FUNC_NAME + .equals(functionName)) { return true; } } } for (int i = 0; i < expressionTree.getChildCount(); i++) { - if ( containsLeadLagUDF((ASTNode) expressionTree.getChild(i))) { + if (containsLeadLagUDF((ASTNode) expressionTree.getChild(i))) { return true; } } return false; } - private void processQueryWindowClause(WindowingSpec spec, ASTNode node) - throws SemanticException { + private void processQueryWindowClause(WindowingSpec spec, ASTNode node) throws SemanticException { ASTNode nameNode = (ASTNode) node.getChild(0); ASTNode wsNode = (ASTNode) node.getChild(1); - if(spec.getWindowSpecs() != null && spec.getWindowSpecs().containsKey(nameNode.getText())){ - throw new SemanticException(generateErrorMessage(nameNode, - "Duplicate definition of window " + nameNode.getText() + - " is not allowed")); + if (spec.getWindowSpecs() != null && spec.getWindowSpecs().containsKey(nameNode.getText())) { + throw new SemanticException( + generateErrorMessage(nameNode, "Duplicate definition of window " + nameNode.getText() + " is not allowed")); } WindowSpec ws = processWindowSpec(wsNode); spec.addWindowSpec(nameNode.getText(), ws); @@ -14442,20 +14370,21 @@ private WindowSpec processWindowSpec(ASTNode node) throws SemanticException { boolean hasSrcId = false, hasPartSpec = false, hasWF = false; int srcIdIdx = -1, partIdx = -1, wfIdx = -1; - for(int i=0; i < node.getChildCount(); i++) - { + for (int i = 0; i < node.getChildCount(); i++) { int type = node.getChild(i).getType(); - switch(type) - { + switch (type) { case HiveParser.Identifier: - hasSrcId = true; srcIdIdx = 
i; + hasSrcId = true; + srcIdIdx = i; break; case HiveParser.TOK_PARTITIONINGSPEC: - hasPartSpec = true; partIdx = i; + hasPartSpec = true; + partIdx = i; break; case HiveParser.TOK_WINDOWRANGE: case HiveParser.TOK_WINDOWVALUES: - hasWF = true; wfIdx = i; + hasWF = true; + wfIdx = i; break; } } @@ -14473,8 +14402,7 @@ private WindowSpec processWindowSpec(ASTNode node) throws SemanticException { ws.setPartitioning(partitioning); } - if ( hasWF) - { + if (hasWF) { ASTNode wfNode = (ASTNode) node.getChild(wfIdx); WindowFrameSpec wfSpec = processWindowFrame(wfNode); ws.setWindowFrame(wfSpec); @@ -14493,20 +14421,19 @@ private WindowFrameSpec processWindowFrame(ASTNode node) throws SemanticExceptio * are specified. */ BoundarySpec start = processBoundary((ASTNode) node.getChild(0)); - if ( node.getChildCount() > 1 ) { + if (node.getChildCount() > 1) { end = processBoundary((ASTNode) node.getChild(1)); } // Note: TOK_WINDOWVALUES means RANGE type, TOK_WINDOWRANGE means ROWS type return new WindowFrameSpec(type == HiveParser.TOK_WINDOWVALUES ? WindowType.RANGE : WindowType.ROWS, start, end); } - private BoundarySpec processBoundary(ASTNode node) throws SemanticException { + private BoundarySpec processBoundary(ASTNode node) throws SemanticException { BoundarySpec bs = new BoundarySpec(); int type = node.getType(); boolean hasAmt = true; - switch(type) - { + switch (type) { case HiveParser.KW_PRECEDING: bs.setDirection(Direction.PRECEDING); break; @@ -14521,17 +14448,13 @@ private BoundarySpec processBoundary(ASTNode node) throws SemanticException { // no-op } - if ( hasAmt ) - { + if (hasAmt) { ASTNode amtNode = (ASTNode) node.getChild(0); - if ( amtNode.getType() == HiveParser.KW_UNBOUNDED) - { + if (amtNode.getType() == HiveParser.KW_UNBOUNDED) { bs.setAmt(BoundarySpec.UNBOUNDED_AMOUNT); - } - else - { + } else { int amt = Integer.parseInt(amtNode.getText()); - if ( amt <= 0 ) { + if (amt <= 0) { throw new SemanticException( "Window Frame Boundary Amount must be a positive integer, provided amount is: " + amt); } @@ -14542,7 +14465,7 @@ private BoundarySpec processBoundary(ASTNode node) throws SemanticException { return bs; } -//--------------------------- PTF handling: PTFInvocationSpec to PTFDesc -------------------------- + //--------------------------- PTF handling: PTFInvocationSpec to PTFDesc -------------------------- private PTFDesc translatePTFInvocationSpec(PTFInvocationSpec ptfQSpec, RowResolver inputRR) throws SemanticException { @@ -14559,7 +14482,6 @@ private Operator genPTFPlan(PTFInvocationSpec ptfQSpec, Operator input) throws S return input; } - /** * Construct the data structures containing ExprNodeDesc for partition * columns and order columns. Use the input definition to construct the list @@ -14628,8 +14550,7 @@ private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operato /* * a. 
add Map-side PTF Operator if needed */ - if (tabDef.isTransformsRawInput() ) - { + if (tabDef.isTransformsRawInput()) { RowResolver ptfMapRR = tabDef.getRawInputShape().getRr(); ptfDesc.setMapSide(true); @@ -14684,7 +14605,7 @@ private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operato return input; } -//--------------------------- Windowing handling: PTFInvocationSpec to PTFDesc -------------------- + //--------------------------- Windowing handling: PTFInvocationSpec to PTFDesc -------------------- private Operator genWindowingPlan(QB qb, WindowingSpec wSpec, Operator input) throws SemanticException { wSpec.validateAndMakeEffective(); @@ -14706,7 +14627,7 @@ private Operator genWindowingPlan(QB qb, WindowingSpec wSpec, Operator input) th WindowingComponentizer groups = new WindowingComponentizer(wSpec); RowResolver rr = opParseCtx.get(input).getRowResolver(); - while(groups.hasNext() ) { + while (groups.hasNext()) { wSpec = groups.next(conf, this, unparseTranslator, rr); input = genReduceSinkPlanForWindowing(wSpec, rr, input); rr = opParseCtx.get(input).getRowResolver(); @@ -14789,10 +14710,9 @@ private Operator genReduceSinkPlanForWindowing(WindowingSpec spec, && selExpr.getChild(1).getType() == HiveParser.Identifier ? (ASTNode) selExpr.getChild(1) : null; String alias = null; - if ( aliasNode != null ) { + if (aliasNode != null) { alias = aliasNode.getText(); - } - else { + } else { String[] tabColAlias = getColAlias(selExpr, null, null, true, -1); alias = tabColAlias[1]; } diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java index 53c5b1de08..fe72449043 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.List; +import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -188,9 +189,11 @@ public int persistColumnStats(Hive db, Table tbl) throws HiveException, MetaExce HiveTxnManager txnMgr = AcidUtils.isTransactionalTable(tbl) ? SessionState.get().getTxnMgr() : null; if (txnMgr != null) { - request.setValidWriteIdList(AcidUtils.getTableValidWriteIdList(conf, - AcidUtils.getFullTableName(tbl.getDbName(), tbl.getTableName())).toString()); request.setWriteId(txnMgr.getAllocatedTableWriteId(tbl.getDbName(), tbl.getTableName())); + ValidWriteIdList writeId = AcidUtils.getTableValidWriteIdList(conf, + AcidUtils.getFullTableName(tbl.getDbName(), tbl.getTableName())); + writeId.locallyCommitWriteId(request.getWriteId()); + request.setValidWriteIdList(writeId.toString()); } db.setPartitionColumnStatistics(request); return 0; diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java index fa15e2876a..a446f4c855 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnAddPartition.java @@ -102,14 +102,14 @@ private void addPartition(boolean isVectorized) throws Exception { String testQuery = isVectorized ? 
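[Editor's aside, not part of the patch] In the ColStatsProcessor hunk above, the write id allocated for the stats write is marked as committed in the locally held ValidWriteIdList via locallyCommitWriteId before the list is serialized into the stats request, so the snapshot shipped with the request already treats that in-flight write as visible. A toy model of such a locally amended snapshot is sketched below; the class and method names are hypothetical and do not mirror Hive's ValidWriteIdList API.

import java.util.HashSet;
import java.util.Set;

/** Toy per-table write-id snapshot; illustrative only. */
public class WriteIdSnapshot {
  private long highWatermark;            // highest write id the snapshot covers
  private final Set<Long> openOrAborted; // write ids not (yet) visible

  public WriteIdSnapshot(long highWatermark, Set<Long> openOrAborted) {
    this.highWatermark = highWatermark;
    this.openOrAborted = new HashSet<>(openOrAborted);
  }

  /** Mark one write id as committed in this local copy only; the metastore is untouched. */
  public void locallyCommit(long writeId) {
    openOrAborted.remove(writeId);
    highWatermark = Math.max(highWatermark, writeId);
  }

  /** Visible = at or below the high-watermark and neither open nor aborted. */
  public boolean isVisible(long writeId) {
    return writeId <= highWatermark && !openOrAborted.contains(writeId);
  }
}

The diff resumes below.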
"select ROW__ID, p, a, b from T order by p, ROW__ID" : "select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID"; String[][] expected = new String[][]{ - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", - "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", - "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t0\t2", - "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t0\t4", - "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}}; + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", + "warehouse/t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", + "warehouse/t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t0\t2", + "warehouse/t/p=1/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t0\t4", + "warehouse/t/p=1/delta_0000002_0000002_0000/000000_0"}}; checkResult(expected, testQuery, isVectorized, "add 2 parts w/data and 1 empty", LOG); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/3'"); @@ -126,18 +126,18 @@ private void addPartition(boolean isVectorized) throws Exception { + "PARTITION (p=3) location '" + getWarehouseDir() + "/3/data'";//p=3 doesn't exist runStatementOnDriver(stmt); String[][] expected2 = new String[][]{ - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", - "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", - "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t0\t2", - "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t0\t4", - "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t3\t0\t2", - "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t3\t0\t4", - "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}}; + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", + "warehouse/t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", + "warehouse/t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t0\t2", + "warehouse/t/p=1/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t0\t4", + "warehouse/t/p=1/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t3\t0\t2", + "warehouse/t/p=3/delta_0000004_0000004_0000/000000_0"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t3\t0\t4", + "warehouse/t/p=3/delta_0000004_0000004_0000/000000_0"}}; checkResult(expected2, testQuery, isVectorized, "add 2 existing parts and 1 empty", LOG); } @@ -180,10 +180,10 @@ private void addPartitionMM(boolean isVectorized) throws Exception { String testQuery = isVectorized ? 
"select p, a, b from T order by p, a, b" : "select p, a, b, INPUT__FILE__NAME from T order by p, a, b"; String[][] expected = new String[][]{ - {"0\t0\t2", "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"0\t0\t4", "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"1\t0\t2", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, - {"1\t0\t4", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}}; + {"0\t0\t2", "warehouse/t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"0\t0\t4", "warehouse/t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"1\t0\t2", "warehouse/t/p=1/delta_0000002_0000002_0000/000000_0"}, + {"1\t0\t4", "warehouse/t/p=1/delta_0000002_0000002_0000/000000_0"}}; checkResult(expected, testQuery, isVectorized, "add 2 parts w/data and 1 empty", LOG); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/3'"); @@ -199,12 +199,12 @@ private void addPartitionMM(boolean isVectorized) throws Exception { + "PARTITION (p=2) location '" + getWarehouseDir() + "/3/data'"//p=2 exists and is empty + "PARTITION (p=3) location '" + getWarehouseDir() + "/3/data'");//p=3 doesn't exist String[][] expected2 = new String[][]{ - {"0\t0\t2", "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"0\t0\t4", "warehouse/t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"1\t0\t2", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, - {"1\t0\t4", "warehouse/t/p=1/delta_0000001_0000001_0000/000000_0"}, - {"3\t0\t2", "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}, - {"3\t0\t4", "warehouse/t/p=3/delta_0000003_0000003_0000/000000_0"}}; + {"0\t0\t2", "warehouse/t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"0\t0\t4", "warehouse/t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"1\t0\t2", "warehouse/t/p=1/delta_0000002_0000002_0000/000000_0"}, + {"1\t0\t4", "warehouse/t/p=1/delta_0000002_0000002_0000/000000_0"}, + {"3\t0\t2", "warehouse/t/p=3/delta_0000004_0000004_0000/000000_0"}, + {"3\t0\t4", "warehouse/t/p=3/delta_0000004_0000004_0000/000000_0"}}; checkResult(expected2, testQuery, isVectorized, "add 2 existing parts and 1 empty", LOG); } @@ -226,10 +226,10 @@ public void addPartitionBucketed() throws Exception { List rs = runStatementOnDriver( "select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID"); String[][] expected = new String[][]{ - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t0\t0\t2", - "warehouse/t/p=0/delta_0000001_0000001_0000/000001_0"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t0\t1\t4", - "warehouse/t/p=0/delta_0000001_0000001_0000/000001_0"}}; + {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t0\t0\t2", + "warehouse/t/p=0/delta_0000002_0000002_0000/000001_0"}, + {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":1}\t0\t1\t4", + "warehouse/t/p=0/delta_0000002_0000002_0000/000001_0"}}; checkExpected(rs, expected, "add partition (p=0)"); } @@ -263,10 +263,10 @@ public void addPartitionRename() throws Exception { List rs = runStatementOnDriver( "select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID"); String[][] expected = new String[][]{ - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t0\t0\t2", - "warehouse/t/p=0/delta_0000001_0000001_0000/000001_0"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t0\t1\t4", - "warehouse/t/p=0/delta_0000001_0000001_0000/000001_0"}}; + {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t0\t0\t2", + "warehouse/t/p=0/delta_0000002_0000002_0000/000001_0"}, + {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":1}\t0\t1\t4", + 
"warehouse/t/p=0/delta_0000002_0000002_0000/000001_0"}}; checkExpected(rs, expected, "add partition (p=0)"); } @@ -294,6 +294,6 @@ public void addPartitionTransactional() throws Exception { runStatementOnDriver("insert into Tstage partition(p=1) values(0,2),(1,4)"); runStatementOnDriver("ALTER TABLE T ADD PARTITION (p=0) location '" - + getWarehouseDir() + "/tstage/p=1/delta_0000001_0000001_0000/bucket_00001_0'"); + + getWarehouseDir() + "/tstage/p=1/delta_0000002_0000002_0000/bucket_00001_0'"); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 337f469d1a..2759403f5a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -1238,8 +1238,8 @@ private void writeBetweenWorkerAndCleanerForVariousTblProperties(String tblPrope FileStatus[] status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + tblName.toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER); Set expectedDeltas = new HashSet<>(); - expectedDeltas.add("delete_delta_0000001_0000002_v0000019"); - expectedDeltas.add("delta_0000001_0000002_v0000019"); + expectedDeltas.add("delete_delta_0000002_0000003_v0000019"); + expectedDeltas.add("delta_0000002_0000003_v0000019"); Set actualDeltas = new HashSet<>(); for(FileStatus file : status) { actualDeltas.add(file.getPath().getName()); @@ -1463,7 +1463,7 @@ public void testMultiInsertStatement() throws Exception { List r = runStatementOnDriver("select a,b from " + Table.ACIDTBLPART + " order by a,b"); int[][] targetVals = {{2,1},{4,3},{5,6},{7,8}}; Assert.assertEquals(stringifyValues(targetVals), r); - //currently multi-insrt doesn't allow same table/partition in > 1 output branch + //currently multi-insert doesn't allow same table/partition in > 1 output branch String s = "from " + Table.ACIDTBLPART + " target right outer join " + Table.NONACIDPART2 + " source on target.a = source.a2 " + " INSERT INTO TABLE " + Table.ACIDTBLPART + " PARTITION(p='even') select source.a2, source.b2 where source.a2=target.a " + diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java index e1f669ac3c..9e1f251f2a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java @@ -100,9 +100,9 @@ public void testRenameTable() throws Exception { "select count(*) from COMPACTION_QUEUE where CQ_TABLE='s'")); Assert.assertEquals(1, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from WRITE_SET where WS_TABLE='s'")); - Assert.assertEquals(3, TxnDbUtil.countQueryAgent(hiveConf, + Assert.assertEquals(5, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXN_TO_WRITE_ID where T2W_TABLE='s'")); - Assert.assertEquals(1, TxnDbUtil.countQueryAgent(hiveConf, + Assert.assertEquals(2, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from NEXT_WRITE_ID where NWI_TABLE='s'")); runStatementOnDriver("alter table mydb1.S RENAME TO mydb2.bar"); @@ -115,9 +115,9 @@ public void testRenameTable() throws Exception { "select count(*) from COMPACTION_QUEUE where CQ_TABLE='bar'")); Assert.assertEquals(1, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from WRITE_SET where WS_TABLE='bar'")); - Assert.assertEquals(4, TxnDbUtil.countQueryAgent(hiveConf, + Assert.assertEquals(7, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from TXN_TO_WRITE_ID where T2W_TABLE='bar'")); - Assert.assertEquals(1, 
TxnDbUtil.countQueryAgent(hiveConf, + Assert.assertEquals(3, TxnDbUtil.countQueryAgent(hiveConf, "select count(*) from NEXT_WRITE_ID where NWI_TABLE='bar'")); } @@ -165,10 +165,10 @@ private void testDeleteEventPruning() throws Exception { "select ROW__ID, a, b from T order by a, b" : "select ROW__ID, a, b, INPUT__FILE__NAME from T order by a, b"; String[][] expected = new String[][]{ - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t4\t5", - "warehouse/t/delta_0000001_0000001_0000/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t4\t6", - "warehouse/t/delta_0000002_0000002_0000/bucket_00000"}}; + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t4\t5", + "warehouse/t/delta_0000002_0000002_0000/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t4\t6", + "warehouse/t/delta_0000003_0000003_0000/bucket_00000"}}; checkResult(expected, testQuery, isVectorized, "after delete", LOG); runStatementOnDriver("alter table T compact 'MAJOR'"); @@ -183,9 +183,9 @@ private void testDeleteEventPruning() throws Exception { .startsWith("job_local")); String[][] expected2 = new String[][]{ - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t4\t5", + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t4\t5", "warehouse/t/base_0000001/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t4\t6", + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t4\t6", "warehouse/t/base_0000002/bucket_00000"}}; checkResult(expected2, testQuery, isVectorized, "after compaction", LOG); } @@ -271,28 +271,28 @@ private void testSdpoBucketed(boolean isVectorized, boolean isSdpo, int bucketin "select ROW__ID, a, b, ds from acid_uap order by ds, a, b" : "select ROW__ID, a, b, ds, INPUT__FILE__NAME from acid_uap order by ds, a, b"; String[][] expected = new String[][]{ - {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t1\tbah\ttoday", + {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t1\tbah\ttoday", "warehouse/acid_uap/ds=today/delta_0000002_0000002_0000/bucket_00001_0"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t2\tyah\ttoday", + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\tyah\ttoday", "warehouse/acid_uap/ds=today/delta_0000002_0000002_0000/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\tbah\ttomorrow", + {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t1\tbah\ttomorrow", "warehouse/acid_uap/ds=tomorrow/delta_0000001_0000001_0000/bucket_00001_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t2\tyah\ttomorrow", + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t2\tyah\ttomorrow", "warehouse/acid_uap/ds=tomorrow/delta_0000001_0000001_0000/bucket_00000_0"}}; checkResult(expected, testQuery, isVectorized, "after insert", LOG); runStatementOnDriver("update acid_uap set b = 'fred'"); String[][] expected2 = new String[][]{ - {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t1\tfred\ttoday", + {"{\"writeid\":4,\"bucketid\":536936448,\"rowid\":0}\t1\tfred\ttoday", "warehouse/acid_uap/ds=today/delta_0000003_0000003_0000/bucket_00001"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\tfred\ttoday", + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t2\tfred\ttoday", "warehouse/acid_uap/ds=today/delta_0000003_0000003_0000/bucket_00000"}, - {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t1\tfred\ttomorrow", + {"{\"writeid\":4,\"bucketid\":536936448,\"rowid\":0}\t1\tfred\ttomorrow", "warehouse/acid_uap/ds=tomorrow/delta_0000003_0000003_0000/bucket_00001"}, - 
{"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\tfred\ttomorrow", + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t2\tfred\ttomorrow", "warehouse/acid_uap/ds=tomorrow/delta_0000003_0000003_0000/bucket_00000"}}; checkResult(expected2, testQuery, isVectorized, "after update", LOG); } @@ -350,9 +350,9 @@ public void testCleaner2() throws Exception { FileUtils.HIDDEN_FILES_PATH_FILTER); String[] expectedList = new String[] { - "/t/delta_0000001_0000002_v0000019", - "/t/delta_0000001_0000001_0000", + "/t/delta_0000002_0000003_v0000019", "/t/delta_0000002_0000002_0000", + "/t/delta_0000003_0000003_0000", }; checkExpectedFiles(actualList, expectedList, warehousePath.toString()); @@ -383,7 +383,7 @@ so cleaner removes all files shadowed by it (which is everything in this case) runCleaner(hiveConf); expectedList = new String[] { - "/t/delta_0000001_0000003_v0000022" + "/t/delta_0000002_0000004_v0000022" }; actualList = fs.listStatus(new Path(warehousePath + "/t"), FileUtils.HIDDEN_FILES_PATH_FILTER); diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java index 63944293ce..7fc62c0195 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnConcatenate.java @@ -56,14 +56,14 @@ public void testConcatenate() throws Exception { runStatementOnDriver("insert into " + Table.ACIDTBL + " values(5,6),(8,8)"); String testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from " + Table.ACIDTBL + " order by a, b"; String[][] expected = new String[][] { - {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t1\t4", - "acidtbl/delta_0000002_0000002_0000/bucket_00001"}, - {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":1}\t4\t4", - "acidtbl/delta_0000002_0000002_0000/bucket_00001"}, - {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t5\t6", - "acidtbl/delta_0000003_0000003_0000/bucket_00001_0"}, - {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":1}\t8\t8", - "acidtbl/delta_0000003_0000003_0000/bucket_00001_0"}}; + {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t1\t4", + "acidtbl/delta_0000003_0000003_0000/bucket_00001"}, + {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":1}\t4\t4", + "acidtbl/delta_0000003_0000003_0000/bucket_00001"}, + {"{\"writeid\":4,\"bucketid\":536936448,\"rowid\":0}\t5\t6", + "acidtbl/delta_0000004_0000004_0000/bucket_00001_0"}, + {"{\"writeid\":4,\"bucketid\":536936448,\"rowid\":1}\t8\t8", + "acidtbl/delta_0000004_0000004_0000/bucket_00001_0"}}; checkResult(expected, testQuery, false, "check data", LOG); /*in UTs, there is no standalone HMS running to kick off compaction so it's done via runWorker() @@ -80,14 +80,14 @@ public void testConcatenate() throws Exception { Assert.assertEquals(1, rsp.getCompactsSize()); Assert.assertEquals(TxnStore.CLEANING_RESPONSE, rsp.getCompacts().get(0).getState()); String[][] expected2 = new String[][] { - {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t1\t4", - "acidtbl/base_0000003_v0000019/bucket_00001"}, - {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":1}\t4\t4", - "acidtbl/base_0000003_v0000019/bucket_00001"}, - {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t5\t6", - "acidtbl/base_0000003_v0000019/bucket_00001"}, - {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":1}\t8\t8", - "acidtbl/base_0000003_v0000019/bucket_00001"}}; + {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t1\t4", + "acidtbl/base_0000004_v0000019/bucket_00001"}, + 
{"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":1}\t4\t4", + "acidtbl/base_0000004_v0000019/bucket_00001"}, + {"{\"writeid\":4,\"bucketid\":536936448,\"rowid\":0}\t5\t6", + "acidtbl/base_0000004_v0000019/bucket_00001"}, + {"{\"writeid\":4,\"bucketid\":536936448,\"rowid\":1}\t8\t8", + "acidtbl/base_0000004_v0000019/bucket_00001"}}; checkResult(expected2, testQuery, false, "check data after concatenate", LOG); } @Test @@ -97,14 +97,14 @@ public void testConcatenatePart() throws Exception { runStatementOnDriver("insert into " + Table.ACIDTBLPART + " values(5,6,'p1'),(8,8,'p2')"); String testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from " + Table.ACIDTBLPART + " order by a, b"; String[][] expected = new String[][] { - {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t1\t4", - "acidtblpart/p=p1/delta_0000002_0000002_0000/bucket_00001"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t4\t5", - "acidtblpart/p=p2/delta_0000001_0000001_0000/bucket_00001_0"}, - {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t5\t6", - "acidtblpart/p=p1/delta_0000003_0000003_0000/bucket_00001_0"}, - {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t8\t8", - "acidtblpart/p=p2/delta_0000003_0000003_0000/bucket_00001_0"}}; + {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t1\t4", + "acidtblpart/p=p1/delta_0000003_0000003_0000/bucket_00001"}, + {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t4\t5", + "acidtblpart/p=p2/delta_0000002_0000002_0000/bucket_00001_0"}, + {"{\"writeid\":4,\"bucketid\":536936448,\"rowid\":0}\t5\t6", + "acidtblpart/p=p1/delta_0000004_0000004_0000/bucket_00001_0"}, + {"{\"writeid\":4,\"bucketid\":536936448,\"rowid\":0}\t8\t8", + "acidtblpart/p=p2/delta_0000004_0000004_0000/bucket_00001_0"}}; checkResult(expected, testQuery, false, "check data", LOG); /*in UTs, there is no standalone HMS running to kick off compaction so it's done via runWorker() @@ -121,14 +121,14 @@ public void testConcatenatePart() throws Exception { Assert.assertEquals(1, rsp.getCompactsSize()); Assert.assertEquals(TxnStore.CLEANING_RESPONSE, rsp.getCompacts().get(0).getState()); String[][] expected2 = new String[][] { - {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t1\t4", - "acidtblpart/p=p1/base_0000003_v0000019/bucket_00001"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t4\t5", - "acidtblpart/p=p2/delta_0000001_0000001_0000/bucket_00001_0"}, - {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t5\t6", - "acidtblpart/p=p1/base_0000003_v0000019/bucket_00001"}, - {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t8\t8", - "acidtblpart/p=p2/delta_0000003_0000003_0000/bucket_00001_0"}}; + {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t1\t4", + "acidtblpart/p=p1/base_0000004_v0000019/bucket_00001"}, + {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t4\t5", + "acidtblpart/p=p2/delta_0000002_0000002_0000/bucket_00001_0"}, + {"{\"writeid\":4,\"bucketid\":536936448,\"rowid\":0}\t5\t6", + "acidtblpart/p=p1/base_0000004_v0000019/bucket_00001"}, + {"{\"writeid\":4,\"bucketid\":536936448,\"rowid\":0}\t8\t8", + "acidtblpart/p=p2/delta_0000004_0000004_0000/bucket_00001_0"}}; checkResult(expected2, testQuery, false, "check data after concatenate", LOG); } @@ -142,10 +142,10 @@ public void testConcatenateMM() throws Exception { runStatementOnDriver("insert into T values(5,6),(8,8)"); String testQuery = "select a, b, INPUT__FILE__NAME from T order by a, b"; String[][] expected = new String[][] { - {"1\t2", "t/delta_0000001_0000001_0000/000000_0"}, - {"4\t5", 
"t/delta_0000001_0000001_0000/000000_0"}, - {"5\t6", "t/delta_0000002_0000002_0000/000000_0"}, - {"8\t8", "t/delta_0000002_0000002_0000/000000_0"}}; + {"1\t2", "t/delta_0000002_0000002_0000/000000_0"}, + {"4\t5", "t/delta_0000002_0000002_0000/000000_0"}, + {"5\t6", "t/delta_0000003_0000003_0000/000000_0"}, + {"8\t8", "t/delta_0000003_0000003_0000/000000_0"}}; checkResult(expected, testQuery, false, "check data", LOG); /*in UTs, there is no standalone HMS running to kick off compaction so it's done via runWorker() @@ -162,10 +162,10 @@ public void testConcatenateMM() throws Exception { Assert.assertEquals(1, rsp.getCompactsSize()); Assert.assertEquals(TxnStore.CLEANING_RESPONSE, rsp.getCompacts().get(0).getState()); String[][] expected2 = new String[][] { - {"1\t2", "t/base_0000002_v0000020/000000_0"}, - {"4\t5", "t/base_0000002_v0000020/000000_0"}, - {"5\t6", "t/base_0000002_v0000020/000000_0"}, - {"8\t8", "t/base_0000002_v0000020/000000_0"}}; + {"1\t2", "t/base_0000003_v0000020/000000_0"}, + {"4\t5", "t/base_0000003_v0000020/000000_0"}, + {"5\t6", "t/base_0000003_v0000020/000000_0"}, + {"8\t8", "t/base_0000003_v0000020/000000_0"}}; checkResult(expected2, testQuery, false, "check data after concatenate", LOG); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java index ba5341778c..448a5a7fcf 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnExIm.java @@ -323,34 +323,67 @@ private void testImport(boolean isVectorized, boolean existingTarget) throws Exc String testQuery = isVectorized ? "select ROW__ID, a, b from T order by ROW__ID" : "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; - String[][] expected = new String[][] { - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", - "t/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", - "t/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t5\t6", - "t/delta_0000001_0000001_0000/000000_0"}}; + String[][] expected; + if (existingTarget) { + expected = new String[][] { + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", + "t/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", + "t/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":2}\t5\t6", + "t/delta_0000002_0000002_0000/000000_0"}}; + } else { + expected = new String[][] { + {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", + "t/delta_0000001_0000001_0000/000000_0"}, + {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", + "t/delta_0000001_0000001_0000/000000_0"}, + {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t5\t6", + "t/delta_0000001_0000001_0000/000000_0"}}; + } checkResult(expected, testQuery, isVectorized, "import existing table"); runStatementOnDriver("update T set a = 0 where b = 6"); - String[][] expected2 = new String[][] { - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", - "t/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", - "t/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t6", - "t/delta_0000002_0000002_0000/bucket_00000"}}; + String[][] expected2; + if (existingTarget) { + expected2 = new String[][] { + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", + 
"t/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", + "t/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t6", + "t/delta_0000003_0000003_0000/bucket_00000"}}; + } else { + expected2 = new String[][] { + {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", + "t/delta_0000001_0000001_0000/000000_0"}, + {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", + "t/delta_0000001_0000001_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t6", + "t/delta_0000002_0000002_0000/bucket_00000"}}; + } checkResult(expected2, testQuery, isVectorized, "update imported table"); runStatementOnDriver("alter table T compact 'minor'"); TestTxnCommands2.runWorker(hiveConf); - String[][] expected3 = new String[][] { - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", - ".*t/delta_0000001_0000002_v000002[5-6]/bucket_00000"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", - ".*t/delta_0000001_0000002_v000002[5-6]/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t6", - ".*t/delta_0000001_0000002_v000002[5-6]/bucket_00000"}}; + String[][] expected3; + if (existingTarget) { + expected3 = new String[][] { + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", + ".*t/delta_0000002_0000003_v000002[5-6]/bucket_00000"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", + ".*t/delta_0000002_0000003_v000002[5-6]/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t6", + ".*t/delta_0000002_0000003_v000002[5-6]/bucket_00000"}}; + } else { + expected3 = new String[][] { + {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", + ".*t/delta_0000001_0000002_v000002[5-6]/bucket_00000"}, + {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", + ".*t/delta_0000001_0000002_v000002[5-6]/bucket_00000"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t6", + ".*t/delta_0000001_0000002_v000002[5-6]/bucket_00000"}}; + } checkResult(expected3, testQuery, isVectorized, "minor compact imported table"); } @@ -529,7 +562,11 @@ private void testMM(boolean existingTable, boolean isSourceMM) throws Exception rs = runStatementOnDriver("select INPUT__FILE__NAME from T order by INPUT__FILE__NAME"); Assert.assertEquals(3, rs.size()); for (String s : rs) { - Assert.assertTrue(s, s.contains("/delta_0000001_0000001_0000/")); + if (existingTable) { + Assert.assertTrue(s, s.contains("/delta_0000002_0000002_0000/")); + } else { + Assert.assertTrue(s, s.contains("/delta_0000001_0000001_0000/")); + } Assert.assertTrue(s, s.endsWith("/000000_0")); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java index 125c76ab5a..ad8ceb7cbd 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java @@ -112,13 +112,13 @@ private void loadDataUpdate(boolean isVectorized) throws Exception { String testQuery = isVectorized ? 
"select ROW__ID, a, b from T order by ROW__ID" : "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"; String[][] expected = new String[][]{ - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}}; + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000002_0000002_0000/000000_0"}}; checkResult(expected, testQuery, isVectorized, "load data inpath"); runStatementOnDriver("update T set b = 17 where a = 1"); String[][] expected2 = new String[][]{ - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "t/delta_0000002_0000002_0000/bucket_00000"} + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "t/delta_0000003_0000003_0000/bucket_00000"} }; checkResult(expected2, testQuery, isVectorized, "update"); @@ -210,10 +210,10 @@ private void loadData(boolean isVectorized) throws Exception { runStatementOnDriver("alter table T compact 'minor'"); TestTxnCommands2.runWorker(hiveConf); String[][] expected1 = new String[][] { - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/delta_0000001_0000002_v0000025/bucket_00000"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/delta_0000001_0000002_v0000025/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000002_v0000025/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000002_v0000025/bucket_00000"} + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/delta_0000001_0000002_v0000025/bucket_00000"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/delta_0000001_0000002_v0000025/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000002_v0000025/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000002_v0000025/bucket_00000"} }; checkResult(expected1, testQuery, isVectorized, "load data inpath (minor)"); @@ -222,11 +222,11 @@ private void loadData(boolean isVectorized) throws Exception { runStatementOnDriver("alter table T compact 'major'"); TestTxnCommands2.runWorker(hiveConf); String[][] expected2 = new String[][] { - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/base_0000003_v0000030/bucket_00000"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/base_0000003_v0000030/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/base_0000003_v0000030/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/base_0000003_v0000030/bucket_00000"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/base_0000003_v0000030/bucket_00000"} + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/base_0000004_v0000030/bucket_00000"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/base_0000004_v0000030/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/base_0000004_v0000030/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t3\t4", 
"t/base_0000004_v0000030/bucket_00000"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/base_0000004_v0000030/bucket_00000"} }; checkResult(expected2, testQuery, isVectorized, "load data inpath (major)"); @@ -235,8 +235,8 @@ private void loadData(boolean isVectorized) throws Exception { runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/2'"); runStatementOnDriver("load data inpath '" + getWarehouseDir() + "/2/data' overwrite into table T"); String[][] expected3 = new String[][] { - {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000004/000000_0"}, - {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000004/000000_0"}}; + {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000005/000000_0"}, + {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000005/000000_0"}}; checkResult(expected3, testQuery, isVectorized, "load data inpath overwrite"); //one more major compaction @@ -244,9 +244,9 @@ private void loadData(boolean isVectorized) throws Exception { runStatementOnDriver("alter table T compact 'major'"); TestTxnCommands2.runWorker(hiveConf); String[][] expected4 = new String[][] { - {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000005_v0000040/bucket_00000"}, - {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000005_v0000040/bucket_00000"}, - {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t6\t6", "t/base_0000005_v0000040/bucket_00000"}}; + {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000006_v0000040/bucket_00000"}, + {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000006_v0000040/bucket_00000"}, + {"{\"writeid\":6,\"bucketid\":536870912,\"rowid\":0}\t6\t6", "t/base_0000006_v0000040/bucket_00000"}}; checkResult(expected4, testQuery, isVectorized, "load data inpath overwrite (major)"); } /** @@ -362,12 +362,12 @@ public void loadDataPartitioned() throws Exception { List rs = runStatementOnDriver("select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID"); String[][] expected = new String[][] { - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", "t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t2", "t/p=1/delta_0000002_0000002_0000/000000_0"}, - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4", "t/p=1/delta_0000002_0000002_0000/000000_0"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t2", "t/p=1/delta_0000003_0000003_0000/000000_0"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4", "t/p=1/delta_0000003_0000003_0000/000000_0"}}; + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", "t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t2", "t/p=1/delta_0000003_0000003_0000/000000_0"}, + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4", "t/p=1/delta_0000003_0000003_0000/000000_0"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t2", "t/p=1/delta_0000004_0000004_0000/000000_0"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4", "t/p=1/delta_0000004_0000004_0000/000000_0"}}; checkExpected(rs, expected, "load data inpath 
partitioned"); @@ -376,10 +376,10 @@ public void loadDataPartitioned() throws Exception { runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data inpath '" + getWarehouseDir() + "/4/data' overwrite into table T partition(p=1)"); String[][] expected2 = new String[][] { - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", "t/p=0/delta_0000001_0000001_0000/000000_0"}, - {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t1\t5\t2", "t/p=1/base_0000004/000000_0"}, - {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t1\t5\t4", "t/p=1/base_0000004/000000_0"}}; + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", "t/p=0/delta_0000002_0000002_0000/000000_0"}, + {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t1\t5\t2", "t/p=1/base_0000005/000000_0"}, + {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":1}\t1\t5\t4", "t/p=1/base_0000005/000000_0"}}; rs = runStatementOnDriver("select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID"); checkExpected(rs, expected2, "load data inpath partitioned overwrite"); } diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index 88d5d042ee..eb603fd581 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -96,14 +96,14 @@ public void testNoBuckets() throws Exception { /**the insert creates 2 output files (presumably because there are 2 input files) * The number in the file name is writerId. 
This is the number encoded in ROW__ID.bucketId - * see {@link org.apache.hadoop.hive.ql.io.BucketCodec}*/ - Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t0\t")); - Assert.assertTrue(rs.get(0), rs.get(0).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000001_0000001_0000/bucket_00000_0")); - Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t")); - Assert.assertTrue(rs.get(1), rs.get(1).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000001_0000001_0000/bucket_00000_0")); - Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t1\t1\t")); - Assert.assertTrue(rs.get(2), rs.get(2).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000001_0000001_0000/bucket_00001_0")); - Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t")); - Assert.assertTrue(rs.get(3), rs.get(3).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000001_0000001_0000/bucket_00001_0")); + Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t0\t")); + Assert.assertTrue(rs.get(0), rs.get(0).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00000_0")); + Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t")); + Assert.assertTrue(rs.get(1), rs.get(1).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00000_0")); + Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t1\t1\t1\t")); + Assert.assertTrue(rs.get(2), rs.get(2).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00001_0")); + Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":2,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t")); + Assert.assertTrue(rs.get(3), rs.get(3).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00001_0")); hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_EXPLAIN_USER, false); rs = runStatementOnDriver(String.format("explain update %s set c3 = 17 where c3 in(0,1)", NO_BUCKETS_TBL_NAME)); @@ -120,25 +120,25 @@ public void testNoBuckets() throws Exception { for(String s : rs) { LOG.warn(s); } - Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t")); - Assert.assertTrue(rs.get(0), rs.get(0).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000001_0000001_0000/bucket_00000_0")); - Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t")); - Assert.assertTrue(rs.get(1), rs.get(1).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000001_0000001_0000/bucket_00001_0")); + Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t")); + Assert.assertTrue(rs.get(0), rs.get(0).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00000_0")); + Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t")); + Assert.assertTrue(rs.get(1), rs.get(1).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00001_0")); //so update has 1 writer, but which creates buckets where the new rows land - Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17\t")); - Assert.assertTrue(rs.get(2), rs.get(2).endsWith(NO_BUCKETS_TBL_NAME + 
"/delta_0000002_0000002_0000/bucket_00000")); + Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17\t")); + Assert.assertTrue(rs.get(2), rs.get(2).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000003_0000003_0000/bucket_00000")); // update for "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t1\t1\t" - Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t1\t1\t17\t")); - Assert.assertTrue(rs.get(3), rs.get(3).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00001")); + Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t1\t1\t17\t")); + Assert.assertTrue(rs.get(3), rs.get(3).endsWith(NO_BUCKETS_TBL_NAME + "/delta_0000003_0000003_0000/bucket_00001")); Set expectedFiles = new HashSet<>(); //both delete events land in corresponding buckets to the original row-ids - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delete_delta_0000002_0000002_0000/bucket_00000"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delete_delta_0000002_0000002_0000/bucket_00001"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000001_0000001_0000/bucket_00000_0"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000001_0000001_0000/bucket_00001_0"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00000"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00001"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delete_delta_0000003_0000003_0000/bucket_00000"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delete_delta_0000003_0000003_0000/bucket_00001"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00000_0"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00001_0"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000003_0000003_0000/bucket_00000"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000003_0000003_0000/bucket_00001"); //check that we get the right files on disk assertExpectedFileSet(expectedFiles, getWarehouseDir() + "/" + NO_BUCKETS_TBL_NAME, NO_BUCKETS_TBL_NAME); //todo: it would be nice to check the contents of the files... could use orc.FileDump - it has @@ -168,10 +168,10 @@ public void testNoBuckets() throws Exception { */ String expected[][] = { - {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17", NO_BUCKETS_TBL_NAME + "/base_0000002_v0000025/bucket_00000"}, - {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t1\t1\t17", NO_BUCKETS_TBL_NAME + "/base_0000002_v0000025/bucket_00001"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2", NO_BUCKETS_TBL_NAME + "/base_0000002_v0000025/bucket_00001"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3", NO_BUCKETS_TBL_NAME + "/base_0000002_v0000025/bucket_00000"} + {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17", NO_BUCKETS_TBL_NAME + "/base_0000003_v0000025/bucket_00000"}, + {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t1\t1\t17", NO_BUCKETS_TBL_NAME + "/base_0000003_v0000025/bucket_00001"}, + {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2", NO_BUCKETS_TBL_NAME + "/base_0000003_v0000025/bucket_00001"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3", NO_BUCKETS_TBL_NAME + "/base_0000003_v0000025/bucket_00000"} }; checkResult(expected, "select ROW__ID, c1, c2, c3" + (shouldVectorize() ? 
"" : ", INPUT__FILE__NAME") @@ -180,14 +180,14 @@ public void testNoBuckets() throws Exception { "After Major Compaction", LOG); expectedFiles.clear(); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delete_delta_0000002_0000002_0000/bucket_00000"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delete_delta_0000002_0000002_0000/bucket_00001"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000001_0000001_0000/bucket_00000_0"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000001_0000001_0000/bucket_00001_0"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00000"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00001"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/base_0000002_v0000025/bucket_00000"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/base_0000002_v0000025/bucket_00001"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delete_delta_0000003_0000003_0000/bucket_00000"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delete_delta_0000003_0000003_0000/bucket_00001"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00000_0"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000002_0000002_0000/bucket_00001_0"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000003_0000003_0000/bucket_00000"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/delta_0000003_0000003_0000/bucket_00001"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/base_0000003_v0000025/bucket_00000"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/base_0000003_v0000025/bucket_00001"); assertExpectedFileSet(expectedFiles, getWarehouseDir() + "/" + NO_BUCKETS_TBL_NAME, NO_BUCKETS_TBL_NAME); TestTxnCommands2.runCleaner(hiveConf); @@ -196,8 +196,8 @@ public void testNoBuckets() throws Exception { Assert.assertEquals("Unexpected result after clean", stringifyValues(result), rs); expectedFiles.clear(); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/base_0000002_v0000025/bucket_00000"); - expectedFiles.add(NO_BUCKETS_TBL_NAME + "/base_0000002_v0000025/bucket_00001"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/base_0000003_v0000025/bucket_00000"); + expectedFiles.add(NO_BUCKETS_TBL_NAME + "/base_0000003_v0000025/bucket_00001"); assertExpectedFileSet(expectedFiles, getWarehouseDir() + "/" + NO_BUCKETS_TBL_NAME, NO_BUCKETS_TBL_NAME); } @@ -265,8 +265,8 @@ public void testCTAS() throws Exception { "'='true', 'transactional_properties'='default') as select a, b from " + Table.ACIDTBL); rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from myctas2 order by ROW__ID"); String expected2[][] = { - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "warehouse/myctas2/delta_0000001_0000001_0000/bucket_00000"}, - {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t3\t4", "warehouse/myctas2/delta_0000001_0000001_0000/bucket_00001"} + {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t3\t4", "warehouse/myctas2/delta_0000001_0000001_0000/bucket_00000"}, + {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t2", "warehouse/myctas2/delta_0000001_0000001_0000/bucket_00001"} }; checkExpected(rs, expected2, "Unexpected row count after ctas from acid table"); @@ -338,11 +338,11 @@ public void testInsertToAcidWithUnionRemove() throws Exception { List rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID"); String expected[][] = { - {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":0}\t1\t2", "/delta_0000001_0000001_0001/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":1}\t3\t4", 
"/delta_0000001_0000001_0001/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536870914,\"rowid\":0}\t5\t6", "/delta_0000001_0000001_0002/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536870915,\"rowid\":0}\t9\t10", "/delta_0000001_0000001_0003/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536936450,\"rowid\":0}\t7\t8", "/delta_0000001_0000001_0002/bucket_00001_0"}, + {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":0}\t1\t2", "/delta_0000002_0000002_0001/bucket_00000_0"}, + {"{\"writeid\":2,\"bucketid\":536870913,\"rowid\":1}\t3\t4", "/delta_0000002_0000002_0001/bucket_00000_0"}, + {"{\"writeid\":2,\"bucketid\":536870914,\"rowid\":0}\t5\t6", "/delta_0000002_0000002_0002/bucket_00000_0"}, + {"{\"writeid\":2,\"bucketid\":536870915,\"rowid\":0}\t9\t10", "/delta_0000002_0000002_0003/bucket_00000_0"}, + {"{\"writeid\":2,\"bucketid\":536936450,\"rowid\":0}\t7\t8", "/delta_0000002_0000002_0002/bucket_00001_0"}, }; checkExpected(rs, expected, "Unexpected row count after ctas"); } @@ -798,14 +798,14 @@ public void testCompactStatsGather() throws Exception { String query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order by p, q, a, b"; List rs = runStatementOnDriver(query); String[][] expected = { - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000_0"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000003_0000003_0000/bucket_00000_0"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000003_0000003_0000/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/delta_0000001_0000001_0000/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/delta_0000001_0000001_0000/bucket_00000_0"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/delta_0000003_0000003_0000/bucket_00000_0"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/delta_0000003_0000003_0000/bucket_00000_0"} + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000_0"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000004_0000004_0000/bucket_00000_0"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000004_0000004_0000/bucket_00000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/delta_0000002_0000002_0000/bucket_00000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/delta_0000002_0000002_0000/bucket_00000_0"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/delta_0000004_0000004_0000/bucket_00000_0"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/delta_0000004_0000004_0000/bucket_00000_0"} }; checkExpected(rs, expected, "insert data"); @@ -816,14 +816,14 @@ public void testCompactStatsGather() throws Exception { query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order by p, q, a, b"; rs = runStatementOnDriver(query); String[][] expected2 = { - 
{"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000_0"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000003_0000003_0000/bucket_00000_0"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000003_0000003_0000/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/base_0000003_v0000020/bucket_00000"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/base_0000003_v0000020/bucket_00000"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/base_0000003_v0000020/bucket_00000"}, - {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/base_0000003_v0000020/bucket_00000"} + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000_0"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000004_0000004_0000/bucket_00000_0"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000004_0000004_0000/bucket_00000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/base_0000004_v0000020/bucket_00000"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/base_0000004_v0000020/bucket_00000"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/base_0000004_v0000020/bucket_00000"}, + {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/base_0000004_v0000020/bucket_00000"} }; checkExpected(rs, expected2, "after major compaction"); @@ -848,8 +848,8 @@ public void testDefault() throws Exception { List rs = runStatementOnDriver(query); String[][] expected = { //this proves data is written in Acid layout so T was made Acid - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/bucket_00000_0"}, - {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/bucket_00000_0"} + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000002_0000002_0000/bucket_00000_0"}, + {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000002_0000002_0000/bucket_00000_0"} }; checkExpected(rs, expected, "insert data"); } diff --git ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java index 8a15b7cc5d..242fd3c251 100644 --- ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java +++ ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java @@ -2733,7 +2733,7 @@ public void testValidWriteIdListSnapshot() throws Exception { // Open a base txn which allocates write ID and then committed. long baseTxnId = txnMgr.openTxn(ctx, "u0"); long baseWriteId = txnMgr.getTableWriteId("temp", "T7"); - Assert.assertEquals(1, baseWriteId); + Assert.assertEquals(2, baseWriteId); txnMgr.commitTxn(); // committed baseTxnId // Open a txn with no writes. 
@@ -2758,11 +2758,11 @@ public void testValidWriteIdListSnapshot() throws Exception { long aboveHwmOpenTxnId = txnMgr3.openTxn(ctx, "u3"); Assert.assertTrue("Invalid txn ID", aboveHwmOpenTxnId > testTxnId); long aboveHwmOpenWriteId = txnMgr3.getTableWriteId("temp", "T7"); - Assert.assertEquals(2, aboveHwmOpenWriteId); + Assert.assertEquals(3, aboveHwmOpenWriteId); // Allocate writeId to txn under HWM. This will get Id greater than a txn > HWM. long underHwmOpenWriteId = txnMgr1.getTableWriteId("temp", "T7"); - Assert.assertEquals(3, underHwmOpenWriteId); + Assert.assertEquals(4, underHwmOpenWriteId); // Verify the ValidWriteIdList with one open txn on this table. Write ID of open txn should be invalid. testValidWriteIds = txnMgr2.getValidWriteIds(Collections.singletonList("temp.t7"), testValidTxns) @@ -2786,7 +2786,7 @@ public void testValidWriteIdListSnapshot() throws Exception { // Write Ids of committed and self test txn should be valid but writeId of open txn should be invalid. // WriteId of recently committed txn which was open when get ValidTxnList snapshot should be invalid as well. long testWriteId = txnMgr2.getTableWriteId("temp", "T7"); - Assert.assertEquals(4, testWriteId); + Assert.assertEquals(5, testWriteId); testValidWriteIds = txnMgr2.getValidWriteIds(Collections.singletonList("temp.t7"), testValidTxns) .getTableValidWriteIdList("temp.t7"); diff --git ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java index 49097a0585..447bf343fa 100755 --- ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java +++ ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java @@ -182,6 +182,7 @@ public void testTable() throws Throwable { tbl.setSerdeParam(serdeConstants.FIELD_DELIM, "1"); tbl.setSerializationLib(LazySimpleSerDe.class.getName()); tbl.setStoredAsSubDirectories(false); + tbl.setTemporary(false); tbl.setRewriteEnabled(false); diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSQLSchema.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSQLSchema.java index 3615d2b778..bce6f6c048 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSQLSchema.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSQLSchema.java @@ -45,6 +45,7 @@ public static void setUpBeforeClass() throws Exception { conf.set("hive.security.authorization.manager", "org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider"); sessionState = SessionState.start(conf); + sessionState.initTxnMgr(conf); } @AfterClass diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 71af79370d..b2f9a0b89a 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -144,6 +144,8 @@ private static String[] processorCapabilities; private static String processorIdentifier; + private ValidTxnWriteIdList txnWriteIdList; + //copied from ErrorMsg.java private static final String REPL_EVENTS_MISSING_IN_METASTORE = "Notification events are missing in the meta store."; @@ -4259,6 +4261,15 @@ public void scheduledQueryProgress(ScheduledQueryProgressInfo info) throws TExce 
client.scheduled_query_progress(info); } + @Override + public void setValidWriteIdList(String txnWriteIdListStr) { + this.txnWriteIdList = (txnWriteIdListStr == null ? null : new ValidTxnWriteIdList(txnWriteIdListStr)); + } + + @Override public void clearValidWriteIdList() { + this.txnWriteIdList = null; + } + @Override public ScheduledQueryPollResponse scheduledQueryPoll(ScheduledQueryPollRequest request) throws MetaException, TException { diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index a8b1023b82..164eaf853c 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -4062,4 +4062,8 @@ void createOrDropTriggerToPoolMapping(String resourcePlanName, String triggerNam ReplicationMetricList getReplicationMetrics(GetReplicationMetricsRequest replicationMetricsRequest) throws MetaException, TException; + void setValidWriteIdList(String txnWriteIdList); + + void clearValidWriteIdList(); + } diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java index 218ea44335..45d2486d06 100644 --- standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java +++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java @@ -3792,6 +3792,17 @@ public ReplicationMetricList getReplicationMetrics(GetReplicationMetricsRequest return client.get_replication_metrics(replicationMetricsRequest); } + @Override + public void setValidWriteIdList(String txnWriteIdList) { + throw new UnsupportedOperationException(); + } + + @Override + public void clearValidWriteIdList() { + throw new UnsupportedOperationException(); + + } + @Override public ScheduledQueryPollResponse scheduledQueryPoll(ScheduledQueryPollRequest request) throws MetaException, TException { diff --git storage-api/src/java/org/apache/hadoop/hive/common/ValidReaderWriteIdList.java storage-api/src/java/org/apache/hadoop/hive/common/ValidReaderWriteIdList.java index bc8ac0d61b..c0daa727a2 100644 --- storage-api/src/java/org/apache/hadoop/hive/common/ValidReaderWriteIdList.java +++ storage-api/src/java/org/apache/hadoop/hive/common/ValidReaderWriteIdList.java @@ -260,5 +260,25 @@ public RangeResponse isWriteIdRangeAborted(long minWriteId, long maxWriteId) { public ValidReaderWriteIdList updateHighWatermark(long value) { return new ValidReaderWriteIdList(tableName, exceptions, abortedBits, value, minOpenWriteId); } + + public void locallyCommitWriteId(long writeId) { + if (writeId > highWatermark) { + long[] newExceptions = new long[exceptions.length + (int) (writeId - highWatermark - 1)]; + System.arraycopy(exceptions, 0, newExceptions, 0, exceptions.length); + for (long i = highWatermark + 1; i < writeId; i++) { + newExceptions[exceptions.length + (int) (i - highWatermark - 1)] = i; + } + exceptions = newExceptions; + highWatermark = writeId; + } else { + int pos = Arrays.binarySearch(exceptions, writeId); + if (pos >= 0) { + long[] newExceptions = new long[exceptions.length - 1]; + System.arraycopy(exceptions, 0, 
newExceptions, 0, pos); + System.arraycopy(exceptions, pos + 1, newExceptions, pos, exceptions.length - pos - 1); + exceptions = newExceptions; + } + } + } } diff --git storage-api/src/java/org/apache/hadoop/hive/common/ValidTxnList.java storage-api/src/java/org/apache/hadoop/hive/common/ValidTxnList.java index d4c3b09730..c81da2b3d3 100644 --- storage-api/src/java/org/apache/hadoop/hive/common/ValidTxnList.java +++ storage-api/src/java/org/apache/hadoop/hive/common/ValidTxnList.java @@ -31,6 +31,11 @@ */ public static final String VALID_TXNS_KEY = "hive.txn.valid.txns"; + /** + * Key used to store txn id for compactor in a + * {@link org.apache.hadoop.conf.Configuration} object. + */ + public static final String COMPACTOR_VALID_TXNS_ID_KEY = "hive.compactor.txn.valid.txns.id"; /** * The response to a range query. NONE means no values in this range match, * SOME mean that some do, and ALL means that every value does. diff --git storage-api/src/java/org/apache/hadoop/hive/common/ValidTxnWriteIdList.java storage-api/src/java/org/apache/hadoop/hive/common/ValidTxnWriteIdList.java index cfe01feed0..24cf5e57ef 100644 --- storage-api/src/java/org/apache/hadoop/hive/common/ValidTxnWriteIdList.java +++ storage-api/src/java/org/apache/hadoop/hive/common/ValidTxnWriteIdList.java @@ -32,12 +32,18 @@ */ public static final String VALID_TABLES_WRITEIDS_KEY = "hive.txn.tables.valid.writeids"; + /** + * Key used to store valid write id list for compactor in a + * {@link org.apache.hadoop.conf.Configuration} object. + */ + public static final String COMPACTOR_VALID_TABLES_WRITEIDS_KEY = "hive.compactor.txn.tables.valid.writeids"; + // Transaction for which the list of tables valid write Ids are populated private Long txnId; // Map of valid write ids list for all the tables read by the current txn // Key is full table name string of format <dbName>.<tableName> - private Map<String, ValidWriteIdList> tablesValidWriteIdList = new HashMap<>(); + final private Map<String, ValidWriteIdList> tablesValidWriteIdList = new HashMap<>(); public ValidTxnWriteIdList(Long txnId) { this.txnId = txnId; } diff --git storage-api/src/java/org/apache/hadoop/hive/common/ValidWriteIdList.java storage-api/src/java/org/apache/hadoop/hive/common/ValidWriteIdList.java index b3d64021e6..8ac15b8221 100644 --- storage-api/src/java/org/apache/hadoop/hive/common/ValidWriteIdList.java +++ storage-api/src/java/org/apache/hadoop/hive/common/ValidWriteIdList.java @@ -118,4 +118,19 @@ * @return smallest Open write Id in this set, {@code null} if there is none. */ Long getMinOpenWriteId(); + + /** + * Mark the given writeId as committed locally, i.e. within the Hive Metastore client only. + * This does not mark the writeId as committed on the server. + * It is needed for transactional managed tables, in particular when a single + * statement results in a write followed by a read of the same table. + * Although the transactional boundary is currently a statement, a single statement can + * itself produce a write followed by another write, + * for example Create Table As Select (CTAS) and dynamic-partition insert. + * Marking the writeId of the earlier write within the same session as committed + * ensures that the subsequent read invalidates the stale cache entry, which is + * essential for providing cache consistency with HMS HA. + * @param writeId the write id to mark as committed locally + */ + void locallyCommitWriteId(long writeId); }
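
The behavior that locallyCommitWriteId is meant to provide can be illustrated with a small JUnit-style sketch. It is illustrative only and not part of the patch: it assumes the five-argument ValidReaderWriteIdList constructor already used by updateHighWatermark above, plus the getHighWatermark/isWriteIdValid accessors of ValidWriteIdList; the test class and table names are made up.

import java.util.BitSet;

import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
import org.junit.Assert;
import org.junit.Test;

public class TestLocallyCommitWriteIdSketch {

  @Test
  public void testLocallyCommitWriteId() {
    // high watermark 5; write ids 3 and 4 are open (exceptions), nothing is aborted
    long[] openWriteIds = new long[] {3L, 4L};
    ValidReaderWriteIdList writeIds =
        new ValidReaderWriteIdList("default.t", openWriteIds, new BitSet(), 5L, 3L);

    // Committing a write id above the high watermark moves the watermark up and
    // keeps the intermediate ids (6 and 7) invalid.
    writeIds.locallyCommitWriteId(8L);
    Assert.assertEquals(8L, writeIds.getHighWatermark());
    Assert.assertTrue(writeIds.isWriteIdValid(8L));
    Assert.assertFalse(writeIds.isWriteIdValid(6L));
    Assert.assertFalse(writeIds.isWriteIdValid(7L));

    // Committing a write id that was recorded as open drops it from the exceptions,
    // so it now reads as valid; other open ids are unaffected.
    Assert.assertFalse(writeIds.isWriteIdValid(3L));
    writeIds.locallyCommitWriteId(3L);
    Assert.assertTrue(writeIds.isWriteIdValid(3L));
    Assert.assertFalse(writeIds.isWriteIdValid(4L));
  }
}

Keeping the intermediate ids invalid is what prevents a reader in the same session from treating write ids that were never allocated to it as committed; only the id handed out by the transaction manager is promoted to committed in the local snapshot.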