diff --git common/src/java/org/apache/hadoop/hive/common/JavaUtils.java common/src/java/org/apache/hadoop/hive/common/JavaUtils.java index 3916fe3..fbb646c 100644 --- common/src/java/org/apache/hadoop/hive/common/JavaUtils.java +++ common/src/java/org/apache/hadoop/hive/common/JavaUtils.java @@ -28,6 +28,8 @@ import java.util.Arrays; import java.util.List; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,6 +39,10 @@ */ public final class JavaUtils { + public static final String DELTA_PREFIX = "delta"; + public static final String DELTA_DIGITS = "%07d"; + public static final int DELTA_DIGITS_LEN = 7; + public static final String STATEMENT_DIGITS = "%04d"; private static final Logger LOG = LoggerFactory.getLogger(JavaUtils.class); private static final Method SUN_MISC_UTIL_RELEASE; @@ -158,4 +164,65 @@ public static String txnIdsToString(List txnIds) { private JavaUtils() { // prevent instantiation } + + public static Long extractWriteId(Path file) { + String fileName = file.getName(); + String[] parts = fileName.split("_", 4); // e.g. delta_0000001_0000001_0000 + if (parts.length < 4 || !DELTA_PREFIX.equals(parts[0])) { + LOG.info("Cannot extract write ID for a MM table: " + file + + " (" + Arrays.toString(parts) + ")"); + return null; + } + long writeId = -1; + try { + writeId = Long.parseLong(parts[1]); + } catch (NumberFormatException ex) { + LOG.info("Cannot extract write ID for a MM table: " + file + + "; parsing " + parts[1] + " got " + ex.getMessage()); + return null; + } + return writeId; + } + + public static class IdPathFilter implements PathFilter { + private final String mmDirName; + private final boolean isMatch, isIgnoreTemp; + public IdPathFilter(long writeId, int stmtId, boolean isMatch) { + this(writeId, stmtId, isMatch, false); + } + public IdPathFilter(long writeId, int stmtId, boolean isMatch, boolean isIgnoreTemp) { + this.mmDirName = DELTA_PREFIX + "_" + String.format(DELTA_DIGITS, writeId) + "_" + + String.format(DELTA_DIGITS, writeId) + "_" + String.format(STATEMENT_DIGITS, stmtId); + this.isMatch = isMatch; + this.isIgnoreTemp = isIgnoreTemp; + } + + @Override + public boolean accept(Path path) { + String name = path.getName(); + if (name.equals(mmDirName)) { + return isMatch; + } + if (isIgnoreTemp && name.length() > 0) { + char c = name.charAt(0); + if (c == '.' || c == '_') return false; // Regardless of isMatch, ignore this. + } + return !isMatch; + } + } + + public static class AnyIdDirFilter implements PathFilter { + @Override + public boolean accept(Path path) { + String name = path.getName(); + if (!name.startsWith(DELTA_PREFIX + "_")) return false; + String idStr = name.substring(DELTA_PREFIX.length() + 1, DELTA_PREFIX.length() + 1 + DELTA_DIGITS_LEN); + try { + Long.parseLong(idStr); + } catch (NumberFormatException ex) { + return false; + } + return true; + } + } } diff --git common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java deleted file mode 100644 index 4cbeb89..0000000 --- common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java +++ /dev/null @@ -1,218 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
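For reference, the delta directory naming that the new JavaUtils members encode can be exercised in isolation. Below is a minimal, dependency-free sketch: the constant values mirror DELTA_PREFIX, DELTA_DIGITS and STATEMENT_DIGITS from the hunk above, while the class name and the main driver are illustrative only.

```java
// Standalone sketch of the MM delta directory naming used by the new JavaUtils members.
// The constants mirror the patch; everything else is illustrative.
public class DeltaNameSketch {
  static final String DELTA_PREFIX = "delta";
  static final String DELTA_DIGITS = "%07d";
  static final String STATEMENT_DIGITS = "%04d";

  // Builds a single-write-id delta directory name, e.g. delta_0000001_0000001_0000.
  static String deltaDirName(long writeId, int stmtId) {
    return DELTA_PREFIX + "_" + String.format(DELTA_DIGITS, writeId)
        + "_" + String.format(DELTA_DIGITS, writeId)
        + "_" + String.format(STATEMENT_DIGITS, stmtId);
  }

  // Recovers the write ID, following the split("_", 4) approach of extractWriteId above.
  static Long parseWriteId(String dirName) {
    String[] parts = dirName.split("_", 4);
    if (parts.length < 4 || !DELTA_PREFIX.equals(parts[0])) {
      return null; // not a delta directory name
    }
    try {
      return Long.parseLong(parts[1]);
    } catch (NumberFormatException ex) {
      return null;
    }
  }

  public static void main(String[] args) {
    String name = deltaDirName(1, 0);
    System.out.println(name);               // delta_0000001_0000001_0000
    System.out.println(parseWriteId(name)); // 1
  }
}
```

Because MM writes use the same write ID for the minimum and maximum positions, the ID appears twice in the name, followed by the statement ID.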
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.common; - -import java.util.Arrays; -import java.util.HashSet; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.hive.conf.HiveConf; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class ValidWriteIds { - public static final ValidWriteIds NO_WRITE_IDS = new ValidWriteIds(-1, -1, false, null); - - public static final String MM_PREFIX = "mm"; - private static final String CURRENT_SUFFIX = ".current"; - - private final static Logger LOG = LoggerFactory.getLogger(ValidWriteIds.class); - - private static final String VALID_WRITEIDS_PREFIX = "hive.valid.write.ids."; - private final long lowWatermark, highWatermark; - private final boolean areIdsValid; - private final HashSet ids; - private String source = null; - - public ValidWriteIds( - long lowWatermark, long highWatermark, boolean areIdsValid, HashSet ids) { - this.lowWatermark = lowWatermark; - this.highWatermark = highWatermark; - this.areIdsValid = areIdsValid; - this.ids = ids; - } - - public static ValidWriteIds createFromConf(Configuration conf, String dbName, String tblName) { - return createFromConf(conf, dbName + "." + tblName); - } - - public static ValidWriteIds createFromConf(Configuration conf, String fullTblName) { - String key = createConfKey(fullTblName); - String idStr = conf.get(key, null); - String current = conf.get(key + CURRENT_SUFFIX, null); - if (idStr == null || idStr.isEmpty()) return null; - return new ValidWriteIds(idStr, current); - } - - private static String createConfKey(String dbName, String tblName) { - return createConfKey(dbName + "." + tblName); - } - - private static String createConfKey(String fullName) { - return VALID_WRITEIDS_PREFIX + fullName; - } - - private ValidWriteIds(String src, String current) { - // TODO: lifted from ACID config implementation... optimize if needed? e.g. 
ranges, base64 - String[] values = src.split(":"); - highWatermark = Long.parseLong(values[0]); - lowWatermark = Long.parseLong(values[1]); - if (values.length > 2) { - areIdsValid = Long.parseLong(values[2]) > 0; - ids = new HashSet(); - for(int i = 3; i < values.length; ++i) { - ids.add(Long.parseLong(values[i])); - } - if (current != null) { - long currentId = Long.parseLong(current); - if (areIdsValid) { - ids.add(currentId); - } else { - ids.remove(currentId); - } - } - } else if (current != null) { - long currentId = Long.parseLong(current); - areIdsValid = true; - ids = new HashSet(); - ids.add(currentId); - } else { - areIdsValid = false; - ids = null; - } - } - - public static void addCurrentToConf( - Configuration conf, String dbName, String tblName, long mmWriteId) { - String key = createConfKey(dbName, tblName) + CURRENT_SUFFIX; - if (LOG.isDebugEnabled()) { - LOG.debug("Setting " + key + " => " + mmWriteId); - } - conf.set(key, Long.toString(mmWriteId)); - } - - public void addToConf(Configuration conf, String dbName, String tblName) { - if (source == null) { - source = toString(); - } - String key = createConfKey(dbName, tblName); - if (LOG.isDebugEnabled()) { - LOG.debug("Setting " + key + " => " + source - + " (old value was " + conf.get(key, null) + ")"); - } - conf.set(key, source); - } - - public static void clearConf(Configuration conf, String dbName, String tblName) { - if (LOG.isDebugEnabled()) { - LOG.debug("Unsetting " + createConfKey(dbName, tblName)); - } - conf.unset(createConfKey(dbName, tblName)); - } - - public String toString() { - // TODO: lifted from ACID config implementation... optimize if needed? e.g. ranges, base64 - StringBuilder buf = new StringBuilder(); - buf.append(highWatermark); - buf.append(':'); - buf.append(lowWatermark); - if (ids != null) { - buf.append(':'); - buf.append(areIdsValid ? 1 : 0); - for (long id : ids) { - buf.append(':'); - buf.append(id); - } - } - return buf.toString(); - } - - public boolean isValid(long writeId) { - if (writeId < 0) throw new RuntimeException("Incorrect write ID " + writeId); - if (writeId <= lowWatermark) return true; - if (writeId >= highWatermark) return false; - return ids != null && (areIdsValid == ids.contains(writeId)); - } - - public static String getMmFilePrefix(long mmWriteId) { - return MM_PREFIX + "_" + mmWriteId; - } - - - public static class IdPathFilter implements PathFilter { - private final String mmDirName; - private final boolean isMatch, isIgnoreTemp; - public IdPathFilter(long writeId, boolean isMatch) { - this(writeId, isMatch, false); - } - public IdPathFilter(long writeId, boolean isMatch, boolean isIgnoreTemp) { - this.mmDirName = ValidWriteIds.getMmFilePrefix(writeId); - this.isMatch = isMatch; - this.isIgnoreTemp = isIgnoreTemp; - } - - @Override - public boolean accept(Path path) { - String name = path.getName(); - if (name.equals(mmDirName)) { - return isMatch; - } - if (isIgnoreTemp && name.length() > 0) { - char c = name.charAt(0); - if (c == '.' || c == '_') return false; // Regardless of isMatch, ignore this. 
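For context on what is being deleted, ValidWriteIds serialized its state as a colon-separated string (high watermark, low watermark, then optionally a validity flag and a list of IDs) and checked write IDs against the two watermarks plus that list. The sketch below condenses the parse/validate logic shown above, omitting the `.current` suffix handling; the class name and the sample values are illustrative.

```java
import java.util.HashSet;
import java.util.Set;

// Condensed form of the deleted ValidWriteIds parse/validate logic, for illustration only.
public class ValidWriteIdsFormatSketch {
  final long lowWatermark, highWatermark;
  final boolean areIdsValid;
  final Set<Long> ids;

  ValidWriteIdsFormatSketch(String src) { // e.g. "10:5:1:7:8"
    String[] values = src.split(":");
    highWatermark = Long.parseLong(values[0]);
    lowWatermark = Long.parseLong(values[1]);
    if (values.length > 2) {
      areIdsValid = Long.parseLong(values[2]) > 0;
      ids = new HashSet<>();
      for (int i = 3; i < values.length; ++i) {
        ids.add(Long.parseLong(values[i]));
      }
    } else {
      areIdsValid = false;
      ids = null;
    }
  }

  boolean isValid(long writeId) {
    if (writeId <= lowWatermark) return true;   // at or below the low watermark: committed
    if (writeId >= highWatermark) return false; // at or above the high watermark: unknown
    return ids != null && (areIdsValid == ids.contains(writeId));
  }

  public static void main(String[] args) {
    ValidWriteIdsFormatSketch w = new ValidWriteIdsFormatSketch("10:5:1:7:8");
    System.out.println(w.isValid(3)); // true  (below the low watermark)
    System.out.println(w.isValid(7)); // true  (explicitly listed as valid)
    System.out.println(w.isValid(9)); // false (between watermarks, not listed)
  }
}
```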
- } - return !isMatch; - } - } - - public static class AnyIdDirFilter implements PathFilter { - @Override - public boolean accept(Path path) { - String name = path.getName(); - if (!name.startsWith(MM_PREFIX + "_")) return false; - String idStr = name.substring(MM_PREFIX.length() + 1); - try { - Long.parseLong(idStr); - } catch (NumberFormatException ex) { - return false; - } - return true; - } - } - public static Long extractWriteId(Path file) { - String fileName = file.getName(); - String[] parts = fileName.split("_", 3); - if (parts.length < 2 || !MM_PREFIX.equals(parts[0])) { - LOG.info("Cannot extract write ID for a MM table: " + file - + " (" + Arrays.toString(parts) + ")"); - return null; - } - long writeId = -1; - try { - writeId = Long.parseLong(parts[1]); - } catch (NumberFormatException ex) { - LOG.info("Cannot extract write ID for a MM table: " + file - + "; parsing " + parts[1] + " got " + ex.getMessage()); - return null; - } - return writeId; - } - -} \ No newline at end of file diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java index 0c51a68..c70925a 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java @@ -103,7 +103,7 @@ protected void setUp() { db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, src, true, true); db.createTable(src, cols, null, TextInputFormat.class, IgnoreKeyTextOutputFormat.class); - db.loadTable(hadoopDataFile[i], src, false, false, false, false, false, null); + db.loadTable(hadoopDataFile[i], src, false, false, false, false, false, null, 0); i++; } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MmCleanerThread.java metastore/src/java/org/apache/hadoop/hive/metastore/MmCleanerThread.java index d99b0d7..c496012 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MmCleanerThread.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MmCleanerThread.java @@ -29,7 +29,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.ValidWriteIds; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.RawStore.FullTableName; @@ -301,7 +301,7 @@ private void deleteAbortedWriteIdFiles(String location, HashSet abortedWri LOG.warn("Skipping a non-directory file " + childPath); continue; } - Long writeId = ValidWriteIds.extractWriteId(childPath); + Long writeId = JavaUtils.extractWriteId(childPath); if (writeId == null) { LOG.warn("Skipping an unknown directory " + childPath); continue; diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java index 0b615cd..2502f72 100644 --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -34,15 +34,15 @@ import java.util.Map; import java.util.Queue; import java.util.Set; +import java.util.Stack; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantLock; import com.google.common.collect.Iterables; import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; import 
org.apache.hadoop.hive.common.ValidTxnList; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.common.metrics.common.Metrics; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; @@ -50,21 +50,10 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.conf.HiveVariableSource; import org.apache.hadoop.hive.conf.VariableSubstitution; -import org.apache.hadoop.hive.metastore.LockComponentBuilder; import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.DataOperationType; import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.LockComponent; import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hadoop.hive.ql.exec.ConditionalTask; -import org.apache.hadoop.hive.ql.exec.ExplainTask; -import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.TaskResult; -import org.apache.hadoop.hive.ql.exec.TaskRunner; -import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; import org.apache.hadoop.hive.ql.hooks.Entity; import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext; @@ -80,6 +69,8 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.merge.MergeFileTask; +import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; import org.apache.hadoop.hive.ql.lockmgr.HiveLock; import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; import org.apache.hadoop.hive.ql.lockmgr.LockException; @@ -96,20 +87,18 @@ import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl; import org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.parse.ParseDriver; import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory; -import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; -import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.HiveOperation; -import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.parse.SplitSample; +import org.apache.hadoop.hive.ql.plan.*; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; @@ -167,6 +156,9 @@ // whether any ACID table is involved in a query private boolean acidInQuery; + // A list of FileSinkOperators writing in an MM compliant manner + private Set mmSinks; 
+ // A limit on the number of threads that can be launched private int maxthreads; private int tryCount = Integer.MAX_VALUE; @@ -520,6 +512,9 @@ public void run() { // them later. acidSinks = sem.getAcidFileSinks(); + // Record any MM compliant FileSinkOperators too, since we also add transaction ID to them + mmSinks = sem.getMmFileSinks(); + LOG.info("Semantic Analysis Completed"); // validate the plan @@ -1117,7 +1112,7 @@ private int acquireLocksAndOpenTxn(boolean startTxnImplicitly) { boolean initiatingTransaction = false; boolean readOnlyQueryInAutoCommit = false; if((txnMgr.getAutoCommit() && haveAcidWrite()) || plan.getOperation() == HiveOperation.START_TRANSACTION || - (!txnMgr.getAutoCommit() && startTxnImplicitly)) { + (!txnMgr.getAutoCommit() && startTxnImplicitly) || (txnMgr.getAutoCommit() && haveMmWrite())) { if(txnMgr.isTxnOpen()) { throw new RuntimeException("Already have an open transaction txnid:" + txnMgr.getCurrentTxnId()); } @@ -1137,6 +1132,18 @@ private int acquireLocksAndOpenTxn(boolean startTxnImplicitly) { desc.setStatementId(txnMgr.getWriteIdAndIncrement()); } } + + if (haveMmWrite()) { + for (FileSinkDesc desc : mmSinks) { + // Set the mmWriteId in all of the mm file sinks + desc.setTransactionId(txnMgr.getCurrentTxnId()); + desc.setStatementId(txnMgr.getWriteIdAndIncrement()); + } + + // Set up the framework for replacement of mmId with txnId + revisitMmWriteId(this.getPlan(), txnMgr.getCurrentTxnId()); + } + /*Note, we have to record snapshot after lock acquisition to prevent lost update problem consider 2 concurrent "update table T set x = x + 1". 1st will get the locks and the 2nd will block until 1st one commits and only then lock in the snapshot, i.e. it will @@ -1168,6 +1175,11 @@ private int acquireLocksAndOpenTxn(boolean startTxnImplicitly) { private boolean haveAcidWrite() { return acidSinks != null && !acidSinks.isEmpty(); } + + private boolean haveMmWrite() { + return mmSinks != null && !mmSinks.isEmpty(); + } + /** * @param commit if there is an open transaction and if true, commit, * if false rollback. If there is no open transaction this parameter is ignored. @@ -1528,13 +1540,6 @@ else if(!plan.getAutoCommitValue() && txnManager.getAutoCommit()) { return rollback(createProcessorResponse(ret)); } } - - try { - acquireWriteIds(plan, conf); - } catch (HiveException e) { - return handleHiveException(e, 1); - } - ret = execute(true); if (ret != 0) { //if needRequireLock is false, the release here will do nothing because there is no lock @@ -1595,46 +1600,196 @@ else if(plan.getOperation() == HiveOperation.ROLLBACK) { } } - - private static void acquireWriteIds(QueryPlan plan, HiveConf conf) throws HiveException { - // Output IDs are put directly into FileSinkDesc; here, we only need to take care of inputs. - Configuration fetchConf = null; - if (plan.getFetchTask() != null) { - fetchConf = plan.getFetchTask().getFetchConf(); - } - for (ReadEntity input : plan.getInputs()) { - Utilities.LOG14535.debug("Looking at " + input); - Table t = extractTable(input); - if (t == null) continue; - Utilities.LOG14535.info("Checking " + t.getTableName() + " for being a MM table: " + t.getParameters()); - if (!MetaStoreUtils.isInsertOnlyTable(t.getParameters())) { - ValidWriteIds.clearConf(conf, t.getDbName(), t.getTableName()); - if (fetchConf != null) { - ValidWriteIds.clearConf(fetchConf, t.getDbName(), t.getTableName()); + /** + * Traverse the plan tasks, and make following changes if applicable + * 1. 
for the 4 types of descriptors which have mmId defined, update the mmId with the newMmId + * 2. since in parsing phase the mmId has already been used to set up paths, need to go thru every + * impacted descriptor, and update the paths (db/tbl/mm_0) with the newMmId + */ + private static void revisitMmWriteId(QueryPlan plan, long txnId) { + + Stack taskList = new Stack<>(); + // As we walk thru the plan, we add all the tasks into taskList, and examine them one by one + taskList.addAll(plan.getRootTasks()); + while (!taskList.isEmpty()) { + // examine one task at a time + Task task = taskList.pop(); + Serializable work = task.getWork(); + + // Deal with different scenarios + if (work instanceof MapredWork) { + // MapWork has several maps to update: pathToAliases, pathToPartitionInfo, aliasToWork, aliasToPartnInfo and nameToSplitSample + MapWork mapWork = ((MapredWork) work).getMapWork(); + if (mapWork != null) { + if (mapWork.getPathToAliases() != null) { + mapWork.setPathToAliases(updatePathToAlias(mapWork.getPathToAliases(), txnId)); + } + if (mapWork.getPathToPartitionInfo() != null) { + mapWork.setPathToPartitionInfo(updatePathToPartitionInfo(mapWork.getPathToPartitionInfo(), txnId)); + } + if (mapWork.getAliasToWork() != null) { + mapWork.setAliasToWork(updateAliasToWork(mapWork.getAliasToWork(), txnId)); + } + if (mapWork.getAliasToPartnInfo() != null) { + mapWork.setAliasToPartnInfo(updateAliasToPartnInfo(mapWork.getAliasToPartnInfo(), txnId)); + } + if (mapWork.getNameToSplitSample() != null) { + mapWork.setNameToSplitSample(updateNameToSplitSample(mapWork.getNameToSplitSample(), txnId)); + } + } + } else if (work instanceof MergeFileWork) { + MergeFileWork mergeFileWork = (MergeFileWork) work; + mergeFileWork.setInputPaths(getNewPaths(mergeFileWork.getInputPaths(), txnId)); + mergeFileWork.setOutputDir(replacePathWithTxnId(mergeFileWork.getOutputDir(), txnId)); + // Since MergeFileWork extends MapWork + if (mergeFileWork.getPathToAliases() != null) { + mergeFileWork.setPathToAliases(updatePathToAlias(mergeFileWork.getPathToAliases(), txnId)); + } + if (mergeFileWork.getPathToPartitionInfo() != null) { + mergeFileWork.setPathToPartitionInfo(updatePathToPartitionInfo(mergeFileWork.getPathToPartitionInfo(), txnId)); + } + if (mergeFileWork.getAliasToWork() != null) { + mergeFileWork.setAliasToWork(updateAliasToWork(mergeFileWork.getAliasToWork(), txnId)); + } + if (mergeFileWork.getAliasToPartnInfo() != null) { + mergeFileWork.setAliasToPartnInfo(updateAliasToPartnInfo(mergeFileWork.getAliasToPartnInfo(), txnId)); + } + if (mergeFileWork.getNameToSplitSample() != null) { + mergeFileWork.setNameToSplitSample(updateNameToSplitSample(mergeFileWork.getNameToSplitSample(), txnId)); + } + } else if (work instanceof MoveWork) { + MoveWork moveWork = (MoveWork) work; + if (moveWork.getLoadFileWork() != null) { + moveWork.getLoadFileWork().setSourcePath(replacePathWithTxnId(moveWork.getLoadFileWork().getSourcePath(), txnId)); + moveWork.getLoadFileWork().setTargetDir(replacePathWithTxnId(moveWork.getLoadFileWork().getTargetDir(), txnId)); + } else if (moveWork.getLoadTableWork() != null) { + moveWork.getLoadTableWork().setMmWriteId(txnId); // reset mmWriteId + moveWork.getLoadTableWork().setSourcePath(replacePathWithTxnId(moveWork.getLoadTableWork().getSourcePath(), txnId)); + } else if (moveWork.getLoadMultiFilesWork() != null) { + moveWork.getLoadMultiFilesWork().setSourceDirs(getNewPaths(moveWork.getLoadMultiFilesWork().getSourceDirs(), txnId)); + 
moveWork.getLoadMultiFilesWork().setTargetDirs(getNewPaths(moveWork.getLoadMultiFilesWork().getTargetDirs(), txnId)); + } + } else if (work instanceof DDLWork) { + DDLWork ddlWork = (DDLWork) work; + if (ddlWork.getCreateTblDesc() != null) { + ddlWork.getCreateTblDesc().setInitialMmWriteId(txnId); // reset mmWriteId + } + } + + // add more to taskList for processing if any + if (task.getNumChild() > 0) { + for (Object childTask : task.getChildTasks()) { + if (childTask instanceof ConditionalTask) { + taskList.addAll(((ConditionalTask) childTask).getListTasks()); + } else { + taskList.push((Task) childTask); + } } - continue; } - ValidWriteIds ids = Hive.get().getValidWriteIdsForTable(t.getDbName(), t.getTableName()); - ids.addToConf(conf, t.getDbName(), t.getTableName()); - if (fetchConf != null) { - ids.addToConf(fetchConf, t.getDbName(), t.getTableName()); + } + } + + /** + * Given the String form of a Path, update the transactionId for the delta directory name. + * For example, given: + * pfile:/Users/wzheng/HIVE-16063/hive/itests/qtest/target/warehouse/union_mm/delta_0000000_0000000_0000 + * if txnId is 123, below String will be returned: + * pfile:/Users/wzheng/HIVE-16063/hive/itests/qtest/target/warehouse/union_mm/delta_0000123_0000123_0000 + * @param oldString original String form for the Path + * @param txnId transaction ID used for replacement + * @return String form of Path with transaction ID replaced + */ + private static String replaceStringWithTxnId(String oldString, long txnId) { + if (oldString == null || oldString.isEmpty()) { + return oldString; + } + + // extract the prefix and delta dir name + int slash = oldString.lastIndexOf("/"); + String prefix = oldString.substring(0, slash + 1); // pfile:/Users/../../warehouse/table_name/ + String deltaDir = oldString.substring(slash + 1); // delta_0000000_0000000_0000 + + // If the format of the directory name doesn't conform to "delta_0000000_0000000_0000" format, skip + if (!deltaDir.startsWith(AcidUtils.DELTA_PREFIX) || deltaDir.split("_").length != 4) { + return oldString; + } + + // get the last section which is stmtId (which in most cases is 0000) + int underscore = deltaDir.lastIndexOf("_"); + int stmtId = Integer.valueOf(deltaDir.substring(underscore + 1)); // 0000 + + // we're assuming for mmId, the minTxn and maxTxn are the same + return prefix + AcidUtils.deltaSubdir(txnId, txnId, stmtId); + } + + private static Path replacePathWithTxnId(Path oldPath, long txnId) { + return new Path(replaceStringWithTxnId(oldPath.toString(), txnId)); + } + + private static List getNewPaths(List oldPaths, long txnId) { + List newPaths = new ArrayList<>(); + for (Path path : oldPaths) { + newPaths.add(replacePathWithTxnId(path, txnId)); + } + return newPaths; + } + + private static LinkedHashMap> updatePathToAlias(Map> pathToAliases, long txnId) { + // Given a hashmap, update the key (i.e. 
Path) for each entry, then put it into the new hashmap + LinkedHashMap> newMap = new LinkedHashMap<>(); + for (Path oldPath : pathToAliases.keySet()) { + List value = pathToAliases.get(oldPath); + Path newPath = replacePathWithTxnId(oldPath, txnId); + newMap.put(newPath, (ArrayList) value); + } + return newMap; + } + + private static LinkedHashMap updatePathToPartitionInfo(Map pathToPartitionInfo, long txnId) { + LinkedHashMap newMap = new LinkedHashMap<>(); + for (Path oldPath : pathToPartitionInfo.keySet()) { + PartitionDesc value = pathToPartitionInfo.get(oldPath); + Path newPath = replacePathWithTxnId(oldPath, txnId); + newMap.put(newPath, value); + } + return newMap; + } + + private static LinkedHashMap> updateAliasToWork(Map> aliasToWork, long txnId) { + LinkedHashMap> newMap = new LinkedHashMap<>(); + for (String oldString : aliasToWork.keySet()) { + Operator operator = aliasToWork.get(oldString); + String newString = replaceStringWithTxnId(oldString, txnId); + + if (operator instanceof AbstractFileMergeOperator) { + FileMergeDesc fileMergeDesc = (FileMergeDesc) operator.getConf(); + fileMergeDesc.setMmWriteId(txnId); // reset mmWriteId + fileMergeDesc.setStmtId(0); } + + newMap.put(newString, operator); } + return newMap; } - private static Table extractTable(ReadEntity input) { - Table t = null; - switch (input.getType()) { - case TABLE: - t = input.getTable(); - break; - case DUMMYPARTITION: - case PARTITION: - t = input.getPartition().getTable(); - break; - default: return null; + private static LinkedHashMap updateAliasToPartnInfo(Map aliasToPartnInfo, long txnId) { + LinkedHashMap newMap = new LinkedHashMap<>(); + for (String oldString : aliasToPartnInfo.keySet()) { + PartitionDesc value = aliasToPartnInfo.get(oldString); + String newString = replaceStringWithTxnId(oldString, txnId); + newMap.put(newString, value); + } + return newMap; + } + + private static HashMap updateNameToSplitSample(Map nameToSplitSample, long txnId) { + HashMap newMap = new LinkedHashMap<>(); + for (String oldString : nameToSplitSample.keySet()) { + SplitSample value = nameToSplitSample.get(oldString); + String newString = replaceStringWithTxnId(oldString, txnId); + newMap.put(newString, value); } - return (t != null && !t.isTemporary()) ? t : null; + return newMap; } private CommandProcessorResponse rollback(CommandProcessorResponse cpr) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java index 1315b99..0fa7630 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java @@ -246,7 +246,7 @@ public void closeOp(boolean abort) throws HiveException { // There's always just one file that we have merged. // The union/DP/etc. should already be accounted for in the path.
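Stepping back to the Driver helpers added above: each one rewrites the trailing delta_<min>_<max>_<stmt> component of a path with the real transaction ID and then re-keys the affected map. The standalone sketch below shows that pattern with JDK types only; the generic type parameters in the hunk appear to have been stripped during extraction, so the element types here reflect the apparent intent rather than the literal patch text, and the names are illustrative.

```java
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Standalone sketch of the path-rewriting pattern used by the Driver helpers.
public class MmPathRewriteSketch {

  // Mirrors replaceStringWithTxnId: only the final path component is touched, and only
  // if it looks like a four-part delta directory name.
  static String replaceWithTxnId(String path, long txnId) {
    int slash = path.lastIndexOf('/');
    String prefix = path.substring(0, slash + 1);
    String deltaDir = path.substring(slash + 1);
    if (!deltaDir.startsWith("delta_") || deltaDir.split("_").length != 4) {
      return path; // not a delta directory name, leave it alone
    }
    int stmtId = Integer.parseInt(deltaDir.substring(deltaDir.lastIndexOf('_') + 1));
    return prefix + String.format("delta_%07d_%07d_%04d", txnId, txnId, stmtId);
  }

  // Mirrors the shape of updatePathToAlias and friends: values carry over, keys are rewritten.
  static <V> LinkedHashMap<String, V> rekey(Map<String, V> byPath, long txnId) {
    LinkedHashMap<String, V> out = new LinkedHashMap<>();
    for (Map.Entry<String, V> e : byPath.entrySet()) {
      out.put(replaceWithTxnId(e.getKey(), txnId), e.getValue());
    }
    return out;
  }

  public static void main(String[] args) {
    Map<String, List<String>> pathToAliases = new LinkedHashMap<>();
    pathToAliases.put("/warehouse/union_mm/delta_0000000_0000000_0000", Arrays.asList("a"));
    System.out.println(rekey(pathToAliases, 123));
    // {/warehouse/union_mm/delta_0000123_0000123_0000=[a]}
  }
}
```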
Utilities.writeMmCommitManifest(Lists.newArrayList(outPath), - tmpPath.getParent(), fs, taskId, conf.getMmWriteId(), null); + tmpPath.getParent(), fs, taskId, conf.getMmWriteId(), conf.getStmtId(), null); LOG.info("Merged into " + finalPath + "(" + fss.getLen() + " bytes)."); } } @@ -281,6 +281,7 @@ public void jobCloseOp(Configuration hconf, boolean success) Path outputDir = conf.getOutputPath(); FileSystem fs = outputDir.getFileSystem(hconf); Long mmWriteId = conf.getMmWriteId(); + int stmtId = conf.getStmtId(); if (mmWriteId == null) { Path backupPath = backupOutputPath(fs, outputDir); Utilities.mvFileToFinalPath( @@ -297,7 +298,7 @@ public void jobCloseOp(Configuration hconf, boolean success) // We don't expect missing buckets from merge (actually there should be no buckets), // so just pass null as bucketing context. Union suffix should also be accounted for. Utilities.handleMmTableFinalPath(outputDir.getParent(), null, hconf, success, - dpLevels, lbLevels, null, mmWriteId, reporter, false); + dpLevels, lbLevels, null, mmWriteId, stmtId, reporter, false); } } catch (IOException e) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java index e8526f6..14ab6da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java @@ -18,12 +18,12 @@ package org.apache.hadoop.hive.ql.exec; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.hive.common.JavaUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileStatus; @@ -31,10 +31,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.DriverContext; -import org.apache.hadoop.hive.ql.parse.LoadSemanticAnalyzer; import org.apache.hadoop.hive.ql.plan.CopyWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.util.StringUtils; @@ -113,7 +111,7 @@ protected int copyOnePath(Path fromPath, Path toPath) { if (!fs.exists(path)) return null; if (!isSourceMm) return matchFilesOneDir(fs, path, null); // TODO: this doesn't handle list bucketing properly. Does the original exim do that?
- FileStatus[] mmDirs = fs.listStatus(path, new ValidWriteIds.AnyIdDirFilter()); + FileStatus[] mmDirs = fs.listStatus(path, new JavaUtils.AnyIdDirFilter()); if (mmDirs == null || mmDirs.length == 0) return null; List allFiles = new ArrayList(); for (FileStatus mmDir : mmDirs) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index a1a0862..34e35d2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -57,8 +57,10 @@ import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidWriteIds; +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; @@ -98,7 +100,6 @@ import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.TxnInfo; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.Context; @@ -4025,7 +4026,8 @@ private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition + " to false for this query if you want to force the conversion."); } Hive db = getHive(); - ValidWriteIds ids = db.getValidWriteIdsForTable(tbl.getDbName(), tbl.getTableName()); + String value = conf.get(ValidTxnList.VALID_TXNS_KEY); + ValidTxnList validTxnList = value == null ? new ValidReadTxnList() : new ValidReadTxnList(value); if (tbl.getPartitionKeys().size() > 0) { PartitionIterable parts = new PartitionIterable(db, tbl, null, HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); @@ -4033,15 +4035,15 @@ private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition while (partIter.hasNext()) { Partition part = partIter.next(); checkMmLb(part); - handleRemoveMm(part.getDataLocation(), ids, allMmDirs); + handleRemoveMm(part.getDataLocation(), validTxnList, allMmDirs); } } else { checkMmLb(tbl); - handleRemoveMm(tbl.getDataLocation(), ids, allMmDirs); + handleRemoveMm(tbl.getDataLocation(), validTxnList, allMmDirs); } List targetPaths = new ArrayList<>(allMmDirs.size()); List targetPrefix = new ArrayList<>(allMmDirs.size()); - int prefixLen = ValidWriteIds.MM_PREFIX.length(); + int prefixLen = JavaUtils.DELTA_PREFIX.length(); for (int i = 0; i < allMmDirs.size(); ++i) { Path src = allMmDirs.get(i); Path tgt = src.getParent(); @@ -4072,7 +4074,7 @@ private void checkMmLb(Partition part) throws HiveException { } private void handleRemoveMm( - Path path, ValidWriteIds ids, List result) throws HiveException { + Path path, ValidTxnList validTxnList, List result) throws HiveException { // Note: doesn't take LB into account; that is not presently supported here (throws above). 
try { FileSystem fs = path.getFileSystem(conf); @@ -4082,10 +4084,10 @@ private void handleRemoveMm( ensureDelete(fs, childPath, "a non-directory file"); continue; } - Long writeId = ValidWriteIds.extractWriteId(childPath); + Long writeId = JavaUtils.extractWriteId(childPath); if (writeId == null) { ensureDelete(fs, childPath, "an unknown directory"); - } else if (!ids.isValid(writeId)) { + } else if (!validTxnList.isTxnValid(writeId)) { // Assume no concurrent active writes - we rely on locks here. We could check and fail. ensureDelete(fs, childPath, "an uncommitted directory"); } else { @@ -4112,9 +4114,10 @@ private static void ensureDelete(FileSystem fs, Path path, String what) throws I // We will move all the files in the table/partition directories into the first MM // directory, then commit the first write ID. List srcs = new ArrayList<>(), tgts = new ArrayList<>(); + long mmWriteId = SessionState.get().getTxnMgr().getCurrentTxnId(); + int stmtId = 0; //todo + String mmDir = AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId); Hive db = getHive(); - long mmWriteId = db.getNextTableWriteId(tbl.getDbName(), tbl.getTableName()); - String mmDir = ValidWriteIds.getMmFilePrefix(mmWriteId); if (tbl.getPartitionKeys().size() > 0) { PartitionIterable parts = new PartitionIterable(db, tbl, null, HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); @@ -4137,7 +4140,7 @@ private static void ensureDelete(FileSystem fs, Path path, String what) throws I // Don't set inputs and outputs - the locks have already been taken so it's pointless. MoveWork mw = new MoveWork(null, null, null, null, false); mw.setMultiFilesDesc(new LoadMultiFilesDesc(srcs, tgts, true, null, null)); - ImportCommitWork icw = new ImportCommitWork(tbl.getDbName(), tbl.getTableName(), mmWriteId); + ImportCommitWork icw = new ImportCommitWork(tbl.getDbName(), tbl.getTableName(), mmWriteId, stmtId); // TODO# this is hacky and will be gone with ACID. The problem is getting the write ID above // modifies the table, but the table object above is preserved and modified without // getting this change, so saving it will overwrite write ID. Ideally, when we save @@ -4145,7 +4148,6 @@ private static void ensureDelete(FileSystem fs, Path path, String what) throws I // There's probably some way in DN to achieve that, but for now let's just update the // original object here. This is safe due to DDL lock and the fact that converting // the table to MM here from non-MM should mean no concurrent write ID updates. - tbl.setMmNextWriteId(mmWriteId + 1); Task mv = TaskFactory.get(mw, conf), ic = TaskFactory.get(icw, conf); mv.addDependentTask(ic); return Lists.>newArrayList(mv); @@ -4559,18 +4561,11 @@ private int createTable(Hive db, CreateTableDesc crtTbl) throws HiveException { if (crtTbl.isCTAS() || mmWriteId != null) { Table createdTable = db.getTable(tbl.getDbName(), tbl.getTableName()); if (mmWriteId != null) { - // TODO# this would be retrieved via ACID before the query runs; for now we rely on it - // being zero at start; we can't create a write ID before we create the table here. 
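The conversion path above settles on the read-side pattern that recurs throughout the patch: build a ValidTxnList from the VALID_TXNS_KEY conf entry, pull the write ID out of each delta directory name, and treat only committed IDs as live. A sketch of that pattern is below, assuming the Hive and Hadoop classes the patch itself references are on the classpath; the helper name and the standalone shape are illustrative.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.ValidReadTxnList;
import org.apache.hadoop.hive.common.ValidTxnList;

// Illustrative helper: list the delta directories under a table/partition location whose
// write ID is committed according to the transaction list carried in the configuration.
public class ListCommittedDeltasSketch {
  static void printCommittedDeltas(Configuration conf, Path location) throws Exception {
    String txns = conf.get(ValidTxnList.VALID_TXNS_KEY);
    ValidTxnList validTxnList = txns == null ? new ValidReadTxnList() : new ValidReadTxnList(txns);
    FileSystem fs = location.getFileSystem(conf);
    for (FileStatus child : fs.listStatus(location)) {
      if (!child.isDirectory()) continue;                     // plain files are not MM deltas
      Long writeId = JavaUtils.extractWriteId(child.getPath());
      if (writeId == null) continue;                          // not a delta_... directory
      if (validTxnList.isTxnValid(writeId)) {
        System.out.println("committed: " + child.getPath());
      }
    }
  }
}
```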
- long initialWriteId = db.getNextTableWriteId(tbl.getDbName(), tbl.getTableName()); + long initialWriteId = SessionState.get().getTxnMgr().getCurrentTxnId(); if (initialWriteId != mmWriteId) { throw new HiveException("Initial write ID mismatch - expected " + mmWriteId + " but got " + initialWriteId); } - // CTAS create the table on a directory that already exists; import creates the table - // first (in parallel with copies?), then commits after all the loads. - if (crtTbl.isCTAS()) { - db.commitMmTableWrite(tbl, initialWriteId); - } } if (crtTbl.isCTAS()) { DataContainer dc = new DataContainer(createdTable.getTTable()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index 4102d02..e743108 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -35,10 +35,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.common.ValidTxnList; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader; @@ -128,7 +127,6 @@ private transient StructObjectInspector outputOI; private transient Object[] row; - private transient Map writeIdMap; public FetchOperator(FetchWork work, JobConf job) throws HiveException { this(work, job, null, null); @@ -275,7 +273,7 @@ private boolean getNextPath() throws Exception { } FileSystem fs = currPath.getFileSystem(job); if (fs.exists(currPath)) { - if (extractWriteIdsForCurrentTable() != null) { + if (extractValidTxnList() != null) { return true; } for (FileStatus fStat : listStatusUnderPath(fs, currPath)) { @@ -403,12 +401,12 @@ private String processCurrPathForMmWriteIds(InputFormat inputFormat) throws IOEx if (inputFormat instanceof HiveInputFormat) { return StringUtils.escapeString(currPath.toString()); // No need to process here. } - ValidWriteIds ids = extractWriteIdsForCurrentTable(); - if (ids != null) { - Utilities.LOG14535.info("Observing " + currDesc.getTableName() + ": " + ids); + ValidTxnList validTxnList = extractValidTxnList(); + if (validTxnList != null) { + Utilities.LOG14535.info("Observing " + currDesc.getTableName() + ": " + validTxnList); } - Path[] dirs = HiveInputFormat.processPathsForMmRead(Lists.newArrayList(currPath), job, ids); + Path[] dirs = HiveInputFormat.processPathsForMmRead(Lists.newArrayList(currPath), job, validTxnList); if (dirs == null || dirs.length == 0) { return null; // No valid inputs. This condition is logged inside the call. } @@ -419,11 +417,16 @@ private String processCurrPathForMmWriteIds(InputFormat inputFormat) throws IOEx return str.toString(); } - private ValidWriteIds extractWriteIdsForCurrentTable() { - if (writeIdMap == null) { - writeIdMap = new HashMap(); + private ValidTxnList extractValidTxnList() { + ValidTxnList validTxnList; + if (org.apache.commons.lang.StringUtils.isBlank(currDesc.getTableName())) { + validTxnList = null; // i.e. not fetching from a table directly but from a temp location + } else { + String txnString = job.get(ValidTxnList.VALID_TXNS_KEY); + validTxnList = txnString == null ? 
new ValidReadTxnList() : + new ValidReadTxnList(txnString); } - return HiveInputFormat.extractWriteIds(writeIdMap, job, currDesc.getTableName()); + return validTxnList; } private FetchInputFormatSplit[] splitSampling(SplitSample splitSample, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java index bd822df..f6d27fb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java @@ -193,9 +193,4 @@ public void clearFetch() throws HiveException { fetch.clearFetchContext(); } } - - public Configuration getFetchConf() { - return fetch.getJobConf(); - } - } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 3ad1733..c49e580 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -33,16 +33,11 @@ import java.util.Set; import com.google.common.collect.Lists; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.CompilationOpContext; @@ -176,6 +171,8 @@ int acidLastBucket = -1; int acidFileOffset = -1; private boolean isMmTable; + private Long txnId; + private int stmtId; public FSPaths(Path specPath, boolean isMmTable) { this.isMmTable = isMmTable; @@ -185,6 +182,8 @@ public FSPaths(Path specPath, boolean isMmTable) { } else { tmpPath = specPath; taskOutputTempPath = null; // Should not be used. + txnId = conf.getTransactionId(); + stmtId = conf.getStatementId(); } Utilities.LOG14535.info("new FSPaths for " + numFiles + " files, dynParts = " + bDynParts + ": tmpPath " + tmpPath + ", task path " + taskOutputTempPath @@ -327,7 +326,7 @@ public void initializeBucketPaths(int filesIdx, String taskId, boolean isNativeT } outPaths[filesIdx] = getTaskOutPath(taskId); } else { - String subdirPath = ValidWriteIds.getMmFilePrefix(conf.getMmWriteId()); + String subdirPath = AcidUtils.deltaSubdir(txnId, txnId, stmtId); if (unionPath != null) { // Create the union directory inside the MM directory. 
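With statement IDs threaded through, an MM FileSinkOperator now writes into an ACID-style delta directory (the write ID repeated for min and max) and, for union queries, a nested union subdirectory. The example below only prints the resulting target path; the IDs, warehouse location, and union suffix are made-up sample values, and AcidUtils.deltaSubdir is invoked the same way the patch does.

```java
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidUtils;

// Prints the directory an MM file sink would target for a sample transaction/statement.
public class MmSinkDirExample {
  public static void main(String[] args) {
    long txnId = 5L;
    int stmtId = 1;
    String subdir = AcidUtils.deltaSubdir(txnId, txnId, stmtId); // delta_0000005_0000005_0001
    String unionPath = "HIVE_UNION_SUBDIR_1";                    // only present for union queries
    Path specPath = new Path("/warehouse/t");
    Path target = new Path(specPath, subdir + Path.SEPARATOR + unionPath);
    System.out.println(target); // /warehouse/t/delta_0000005_0000005_0001/HIVE_UNION_SUBDIR_1
  }
}
```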
subdirPath += Path.SEPARATOR + unionPath; @@ -735,10 +734,9 @@ protected void createBucketForFileIdx(FSPaths fsp, int filesIdx) Utilities.copyTableJobPropertiesToConf(conf.getTableInfo(), jc); // only create bucket files only if no dynamic partitions, // buckets of dynamic partitions will be created for each newly created partition - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || - conf.getWriteType() == AcidUtils.Operation.INSERT_ONLY) { + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) { Path outPath = fsp.outPaths[filesIdx]; - if ((conf.getWriteType() == AcidUtils.Operation.INSERT_ONLY || conf.isMmTable()) + if (conf.isMmTable() && inheritPerms && !FileUtils.mkdir(fs, outPath.getParent(), inheritPerms, hconf)) { LOG.warn("Unable to create directory with inheritPerms: " + outPath); } @@ -884,8 +882,7 @@ public void process(Object row, int tag) throws HiveException { // for a given operator branch prediction should work quite nicely on it. // RecordUpdateer expects to get the actual row, not a serialized version of it. Thus we // pass the row rather than recordValue. - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || - conf.getWriteType() == AcidUtils.Operation.INSERT_ONLY) { + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) { rowOutWriters[writerOffset].write(recordValue); } else if (conf.getWriteType() == AcidUtils.Operation.INSERT) { fpaths.updaters[writerOffset].insert(conf.getTransactionId(), row); @@ -929,8 +926,7 @@ public void process(Object row, int tag) throws HiveException { protected boolean areAllTrue(boolean[] statsFromRW) { // If we are doing an acid operation they will always all be true as RecordUpdaters always // collect stats - if (conf.getWriteType() != AcidUtils.Operation.NOT_ACID && - conf.getWriteType() != AcidUtils.Operation.INSERT_ONLY) { + if (conf.getWriteType() != AcidUtils.Operation.NOT_ACID) { return true; } for(boolean b : statsFromRW) { @@ -1074,8 +1070,7 @@ protected FSPaths getDynOutPaths(List row, String lbDirName) throws Hive // stats from the record writer and store in the previous fsp that is cached if (conf.isGatherStats() && isCollectRWStats) { SerDeStats stats = null; - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || - conf.getWriteType() == AcidUtils.Operation.INSERT_ONLY) { + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) { RecordWriter outWriter = prevFsp.outWriters[0]; if (outWriter != null) { stats = ((StatsProvidingRecordWriter) outWriter).getStats(); @@ -1177,8 +1172,7 @@ public void closeOp(boolean abort) throws HiveException { // record writer already gathers the statistics, it can simply return the // accumulated statistics which will be aggregated in case of spray writers if (conf.isGatherStats() && isCollectRWStats) { - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || - conf.getWriteType() == AcidUtils.Operation.INSERT_ONLY) { + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) { for (int idx = 0; idx < fsp.outWriters.length; idx++) { RecordWriter outWriter = fsp.outWriters[idx]; if (outWriter != null) { @@ -1208,7 +1202,7 @@ public void closeOp(boolean abort) throws HiveException { } if (conf.getMmWriteId() != null) { Utilities.writeMmCommitManifest( - commitPaths, specPath, fs, taskId, conf.getMmWriteId(), unionPath); + commitPaths, specPath, fs, taskId, conf.getMmWriteId(), conf.getStatementId(), unionPath); } // Only publish stats if this operator's flag was set to gather stats if (conf.isGatherStats()) { @@ -1264,7 +1258,7 @@ public void 
jobCloseOp(Configuration hconf, boolean success) MissingBucketsContext mbc = new MissingBucketsContext( conf.getTableInfo(), numBuckets, conf.getCompressed()); Utilities.handleMmTableFinalPath(specPath, unionSuffix, hconf, success, - dpLevels, lbLevels, mbc, conf.getMmWriteId(), reporter, conf.isMmCtas()); + dpLevels, lbLevels, mbc, conf.getMmWriteId(), conf.getStatementId(), reporter, conf.isMmCtas()); } } } catch (IOException e) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java index ba009b9..c420473 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java @@ -42,9 +42,6 @@ public int execute(DriverContext driverContext) { Utilities.LOG14535.info("Exiting due to explain"); return 0; } - Hive db = getHive(); - Table tbl = db.getTable(work.getDbName(), work.getTblName()); - db.commitMmTableWrite(tbl, work.getMmWriteId()); return 0; } catch (Exception e) { console.printError("Failed with exception " + e.getMessage(), "\n" diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java index f62d237..28d5bf0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java @@ -27,9 +27,11 @@ private static final long serialVersionUID = 1L; private String dbName, tblName; private long mmWriteId; + private int stmtId; - public ImportCommitWork(String dbName, String tblName, long mmWriteId) { + public ImportCommitWork(String dbName, String tblName, long mmWriteId, int stmtId) { this.mmWriteId = mmWriteId; + this.stmtId = stmtId; this.dbName = dbName; this.tblName = tblName; } @@ -38,6 +40,10 @@ public long getMmWriteId() { return mmWriteId; } + public int getStmtId() { + return stmtId; + } + public String getDbName() { return dbName; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index 29b72a0..90b8d6d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -343,10 +343,10 @@ public int execute(DriverContext driverContext) { checkFileFormats(db, tbd, table); - boolean isAcid = work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.INSERT_ONLY; - if (tbd.isMmTable() && isAcid) { - throw new HiveException("ACID and MM are not supported"); + boolean isFullAcidOp = work.getLoadTableWork().getWriteType() == AcidUtils.Operation.UPDATE || + work.getLoadTableWork().getWriteType() == AcidUtils.Operation.DELETE; + if (tbd.isMmTable() && isFullAcidOp) { + throw new HiveException("UPDATE and DELETE operations are not supported for MM table"); } // Create a data container @@ -359,8 +359,8 @@ public int execute(DriverContext driverContext) { "Only single-partition LoadTableDesc can skip commiting write ID"); } db.loadTable(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getReplace(), - work.isSrcLocal(), isSkewedStoredAsDirs(tbd), isAcid, hasFollowingStatsTask(), - tbd.getMmWriteId()); + work.isSrcLocal(), isSkewedStoredAsDirs(tbd), isFullAcidOp, hasFollowingStatsTask(), + tbd.getMmWriteId(), tbd.getStmtId()); if (work.getOutputs() != null) { DDLTask.addIfAbsentByName(new WriteEntity(table, getWriteType(tbd, 
work.getLoadTableWork().getWriteType())), work.getOutputs()); @@ -417,13 +417,12 @@ private DataContainer handleStaticParts(Hive db, Table table, LoadTableDesc tbd, db.validatePartitionNameCharacters(partVals); Utilities.LOG14535.info("loadPartition called from " + tbd.getSourcePath() + " into " + tbd.getTable().getTableName()); - boolean isCommitMmWrite = tbd.isCommitMmWrite(); db.loadSinglePartition(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(), - (work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.INSERT_ONLY), - hasFollowingStatsTask(), tbd.getMmWriteId(), isCommitMmWrite); + work.getLoadTableWork().getWriteType() == AcidUtils.Operation.UPDATE || + work.getLoadTableWork().getWriteType() == AcidUtils.Operation.DELETE, + hasFollowingStatsTask(), tbd.getMmWriteId(), tbd.getStmtId()); Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); // See the comment inside updatePartitionBucketSortColumns. @@ -467,11 +466,10 @@ private DataContainer handleDynParts(Hive db, Table table, LoadTableDesc tbd, tbd.getReplace(), dpCtx.getNumDPCols(), (tbd.getLbCtx() == null) ? 0 : tbd.getLbCtx().calculateListBucketingLevel(), - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.INSERT_ONLY, - SessionState.get().getTxnMgr().getCurrentTxnId(), hasFollowingStatsTask(), - work.getLoadTableWork().getWriteType(), - tbd.getMmWriteId()); + work.getLoadTableWork().getWriteType() == AcidUtils.Operation.UPDATE || + work.getLoadTableWork().getWriteType() == AcidUtils.Operation.DELETE, + SessionState.get().getTxnMgr().getCurrentTxnId(), tbd.getStmtId(), hasFollowingStatsTask(), + work.getLoadTableWork().getWriteType()); // publish DP columns to its subscribers if (dps != null && dps.size() > 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 5b5ddc3..25d3f71 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -67,9 +67,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.zip.Deflater; @@ -99,8 +96,8 @@ import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.common.StringInternUtils; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStoreUtils; @@ -203,67 +200,14 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.Shell; import org.apache.hive.common.util.ACLConfigurationParser; import org.apache.hive.common.util.ReflectionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import 
com.esotericsoftware.kryo.Kryo; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import java.beans.DefaultPersistenceDelegate; -import java.beans.Encoder; -import java.beans.Expression; -import java.beans.Statement; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInput; -import java.io.EOFException; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.Serializable; -import java.net.URI; -import java.net.URL; -import java.net.URLClassLoader; -import java.net.URLDecoder; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.SQLFeatureNotSupportedException; -import java.sql.SQLTransientException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Collection; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Random; -import java.util.Set; -import java.util.UUID; import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.zip.Deflater; -import java.util.zip.DeflaterOutputStream; -import java.util.zip.InflaterInputStream; /** @@ -1590,7 +1534,7 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I int dpLevels = dpCtx == null ? 0 : dpCtx.getNumDPCols(), numBuckets = (conf != null && conf.getTable() != null) ? 
conf.getTable().getNumBuckets() : 0; - return removeTempOrDuplicateFiles(fs, fileStats, dpLevels, numBuckets, hconf, null); + return removeTempOrDuplicateFiles(fs, fileStats, dpLevels, numBuckets, hconf, null, 0); } private static boolean removeEmptyDpDirectory(FileSystem fs, Path path) throws IOException { @@ -1606,7 +1550,7 @@ private static boolean removeEmptyDpDirectory(FileSystem fs, Path path) throws I } public static List removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats, - int dpLevels, int numBuckets, Configuration hconf, Long mmWriteId) throws IOException { + int dpLevels, int numBuckets, Configuration hconf, Long mmWriteId, int stmtId) throws IOException { if (fileStats == null) { return null; } @@ -1627,7 +1571,7 @@ private static boolean removeEmptyDpDirectory(FileSystem fs, Path path) throws I if (mmWriteId != null) { Path mmDir = parts[i].getPath(); - if (!mmDir.getName().equals(ValidWriteIds.getMmFilePrefix(mmWriteId))) { + if (!mmDir.getName().equals(AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId))) { throw new IOException("Unexpected non-MM directory name " + mmDir); } Utilities.LOG14535.info("removeTempOrDuplicateFiles processing files in MM directory " + mmDir); @@ -1649,7 +1593,7 @@ private static boolean removeEmptyDpDirectory(FileSystem fs, Path path) throws I throw new IOException("Unexpected directories for non-DP MM: " + Arrays.toString(items)); } Path mmDir = items[0].getPath(); - if (!mmDir.getName().equals(ValidWriteIds.getMmFilePrefix(mmWriteId))) { + if (!mmDir.getName().equals(AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId))) { throw new IOException("Unexpected non-MM directory " + mmDir); } Utilities.LOG14535.info( @@ -3993,10 +3937,10 @@ private static void tryDelete(FileSystem fs, Path path) { } public static Path[] getMmDirectoryCandidates(FileSystem fs, Path path, int dpLevels, - int lbLevels, PathFilter filter, long mmWriteId, Configuration conf) throws IOException { + int lbLevels, PathFilter filter, long mmWriteId, int stmtId, Configuration conf) throws IOException { int skipLevels = dpLevels + lbLevels; if (filter == null) { - filter = new ValidWriteIds.IdPathFilter(mmWriteId, true); + filter = new JavaUtils.IdPathFilter(mmWriteId, stmtId, true); } if (skipLevels == 0) { return statusToPath(fs.listStatus(path, filter)); @@ -4004,7 +3948,7 @@ private static void tryDelete(FileSystem fs, Path path) { if (HiveConf.getBoolVar(conf, ConfVars.HIVE_MM_AVOID_GLOBSTATUS_ON_S3) && isS3(fs)) { return getMmDirectoryCandidatesRecursive(fs, path, skipLevels, filter); } - return getMmDirectoryCandidatesGlobStatus(fs, path, skipLevels, filter, mmWriteId); + return getMmDirectoryCandidatesGlobStatus(fs, path, skipLevels, filter, mmWriteId, stmtId); } private static boolean isS3(FileSystem fs) { @@ -4072,22 +4016,22 @@ private static boolean isS3(FileSystem fs) { } private static Path[] getMmDirectoryCandidatesGlobStatus(FileSystem fs, - Path path, int skipLevels, PathFilter filter, long mmWriteId) throws IOException { + Path path, int skipLevels, PathFilter filter, long mmWriteId, int stmtId) throws IOException { StringBuilder sb = new StringBuilder(path.toUri().getPath()); for (int i = 0; i < skipLevels; i++) { sb.append(Path.SEPARATOR).append("*"); } - sb.append(Path.SEPARATOR).append(ValidWriteIds.getMmFilePrefix(mmWriteId)); + sb.append(Path.SEPARATOR).append(AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId)); Path pathPattern = new Path(path, sb.toString()); Utilities.LOG14535.info("Looking for files via: " + pathPattern); return 
statusToPath(fs.globStatus(pathPattern, filter)); } private static void tryDeleteAllMmFiles(FileSystem fs, Path specPath, Path manifestDir, - int dpLevels, int lbLevels, String unionSuffix, ValidWriteIds.IdPathFilter filter, - long mmWriteId, Configuration conf) throws IOException { + int dpLevels, int lbLevels, JavaUtils.IdPathFilter filter, + long mmWriteId, int stmtId, Configuration conf) throws IOException { Path[] files = getMmDirectoryCandidates( - fs, specPath, dpLevels, lbLevels, filter, mmWriteId, conf); + fs, specPath, dpLevels, lbLevels, filter, mmWriteId, stmtId, conf); if (files != null) { for (Path path : files) { Utilities.LOG14535.info("Deleting " + path + " on failure"); @@ -4100,10 +4044,10 @@ private static void tryDeleteAllMmFiles(FileSystem fs, Path specPath, Path manif public static void writeMmCommitManifest(List commitPaths, Path specPath, FileSystem fs, - String taskId, Long mmWriteId, String unionSuffix) throws HiveException { + String taskId, Long mmWriteId, int stmtId, String unionSuffix) throws HiveException { if (commitPaths.isEmpty()) return; // We assume one FSOP per task (per specPath), so we create it in specPath. - Path manifestPath = getManifestDir(specPath, mmWriteId, unionSuffix); + Path manifestPath = getManifestDir(specPath, mmWriteId, stmtId, unionSuffix); manifestPath = new Path(manifestPath, taskId + MANIFEST_EXTENSION); Utilities.LOG14535.info("Writing manifest to " + manifestPath + " with " + commitPaths); try { @@ -4122,8 +4066,8 @@ public static void writeMmCommitManifest(List commitPaths, Path specPath, } } - private static Path getManifestDir(Path specPath, long mmWriteId, String unionSuffix) { - Path manifestPath = new Path(specPath, "_tmp." + ValidWriteIds.getMmFilePrefix(mmWriteId)); + private static Path getManifestDir(Path specPath, long mmWriteId, int stmtId, String unionSuffix) { + Path manifestPath = new Path(specPath, "_tmp." + AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId)); return (unionSuffix == null) ? manifestPath : new Path(manifestPath, unionSuffix); } @@ -4139,14 +4083,14 @@ public MissingBucketsContext(TableDesc tableInfo, int numBuckets, boolean isComp } public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Configuration hconf, - boolean success, int dpLevels, int lbLevels, MissingBucketsContext mbc, long mmWriteId, + boolean success, int dpLevels, int lbLevels, MissingBucketsContext mbc, long mmWriteId, int stmtId, Reporter reporter, boolean isMmCtas) throws IOException, HiveException { FileSystem fs = specPath.getFileSystem(hconf); - Path manifestDir = getManifestDir(specPath, mmWriteId, unionSuffix); + Path manifestDir = getManifestDir(specPath, mmWriteId, stmtId, unionSuffix); if (!success) { - ValidWriteIds.IdPathFilter filter = new ValidWriteIds.IdPathFilter(mmWriteId, true); + JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(mmWriteId, stmtId, true); tryDeleteAllMmFiles(fs, specPath, manifestDir, dpLevels, lbLevels, - unionSuffix, filter, mmWriteId, hconf); + filter, mmWriteId, stmtId, hconf); return; } @@ -4170,14 +4114,14 @@ public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Con } Utilities.LOG14535.info("Looking for files in: " + specPath); - ValidWriteIds.IdPathFilter filter = new ValidWriteIds.IdPathFilter(mmWriteId, true); + JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(mmWriteId, stmtId, true); if (isMmCtas && !fs.exists(specPath)) { // TODO: do we also need to do this when creating an empty partition from select? 
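The directory, glob, and manifest naming that getMmDirectoryCandidates and writeMmCommitManifest rely on can be illustrated with a small stand-alone sketch. The helper names below (buildDeltaName, buildManifestDir, buildGlobPattern) are hypothetical, not patch code; the format strings mirror the delta_<writeId>_<writeId>_<stmtId> and "_tmp." conventions visible in the hunks above.

// Illustrative sketch only; helper names are hypothetical and mirror the naming used above.
public final class MmNamingSketch {
  static String buildDeltaName(long writeId, int stmtId) {
    // e.g. writeId=5, stmtId=0 -> delta_0000005_0000005_0000
    return String.format("delta_%07d_%07d_%04d", writeId, writeId, stmtId);
  }
  static String buildManifestDir(String specPath, long writeId, int stmtId) {
    // Manifests go under a "_tmp."-prefixed sibling of the delta directory.
    return specPath + "/_tmp." + buildDeltaName(writeId, stmtId);
  }
  static String buildGlobPattern(String specPath, int skipLevels, long writeId, int stmtId) {
    // One "*" per DP/LB level to skip, then the delta name itself.
    StringBuilder sb = new StringBuilder(specPath);
    for (int i = 0; i < skipLevels; i++) {
      sb.append("/*");
    }
    return sb.append('/').append(buildDeltaName(writeId, stmtId)).toString();
  }
  public static void main(String[] args) {
    System.out.println(buildDeltaName(5, 0));               // delta_0000005_0000005_0000
    System.out.println(buildManifestDir("/wh/t", 5, 0));    // /wh/t/_tmp.delta_0000005_0000005_0000
    System.out.println(buildGlobPattern("/wh/t", 2, 5, 0)); // /wh/t/*/*/delta_0000005_0000005_0000
  }
}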
Utilities.LOG14535.info("Creating table directory for CTAS with no output at " + specPath); FileUtils.mkdir(fs, specPath, hconf); } Path[] files = getMmDirectoryCandidates( - fs, specPath, dpLevels, lbLevels, filter, mmWriteId, hconf); + fs, specPath, dpLevels, lbLevels, filter, mmWriteId, stmtId, hconf); ArrayList mmDirectories = new ArrayList<>(); if (files != null) { for (Path path : files) { @@ -4233,7 +4177,7 @@ public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Con finalResults[i] = new PathOnlyFileStatus(mmDirectories.get(i)); } List emptyBuckets = Utilities.removeTempOrDuplicateFiles( - fs, finalResults, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf, mmWriteId); + fs, finalResults, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf, mmWriteId, stmtId); // create empty buckets if necessary if (emptyBuckets.size() > 0) { assert mbc != null; @@ -4284,7 +4228,7 @@ private static void deleteUncommitedFile(Path childPath, FileSystem fs) * if the entire directory is valid (has no uncommitted/temporary files). */ public static List getValidMmDirectoriesFromTableOrPart(Path path, Configuration conf, - ValidWriteIds ids, int lbLevels) throws IOException { + ValidTxnList validTxnList, int lbLevels) throws IOException { Utilities.LOG14535.info("Looking for valid MM paths under " + path); // NULL means this directory is entirely valid. List result = null; @@ -4294,8 +4238,8 @@ private static void deleteUncommitedFile(Path childPath, FileSystem fs) for (int i = 0; i < children.length; ++i) { FileStatus file = children[i]; Path childPath = file.getPath(); - Long writeId = ValidWriteIds.extractWriteId(childPath); - if (!file.isDirectory() || writeId == null || !ids.isValid(writeId)) { + Long writeId = JavaUtils.extractWriteId(childPath); + if (!file.isDirectory() || writeId == null || !validTxnList.isTxnValid(writeId)) { Utilities.LOG14535.info("Skipping path " + childPath); if (result == null) { result = new ArrayList<>(children.length - 1); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index 740488c..39509cc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -278,7 +278,7 @@ static long parseBase(Path path) { // INSERT_ONLY is a special operation which we only support INSERT operations, no UPDATE/DELETE public enum Operation { - NOT_ACID, INSERT, UPDATE, DELETE, INSERT_ONLY + NOT_ACID, INSERT, UPDATE, DELETE } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index c697407..0996708 100755 --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -20,11 +20,9 @@ import java.io.DataInput; import java.io.DataOutput; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; @@ -34,8 +32,11 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.Map.Entry; -import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StringInternUtils; +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; import 
org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,8 +45,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil; @@ -424,12 +423,11 @@ protected void init(JobConf job) { */ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, JobConf conf, InputFormat inputFormat, Class inputFormatClass, int splits, - TableDesc table, Map writeIdMap, List result) + TableDesc table, List result) throws IOException { - ValidWriteIds writeIds = extractWriteIds(writeIdMap, conf, table.getTableName()); - if (writeIds != null) { - Utilities.LOG14535.info("Observing " + table.getTableName() + ": " + writeIds); - } + String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); + ValidTxnList validTxnList = txnString == null ? new ValidReadTxnList() : + new ValidReadTxnList(txnString); Utilities.copyTablePropertiesToConf(table, conf); @@ -437,7 +435,7 @@ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, Job pushFilters(conf, tableScan); } - Path[] finalDirs = processPathsForMmRead(dirs, conf, writeIds); + Path[] finalDirs = processPathsForMmRead(dirs, conf, validTxnList); if (finalDirs == null) { return; // No valid inputs. } @@ -462,13 +460,13 @@ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, Job } public static Path[] processPathsForMmRead(List dirs, JobConf conf, - ValidWriteIds writeIds) throws IOException { - if (writeIds == null) { + ValidTxnList validTxnList) throws IOException { + if (validTxnList == null) { return dirs.toArray(new Path[dirs.size()]); } else { List finalPaths = new ArrayList<>(dirs.size()); for (Path dir : dirs) { - processForWriteIds(dir, conf, writeIds, finalPaths); + processForWriteIds(dir, conf, validTxnList, finalPaths); } if (finalPaths.isEmpty()) { LOG.warn("No valid inputs found in " + dirs); @@ -479,7 +477,7 @@ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, Job } private static void processForWriteIds(Path dir, JobConf conf, - ValidWriteIds writeIds, List finalPaths) throws IOException { + ValidTxnList validTxnList, List finalPaths) throws IOException { FileSystem fs = dir.getFileSystem(conf); Utilities.LOG14535.warn("Checking " + dir + " (root) for inputs"); // Ignore nullscan-optimized paths. @@ -490,17 +488,17 @@ private static void processForWriteIds(Path dir, JobConf conf, FileStatus[] files = fs.listStatus(dir); // TODO: batch? 
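How the split-generation path now decides which MM directories are readable can be sketched as below. ValidTxnList, ValidReadTxnList, and JavaUtils.extractWriteId are the classes referenced in these hunks; the directory name, the no-arg-constructor fallback, and the expected outputs are assumptions noted in the comments, not asserted behavior of this patch.

// Sketch of the visibility check behind addSplitsForGroup/processForWriteIds (assumed behavior).
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.ValidReadTxnList;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.mapred.JobConf;

public class MmSplitVisibilitySketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // Same fallback as addSplitsForGroup: no serialized txn list means everything is visible.
    String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY);
    ValidTxnList validTxnList =
        txnString == null ? new ValidReadTxnList() : new ValidReadTxnList(txnString);

    // Directory name is illustrative; extractWriteId is expected to yield null for anything
    // that is not a delta_<writeId>_<writeId>_<stmtId> directory.
    Path committed = new Path("/warehouse/t/delta_0000007_0000007_0000");
    Long writeId = JavaUtils.extractWriteId(committed);                       // expected: 7
    System.out.println(writeId != null && validTxnList.isTxnValid(writeId));  // expected: true
  }
}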
LinkedList subdirs = new LinkedList<>(); for (FileStatus file : files) { - handleNonMmDirChild(file, writeIds, subdirs, finalPaths); + handleNonMmDirChild(file, validTxnList, subdirs, finalPaths); } while (!subdirs.isEmpty()) { Path subdir = subdirs.poll(); for (FileStatus file : fs.listStatus(subdir)) { - handleNonMmDirChild(file, writeIds, subdirs, finalPaths); + handleNonMmDirChild(file, validTxnList, subdirs, finalPaths); } } } - private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, + private static void handleNonMmDirChild(FileStatus file, ValidTxnList validTxnList, LinkedList subdirs, List finalPaths) { Path path = file.getPath(); Utilities.LOG14535.warn("Checking " + path + " for inputs"); @@ -508,12 +506,12 @@ private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, Utilities.LOG14535.warn("Ignoring a file not in MM directory " + path); return; } - Long writeId = ValidWriteIds.extractWriteId(path); + Long writeId = JavaUtils.extractWriteId(path); if (writeId == null) { subdirs.add(path); return; } - if (!writeIds.isValid(writeId)) { + if (!validTxnList.isTxnValid(writeId)) { Utilities.LOG14535.warn("Ignoring an uncommitted directory " + path); return; } @@ -565,7 +563,6 @@ private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, StringBuilder readColumnNamesBuffer = new StringBuilder(newjob. get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "")); // for each dir, get the InputFormat, and do getSplits. - Map writeIdMap = new HashMap<>(); for (Path dir : dirs) { PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir); Class inputFormatClass = part.getInputFileFormatClass(); @@ -616,7 +613,7 @@ private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, addSplitsForGroup(currentDirs, currentTableScan, newjob, getInputFormatFromCache(currentInputFormatClass, job), currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length), - currentTable, writeIdMap, result); + currentTable, result); } currentDirs.clear(); @@ -638,7 +635,7 @@ private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, addSplitsForGroup(currentDirs, currentTableScan, newjob, getInputFormatFromCache(currentInputFormatClass, job), currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length), - currentTable, writeIdMap, result); + currentTable, result); } Utilities.clearWorkMapForConf(job); @@ -649,19 +646,6 @@ private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, return result.toArray(new HiveInputSplit[result.size()]); } - public static ValidWriteIds extractWriteIds(Map writeIdMap, - JobConf newjob, String tableName) { - if (StringUtils.isBlank(tableName)) return null; - ValidWriteIds writeIds = writeIdMap.get(tableName); - if (writeIds == null) { - writeIds = ValidWriteIds.createFromConf(newjob, tableName); - writeIdMap.put(tableName, writeIds != null ? 
writeIds : ValidWriteIds.NO_WRITE_IDS); - } else if (writeIds == ValidWriteIds.NO_WRITE_IDS) { - writeIds = null; - } - return writeIds; - } - private void pushProjection(final JobConf newjob, final StringBuilder readColumnsBuffer, final StringBuilder readColumnNamesBuffer) { String readColIds = readColumnsBuffer.toString(); diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index ea87cb4..c06b0e3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -32,7 +32,6 @@ import java.io.PrintStream; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -52,7 +51,6 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.ConcurrentHashMap; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; @@ -70,9 +68,9 @@ import org.apache.hadoop.hive.common.BlobStorageUtils; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.HiveStatsUtils; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable; import org.apache.hadoop.hive.conf.HiveConf; @@ -104,7 +102,6 @@ import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse; import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest; import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse; -import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsResult; import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; import org.apache.hadoop.hive.metastore.api.HiveObjectRef; import org.apache.hadoop.hive.metastore.api.HiveObjectType; @@ -157,7 +154,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -1581,27 +1577,13 @@ public Database getDatabaseCurrent() throws HiveException { public void loadSinglePartition(Path loadPath, String tableName, Map partSpec, boolean replace, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, boolean isSrcLocal, boolean isAcid, - boolean hasFollowingStatsTask, Long mmWriteId, boolean isCommitMmWrite) + boolean hasFollowingStatsTask, Long mmWriteId, int stmtId) throws HiveException { Table tbl = getTable(tableName); boolean isMmTableWrite = (mmWriteId != null); Preconditions.checkState(isMmTableWrite == MetaStoreUtils.isInsertOnlyTable(tbl.getParameters())); loadPartition(loadPath, tbl, partSpec, replace, inheritTableSpecs, - isSkewedStoreAsSubdir, isSrcLocal, isAcid, hasFollowingStatsTask, mmWriteId); - if (isMmTableWrite && isCommitMmWrite) { - // The assumption behind committing here is that this partition is the only one outputted. 
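The IdPathFilter arguments used by the replace paths in the loadPartition/loadTable hunks around here (isMatch=false, isIgnoreTemp=true) are easy to misread. The sketch below shows which sibling names such a filter is expected to accept, on the reading that accepted paths are the old contents an INSERT OVERWRITE cleans up; the names and expected results are illustrative assumptions.

// Sketch: behavior assumed for the (writeId, stmtId, false, true) filter used with
// deleteOldPathForReplace; directory names are made up.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.JavaUtils;

public class ReplaceFilterSketch {
  public static void main(String[] args) {
    JavaUtils.IdPathFilter oldData = new JavaUtils.IdPathFilter(5L, 0, false, true);
    String[] names = {
        "delta_0000005_0000005_0000",      // the write being loaded      -> expected false
        "delta_0000004_0000004_0000",      // an older committed write    -> expected true
        "_tmp.delta_0000005_0000005_0000", // temp/manifest directory     -> expected false
        "000000_0"                         // pre-MM flat file            -> expected true
    };
    for (String name : names) {
      System.out.println(name + " -> " + oldData.accept(new Path("/warehouse/t/" + name)));
    }
  }
}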
- commitMmTableWrite(tbl, mmWriteId); - } - } - - - public void commitMmTableWrite(Table tbl, Long mmWriteId) - throws HiveException { - try { - getMSC().finalizeTableWrite(tbl.getDbName(), tbl.getTableName(), mmWriteId, true); - } catch (TException e) { - throw new HiveException(e); - } + isSkewedStoreAsSubdir, isSrcLocal, isAcid, hasFollowingStatsTask, mmWriteId, stmtId); } /** @@ -1627,7 +1609,7 @@ public void commitMmTableWrite(Table tbl, Long mmWriteId) */ public Partition loadPartition(Path loadPath, Table tbl, Map partSpec, boolean replace, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, - boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask, Long mmWriteId) + boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask, Long mmWriteId, int stmtId) throws HiveException { Path tblDataLocationPath = tbl.getDataLocation(); try { @@ -1674,12 +1656,12 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par Utilities.LOG14535.info("not moving " + loadPath + " to " + newPartPath + " (MM)"); assert !isAcid; if (areEventsForDmlNeeded(tbl, oldPart)) { - newFiles = listFilesCreatedByQuery(loadPath, mmWriteId); + newFiles = listFilesCreatedByQuery(loadPath, mmWriteId, stmtId); } Utilities.LOG14535.info("maybe deleting stuff from " + oldPartPath + " (new " + newPartPath + ") for replace"); if (replace && oldPartPath != null) { deleteOldPathForReplace(newPartPath, oldPartPath, getConf(), - new ValidWriteIds.IdPathFilter(mmWriteId, false, true), mmWriteId != null, + new JavaUtils.IdPathFilter(mmWriteId, stmtId, false, true), true, tbl.isStoredAsSubDirectories() ? tbl.getSkewedColNames().size() : 0); } } else { @@ -1688,8 +1670,8 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par Path destPath = newPartPath; if (mmWriteId != null) { // We will load into MM directory, and delete from the parent if needed. - destPath = new Path(destPath, ValidWriteIds.getMmFilePrefix(mmWriteId)); - filter = replace ? new ValidWriteIds.IdPathFilter(mmWriteId, false, true) : filter; + destPath = new Path(destPath, AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId)); + filter = replace ? new JavaUtils.IdPathFilter(mmWriteId, stmtId, false, true) : filter; } Utilities.LOG14535.info("moving " + loadPath + " to " + destPath); if (replace || (oldPart == null && !isAcid)) { @@ -1779,9 +1761,9 @@ private boolean areEventsForDmlNeeded(Table tbl, Partition oldPart) { return conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && oldPart != null; } - private List listFilesCreatedByQuery(Path loadPath, long mmWriteId) throws HiveException { + private List listFilesCreatedByQuery(Path loadPath, long mmWriteId, int stmtId) throws HiveException { List newFiles = new ArrayList(); - final String filePrefix = ValidWriteIds.getMmFilePrefix(mmWriteId); + final String filePrefix = AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId); FileStatus[] srcs; FileSystem srcFs; try { @@ -1944,7 +1926,7 @@ private void constructOneLBLocationMap(FileStatus fSta, * @throws HiveException */ private Set getValidPartitionsInPath( - int numDP, int numLB, Path loadPath, Long mmWriteId) throws HiveException { + int numDP, int numLB, Path loadPath, Long mmWriteId, int stmtId) throws HiveException { Set validPartitions = new HashSet(); try { FileSystem fs = loadPath.getFileSystem(conf); @@ -1963,7 +1945,7 @@ private void constructOneLBLocationMap(FileStatus fSta, // The non-MM path only finds new partitions, as it is looking at the temp path. 
// To produce the same effect, we will find all the partitions affected by this write ID. Path[] leafStatus = Utilities.getMmDirectoryCandidates( - fs, loadPath, numDP, numLB, null, mmWriteId, conf); + fs, loadPath, numDP, numLB, null, mmWriteId, stmtId, conf); for (Path p : leafStatus) { Path dpPath = p.getParent(); // Skip the MM directory that we have found. for (int i = 0; i < numLB; ++i) { @@ -2009,8 +1991,8 @@ private void constructOneLBLocationMap(FileStatus fSta, */ public Map, Partition> loadDynamicPartitions(final Path loadPath, final String tableName, final Map partSpec, final boolean replace, - final int numDP, final int numLB, final boolean isAcid, final long txnId, - final boolean hasFollowingStatsTask, final AcidUtils.Operation operation, final Long mmWriteId) + final int numDP, final int numLB, final boolean isAcid, final long txnId, final int stmtId, + final boolean hasFollowingStatsTask, final AcidUtils.Operation operation) throws HiveException { final Map, Partition> partitionsMap = @@ -2025,7 +2007,7 @@ private void constructOneLBLocationMap(FileStatus fSta, // Get all valid partition paths and existing partitions for them (if any) final Table tbl = getTable(tableName); - final Set validPartitions = getValidPartitionsInPath(numDP, numLB, loadPath, mmWriteId); + final Set validPartitions = getValidPartitionsInPath(numDP, numLB, loadPath, txnId, stmtId); final int partsToLoad = validPartitions.size(); final AtomicInteger partitionsLoaded = new AtomicInteger(0); @@ -2059,7 +2041,7 @@ public Void call() throws Exception { Utilities.LOG14535.info("loadPartition called for DPP from " + partPath + " to " + tbl.getTableName()); Partition newPartition = loadPartition(partPath, tbl, fullPartSpec, replace, true, numLB > 0, - false, isAcid, hasFollowingStatsTask, mmWriteId); + false, isAcid, hasFollowingStatsTask, txnId, stmtId); partitionsMap.put(fullPartSpec, newPartition); if (inPlaceEligible) { @@ -2091,10 +2073,6 @@ public Void call() throws Exception { for (Future future : futures) { future.get(); } - if (mmWriteId != null) { - // Commit after we have processed all the partitions. - commitMmTableWrite(tbl, mmWriteId); - } } catch (InterruptedException | ExecutionException e) { LOG.debug("Cancelling " + futures.size() + " dynamic loading tasks"); //cancel other futures @@ -2145,8 +2123,7 @@ public Void call() throws Exception { */ public void loadTable(Path loadPath, String tableName, boolean replace, boolean isSrcLocal, boolean isSkewedStoreAsSubdir, boolean isAcid, boolean hasFollowingStatsTask, - Long mmWriteId) throws HiveException { - + Long mmWriteId, int stmtId) throws HiveException { List newFiles = null; Table tbl = getTable(tableName); HiveConf sessionConf = SessionState.getSessionConf(); @@ -2159,18 +2136,18 @@ public void loadTable(Path loadPath, String tableName, boolean replace, boolean if (replace) { Path tableDest = tbl.getPath(); deleteOldPathForReplace(tableDest, tableDest, sessionConf, - new ValidWriteIds.IdPathFilter(mmWriteId, false, true), mmWriteId != null, + new JavaUtils.IdPathFilter(mmWriteId, stmtId, false, true), true, tbl.isStoredAsSubDirectories() ? tbl.getSkewedColNames().size() : 0); } - newFiles = listFilesCreatedByQuery(loadPath, mmWriteId); + newFiles = listFilesCreatedByQuery(loadPath, mmWriteId, stmtId); } else { // Either a non-MM query, or a load into MM table from an external source. 
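For the MM branch, the load destination is now a statement-scoped delta directory rather than a write-ID prefix. A minimal sketch, assuming AcidUtils.deltaSubdir(min, max, statementId) composes the name the way its uses in this patch suggest; the table location and IDs are placeholders.

// Sketch: the destination directory an MM load targets (illustrative values).
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidUtils;

public class MmLoadDestSketch {
  public static void main(String[] args) {
    Path tableDir = new Path("/warehouse/iow0_mm"); // hypothetical table location
    long mmWriteId = 1L;                            // placeholder until the Driver supplies a txn ID
    int stmtId = 0;                                 // statement index within the transaction
    Path destPath = new Path(tableDir, AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId));
    System.out.println(destPath); // expected: /warehouse/iow0_mm/delta_0000001_0000001_0000
  }
}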
Path tblPath = tbl.getPath(), destPath = tblPath; PathFilter filter = FileUtils.HIDDEN_FILES_PATH_FILTER; if (mmWriteId != null) { // We will load into MM directory, and delete from the parent if needed. - destPath = new Path(destPath, ValidWriteIds.getMmFilePrefix(mmWriteId)); - filter = replace ? new ValidWriteIds.IdPathFilter(mmWriteId, false, true) : filter; + destPath = new Path(destPath, AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId)); + filter = replace ? new JavaUtils.IdPathFilter(mmWriteId, stmtId, false, true) : filter; } Utilities.LOG14535.info("moving " + loadPath + " to " + tblPath + " (replace = " + replace + ")"); if (replace) { @@ -2216,11 +2193,6 @@ public void loadTable(Path loadPath, String tableName, boolean replace, boolean } catch (InvalidOperationException e) { throw new HiveException(e); } - - if (mmWriteId != null) { - commitMmTableWrite(tbl, mmWriteId); - } - fireInsertEvent(tbl, null, newFiles); } @@ -4267,25 +4239,4 @@ public void addForeignKey(List foreignKeyCols) throw new HiveException(e); } } - - public long getNextTableWriteId(String dbName, String tableName) throws HiveException { - try { - return getMSC().getNextTableWriteId(dbName, tableName); - } catch (Exception e) { - throw new HiveException(e); - } - } - - public ValidWriteIds getValidWriteIdsForTable( - String dbName, String tableName) throws HiveException { - try { - // TODO: decode ID ranges here if we use that optimization - GetValidWriteIdsResult result = getMSC().getValidWriteIds(dbName, tableName); - return new ValidWriteIds(result.getLowWatermarkId(), result.getHighWatermarkId(), - result.isSetAreIdsValid() && result.isAreIdsValid(), - result.isSetIds() ? new HashSet(result.getIds()) : null); - } catch (Exception e) { - throw new HiveException(e); - } - } -}; +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 38157a6..d54cc63 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -1641,6 +1641,7 @@ public static MapWork createMergeTask(FileSinkDesc fsInputDesc, Path finalName, fmd = new OrcFileMergeDesc(); } fmd.setMmWriteId(fsInputDesc.getMmWriteId()); + fmd.setStmtId(fsInputDesc.getStatementId()); fmd.setDpCtx(fsInputDesc.getDynPartCtx()); fmd.setOutputPath(finalName); fmd.setHasDynamicPartitions(work.hasDynamicPartitions()); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index f762fee..33961f9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -119,6 +119,9 @@ // whether any ACID table is involved in a query protected boolean acidInQuery; + // Similar to acidFileSinks + protected Set mmFileSinks = new HashSet(); + public static int HIVE_COLUMN_ORDER_ASC = 1; public static int HIVE_COLUMN_ORDER_DESC = 0; public static int HIVE_COLUMN_NULLS_FIRST = 0; @@ -1349,6 +1352,10 @@ public QueryProperties getQueryProperties() { return acidFileSinks; } + public Set getMmFileSinks() { + return mmFileSinks; + } + public boolean hasAcidInQuery() { return acidInQuery; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java index b5820d6..f33252c 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java @@ -18,15 +18,8 @@ package org.apache.hadoop.hive.ql.parse; -import org.apache.hadoop.hive.ql.metadata.HiveException; - -import org.apache.hadoop.hive.common.ValidWriteIds; - -import java.util.List; - -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.ql.metadata.HiveException; import java.io.FileNotFoundException; import java.io.IOException; @@ -41,13 +34,17 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.ReplCopyTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.metadata.Hive; @@ -212,8 +209,6 @@ public static void prepareExport( int lbLevels = isMmTable && ts.tableHandle.isStoredAsSubDirectories() ? ts.tableHandle.getSkewedColNames().size() : 0; - ValidWriteIds ids = isMmTable ? db.getValidWriteIdsForTable( - ts.tableHandle.getDbName(), ts.tableHandle.getTableName()) : null; if (ts.tableHandle.isPartitioned()) { for (Partition partition : partitions) { Path fromPath = partition.getDataLocation(); @@ -227,7 +222,7 @@ public static void prepareExport( } copyTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toPartPath, conf); } else { - CopyWork cw = createCopyWork(isMmTable, lbLevels, ids, fromPath, toPartPath, conf); + CopyWork cw = createCopyWork(isMmTable, lbLevels, new ValidReadTxnList(), fromPath, toPartPath, conf); copyTask = TaskFactory.get(cw, conf); } rootTasks.add(copyTask); @@ -246,7 +241,7 @@ public static void prepareExport( copyTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toDataPath, conf); } else { // TODO# master merge - did master remove this path or did it never exit? we need it for MM - CopyWork cw = createCopyWork(isMmTable, lbLevels, ids, fromPath, toDataPath, conf); + CopyWork cw = createCopyWork(isMmTable, lbLevels, new ValidReadTxnList(), fromPath, toDataPath, conf); copyTask = TaskFactory.get(cw, conf); } rootTasks.add(copyTask); @@ -258,14 +253,14 @@ public static void prepareExport( } } - private static CopyWork createCopyWork(boolean isMmTable, int lbLevels, ValidWriteIds ids, + private static CopyWork createCopyWork(boolean isMmTable, int lbLevels, ValidTxnList validTxnList, Path fromPath, Path toDataPath, Configuration conf) throws IOException { List validPaths = null; if (isMmTable) { fromPath = fromPath.getFileSystem(conf).makeQualified(fromPath); - validPaths = Utilities.getValidMmDirectoriesFromTableOrPart(fromPath, conf, ids, lbLevels); + validPaths = Utilities.getValidMmDirectoriesFromTableOrPart(fromPath, conf, validTxnList, lbLevels); } - if (validPaths == null) { + if (validPaths == null || validPaths.isEmpty()) { return new CopyWork(fromPath, toDataPath, false); // Not MM, or no need to skip anything. 
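Passing a freshly constructed ValidReadTxnList into createCopyWork effectively means "treat every delta as committed", so the export copies all MM directories it finds. A tiny sketch of that assumption, based on the stock Hive class rather than on code in this patch:

// Sketch: a default ValidReadTxnList is assumed to have no exceptions and a maximal
// high-watermark, so every write ID reads as valid.
import org.apache.hadoop.hive.common.ValidReadTxnList;

public class ExportVisibilitySketch {
  public static void main(String[] args) {
    ValidReadTxnList everything = new ValidReadTxnList();
    System.out.println(everything.isTxnValid(1L));      // expected: true
    System.out.println(everything.isTxnValid(123456L)); // expected: true
  }
}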
} else { return createCopyWorkForValidPaths(fromPath, toDataPath, validPaths); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index f4fe6ac..c1d4a76 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -34,13 +34,11 @@ import org.antlr.runtime.tree.Tree; import org.apache.commons.lang.ObjectUtils; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.TableType; @@ -58,6 +56,7 @@ import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -298,8 +297,9 @@ public static boolean prepareImport( } Long mmWriteId = null; + int stmtId = 0; if (table != null && MetaStoreUtils.isInsertOnlyTable(table.getParameters())) { - mmWriteId = x.getHive().getNextTableWriteId(table.getDbName(), table.getTableName()); + mmWriteId = 0l; //todo it will be replaced with txnId in Driver } else if (table == null && isSourceMm) { // We could import everything as is - directories and IDs, but that won't work with ACID // txn ids in future. So, let's import everything into the new MM directory with ID == 0. @@ -312,12 +312,12 @@ public static boolean prepareImport( createRegularImportTasks( tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table, - fromURI, fs, wh, x, mmWriteId, isSourceMm); + fromURI, fs, wh, x, mmWriteId, stmtId, isSourceMm); } else { createReplImportTasks( tblDesc, partitionDescs, isPartSpecSet, replicationSpec, waitOnPrecursor, table, - fromURI, fs, wh, x, mmWriteId, isSourceMm); + fromURI, fs, wh, x, mmWriteId, stmtId, isSourceMm); } return tableExists; } @@ -378,10 +378,10 @@ private static CreateTableDesc getBaseCreateTableDescFromTable(String dbName, private static Task loadTable(URI fromURI, Table table, boolean replace, Path tgtPath, ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x, - Long mmWriteId, boolean isSourceMm) { + Long mmWriteId, int stmtId, boolean isSourceMm) { Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME); Path destPath = mmWriteId == null ? x.getCtx().getExternalTmpPath(tgtPath) - : new Path(tgtPath, ValidWriteIds.getMmFilePrefix(mmWriteId)); + : new Path(tgtPath, AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId)); Utilities.LOG14535.info("adding import work for table with source location: " + dataPath + "; table: " + tgtPath + "; copy destination " + destPath + "; mm " + mmWriteId + " (src " + isSourceMm + ") for " + (table == null ? 
"a new table" : table.getTableName())); @@ -402,6 +402,8 @@ private static CreateTableDesc getBaseCreateTableDescFromTable(String dbName, LoadTableDesc loadTableWork = new LoadTableDesc(destPath, Utilities.getTableDesc(table), new TreeMap(), replace, mmWriteId); + loadTableWork.setMmWriteId(mmWriteId); + loadTableWork.setStmtId(stmtId); MoveWork mv = new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false); Task loadTableTask = TaskFactory.get(mv, x.getConf()); copyTask.addDependentTask(loadTableTask); @@ -457,7 +459,7 @@ private static CreateTableDesc getBaseCreateTableDescFromTable(String dbName, private static Task addSinglePartition(URI fromURI, FileSystem fs, CreateTableDesc tblDesc, Table table, Warehouse wh, AddPartitionDesc addPartitionDesc, ReplicationSpec replicationSpec, - EximUtil.SemanticAnalyzerWrapperContext x, Long mmWriteId, boolean isSourceMm, + EximUtil.SemanticAnalyzerWrapperContext x, Long mmWriteId, int stmtId, boolean isSourceMm, Task commitTask) throws MetaException, IOException, HiveException { AddPartitionDesc.OnePartitionDesc partSpec = addPartitionDesc.getPartition(0); @@ -477,7 +479,7 @@ private static CreateTableDesc getBaseCreateTableDescFromTable(String dbName, + " with source location: " + srcLocation); Path tgtLocation = new Path(partSpec.getLocation()); Path destPath = mmWriteId == null ? x.getCtx().getExternalTmpPath(tgtLocation) - : new Path(tgtLocation, ValidWriteIds.getMmFilePrefix(mmWriteId)); + : new Path(tgtLocation, AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId)); Path moveTaskSrc = mmWriteId == null ? destPath : tgtLocation; Utilities.LOG14535.info("adding import work for partition with source location: " + srcLocation + "; target: " + tgtLocation + "; copy dest " + destPath + "; mm " @@ -503,6 +505,8 @@ private static CreateTableDesc getBaseCreateTableDescFromTable(String dbName, x.getOutputs(), addPartitionDesc), x.getConf()); LoadTableDesc loadTableWork = new LoadTableDesc(moveTaskSrc, Utilities.getTableDesc(table), partSpec.getPartSpec(), true, mmWriteId); + loadTableWork.setMmWriteId(mmWriteId); + loadTableWork.setStmtId(stmtId); loadTableWork.setInheritTableSpecs(false); // Do not commit the write ID from each task; need to commit once. // TODO: we should just change the import to use a single MoveTask, like dynparts. 
@@ -802,21 +806,21 @@ private static String checkParams(Map map1, private static void createRegularImportTasks( CreateTableDesc tblDesc, List partitionDescs, boolean isPartSpecSet, ReplicationSpec replicationSpec, Table table, URI fromURI, FileSystem fs, Warehouse wh, - EximUtil.SemanticAnalyzerWrapperContext x, Long mmWriteId, boolean isSourceMm) + EximUtil.SemanticAnalyzerWrapperContext x, Long mmWriteId, int stmtId, boolean isSourceMm) throws HiveException, URISyntaxException, IOException, MetaException { if (table != null) { if (table.isPartitioned()) { x.getLOG().debug("table partitioned"); Task ict = createImportCommitTask( - table.getDbName(), table.getTableName(), mmWriteId, x.getConf()); + table.getDbName(), table.getTableName(), mmWriteId, stmtId, x.getConf()); for (AddPartitionDesc addPartitionDesc : partitionDescs) { Map partSpec = addPartitionDesc.getPartition(0).getPartSpec(); org.apache.hadoop.hive.ql.metadata.Partition ptn = null; if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) { x.getTasks().add(addSinglePartition( - fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, isSourceMm, ict)); + fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, stmtId, isSourceMm, ict)); } else { throw new SemanticException( ErrorMsg.PARTITION_EXISTS.getMsg(partSpecToString(partSpec))); @@ -828,7 +832,7 @@ private static void createRegularImportTasks( Path tgtPath = new Path(table.getDataLocation().toString()); FileSystem tgtFs = FileSystem.get(tgtPath.toUri(), x.getConf()); checkTargetLocationEmpty(tgtFs, tgtPath, replicationSpec, x); - loadTable(fromURI, table, false, tgtPath, replicationSpec, x, mmWriteId, isSourceMm); + loadTable(fromURI, table, false, tgtPath, replicationSpec, x, mmWriteId, stmtId, isSourceMm); } // Set this to read because we can't overwrite any existing partitions x.getOutputs().add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK)); @@ -846,10 +850,10 @@ private static void createRegularImportTasks( if (isPartitioned(tblDesc)) { Task ict = createImportCommitTask( - tblDesc.getDatabaseName(), tblDesc.getTableName(), mmWriteId, x.getConf()); + tblDesc.getDatabaseName(), tblDesc.getTableName(), mmWriteId, stmtId, x.getConf()); for (AddPartitionDesc addPartitionDesc : partitionDescs) { t.addDependentTask(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, - replicationSpec, x, mmWriteId, isSourceMm, ict)); + replicationSpec, x, mmWriteId, stmtId, isSourceMm, ict)); } } else { x.getLOG().debug("adding dependent CopyWork/MoveWork for table"); @@ -866,7 +870,7 @@ private static void createRegularImportTasks( } FileSystem tgtFs = FileSystem.get(tablePath.toUri(), x.getConf()); checkTargetLocationEmpty(tgtFs, tablePath, replicationSpec,x); - t.addDependentTask(loadTable(fromURI, table, false, tablePath, replicationSpec, x, mmWriteId, isSourceMm)); + t.addDependentTask(loadTable(fromURI, table, false, tablePath, replicationSpec, x, mmWriteId, stmtId, isSourceMm)); } } x.getTasks().add(t); @@ -874,10 +878,10 @@ private static void createRegularImportTasks( } private static Task createImportCommitTask( - String dbName, String tblName, Long mmWriteId, HiveConf conf) { + String dbName, String tblName, Long mmWriteId, int stmtId, HiveConf conf) { @SuppressWarnings("unchecked") Task ict = (mmWriteId == null) ? 
null : TaskFactory.get( - new ImportCommitWork(dbName, tblName, mmWriteId), conf); + new ImportCommitWork(dbName, tblName, mmWriteId, stmtId), conf); return ict; } @@ -889,7 +893,7 @@ private static void createReplImportTasks( List partitionDescs, boolean isPartSpecSet, ReplicationSpec replicationSpec, boolean waitOnPrecursor, Table table, URI fromURI, FileSystem fs, Warehouse wh, - EximUtil.SemanticAnalyzerWrapperContext x, Long mmWriteId, boolean isSourceMm) + EximUtil.SemanticAnalyzerWrapperContext x, Long mmWriteId, int stmtId, boolean isSourceMm) throws HiveException, URISyntaxException, IOException, MetaException { Task dr = null; @@ -958,15 +962,15 @@ private static void createReplImportTasks( if (!replicationSpec.isMetadataOnly()) { if (isPartitioned(tblDesc)) { Task ict = createImportCommitTask( - tblDesc.getDatabaseName(), tblDesc.getTableName(), mmWriteId, x.getConf()); + tblDesc.getDatabaseName(), tblDesc.getTableName(), mmWriteId, stmtId, x.getConf()); for (AddPartitionDesc addPartitionDesc : partitionDescs) { addPartitionDesc.setReplicationSpec(replicationSpec); t.addDependentTask( - addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, isSourceMm, ict)); + addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, stmtId, isSourceMm, ict)); } } else { x.getLOG().debug("adding dependent CopyWork/MoveWork for table"); - t.addDependentTask(loadTable(fromURI, table, true, new Path(tblDesc.getLocation()), replicationSpec, x, mmWriteId, isSourceMm)); + t.addDependentTask(loadTable(fromURI, table, true, new Path(tblDesc.getLocation()), replicationSpec, x, mmWriteId, stmtId, isSourceMm)); } } if (dr == null){ @@ -986,11 +990,11 @@ private static void createReplImportTasks( Map partSpec = addPartitionDesc.getPartition(0).getPartSpec(); org.apache.hadoop.hive.ql.metadata.Partition ptn = null; Task ict = replicationSpec.isMetadataOnly() ? 
null : createImportCommitTask( - tblDesc.getDatabaseName(), tblDesc.getTableName(), mmWriteId, x.getConf()); + tblDesc.getDatabaseName(), tblDesc.getTableName(), mmWriteId, stmtId, x.getConf()); if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) { if (!replicationSpec.isMetadataOnly()){ x.getTasks().add(addSinglePartition( - fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, isSourceMm, ict)); + fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, stmtId, isSourceMm, ict)); } } else { // If replicating, then the partition already existing means we need to replace, maybe, if @@ -998,7 +1002,7 @@ private static void createReplImportTasks( if (replicationSpec.allowReplacementInto(ptn)){ if (!replicationSpec.isMetadataOnly()){ x.getTasks().add(addSinglePartition( - fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, isSourceMm, ict)); + fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, stmtId, isSourceMm, ict)); } else { x.getTasks().add(alterSinglePartition( fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, ptn, x)); @@ -1027,7 +1031,7 @@ private static void createReplImportTasks( if (!replicationSpec.isMetadataOnly()) { // repl-imports are replace-into unless the event is insert-into loadTable(fromURI, table, !replicationSpec.isInsert(), new Path(fromURI), - replicationSpec, x, mmWriteId, isSourceMm); + replicationSpec, x, mmWriteId, stmtId, isSourceMm); } else { x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec)); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java index d3b4da1..f31775e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java @@ -20,7 +20,6 @@ import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.ql.Driver; @@ -44,7 +43,6 @@ public class IndexUpdater { private List loadTableWork; private HiveConf conf; - private Configuration parentConf; // Assumes one instance of this + single-threaded compilation for each query. 
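With the write-ID bookkeeping removed, IndexUpdater only builds and compiles a plain REBUILD statement. A stand-alone sketch of the statement text it produces; the index, database, and table names are illustrative, and the helper is not patch code.

// Sketch: the rebuild statement shape compiled by the simplified IndexUpdater path.
public class IndexRebuildSketch {
  static String rebuildStatement(String indexName, String dbName, String tableName) {
    return "ALTER INDEX " + indexName + " ON " + dbName + "." + tableName + " REBUILD";
  }
  public static void main(String[] args) {
    System.out.println(rebuildStatement("idx_key", "default", "simple_mm"));
    // ALTER INDEX idx_key ON default.simple_mm REBUILD
  }
}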
private Hive hive; private List> tasks; @@ -54,7 +52,6 @@ public IndexUpdater(List loadTableWork, Set inputs, Configuration conf) { this.loadTableWork = loadTableWork; this.inputs = inputs; - this.parentConf = conf; this.conf = new HiveConf(conf, IndexUpdater.class); this.tasks = new LinkedList>(); } @@ -63,7 +60,6 @@ public IndexUpdater(LoadTableDesc loadTableWork, Set inputs, Configuration conf) { this.loadTableWork = new LinkedList(); this.loadTableWork.add(loadTableWork); - this.parentConf = conf; this.conf = new HiveConf(conf, IndexUpdater.class); this.tasks = new LinkedList>(); this.inputs = inputs; @@ -79,15 +75,15 @@ public IndexUpdater(LoadTableDesc loadTableWork, Set inputs, Map partSpec = ltd.getPartitionSpec(); if (partSpec == null || partSpec.size() == 0) { //unpartitioned table, update whole index - doIndexUpdate(tblIndexes, ltd.getMmWriteId()); + doIndexUpdate(tblIndexes); } else { - doIndexUpdate(tblIndexes, partSpec, ltd.getMmWriteId()); + doIndexUpdate(tblIndexes, partSpec); } } return tasks; } - private void doIndexUpdate(List tblIndexes, Long mmWriteId) throws HiveException { + private void doIndexUpdate(List tblIndexes) throws HiveException { for (Index idx : tblIndexes) { StringBuilder sb = new StringBuilder(); sb.append("ALTER INDEX "); @@ -96,21 +92,20 @@ private void doIndexUpdate(List tblIndexes, Long mmWriteId) throws HiveEx sb.append(idx.getDbName()).append('.'); sb.append(idx.getOrigTableName()); sb.append(" REBUILD"); - compileRebuild(sb.toString(), idx, mmWriteId); + compileRebuild(sb.toString()); } } private void doIndexUpdate(List tblIndexes, Map - partSpec, Long mmWriteId) throws HiveException { + partSpec) throws HiveException { for (Index index : tblIndexes) { if (containsPartition(index, partSpec)) { - doIndexUpdate(index, partSpec, mmWriteId); + doIndexUpdate(index, partSpec); } } } - private void doIndexUpdate(Index index, Map partSpec, Long mmWriteId) - throws HiveException { + private void doIndexUpdate(Index index, Map partSpec) { StringBuilder ps = new StringBuilder(); boolean first = true; ps.append("("); @@ -134,18 +129,12 @@ private void doIndexUpdate(Index index, Map partSpec, Long mmWri sb.append(" PARTITION "); sb.append(ps.toString()); sb.append(" REBUILD"); - compileRebuild(sb.toString(), index, mmWriteId); + compileRebuild(sb.toString()); } - private void compileRebuild(String query, Index index, Long mmWriteId) - throws HiveException { + private void compileRebuild(String query) { Driver driver = new Driver(this.conf); driver.compile(query, false); - if (mmWriteId != null) { - // TODO: this is rather fragile - ValidWriteIds.addCurrentToConf( - parentConf, index.getDbName(), index.getOrigTableName(), mmWriteId); - } tasks.addAll(driver.getPlan().getRootTasks()); inputs.addAll(driver.getPlan().getInputs()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index 04e8cac..94cf7a7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -272,18 +272,17 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { } Long mmWriteId = null; + int stmtId = 0; Table tbl = ts.tableHandle; if (MetaStoreUtils.isInsertOnlyTable(tbl.getParameters())) { - try { - mmWriteId = db.getNextTableWriteId(tbl.getDbName(), tbl.getTableName()); - } catch (HiveException e) { - throw new SemanticException(e); - } + mmWriteId = 0l; //todo to be replaced 
with txnId in Driver } LoadTableDesc loadTableWork; loadTableWork = new LoadTableDesc(new Path(fromURI), Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite, mmWriteId); + loadTableWork.setMmWriteId(mmWriteId); + loadTableWork.setStmtId(stmtId); if (preservePartitionSpecs){ // Note : preservePartitionSpecs=true implies inheritTableSpecs=false but // but preservePartitionSpecs=false(default) here is not sufficient enough diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index b5f79c8..715a363 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -125,6 +125,7 @@ private ColumnAccessInfo columnAccessInfo; private boolean needViewColumnAuthorization; private Set acidFileSinks = Collections.emptySet(); + private Set mmFileSinks = Collections.emptySet(); // Map to store mapping between reduce sink Operator and TS Operator for semijoin private Map rsOpToTsOpMap = @@ -192,7 +193,7 @@ public ParseContext( List reduceSinkOperatorsAddedByEnforceBucketingSorting, AnalyzeRewriteContext analyzeRewrite, CreateTableDesc createTableDesc, CreateViewDesc createViewDesc, QueryProperties queryProperties, - Map viewProjectToTableSchema, Set acidFileSinks) { + Map viewProjectToTableSchema, Set acidFileSinks, Set mmFileSinks) { this.queryState = queryState; this.conf = queryState.getConf(); this.opToPartPruner = opToPartPruner; @@ -235,6 +236,10 @@ public ParseContext( this.acidFileSinks = new HashSet<>(); this.acidFileSinks.addAll(acidFileSinks); } + if (mmFileSinks != null && !mmFileSinks.isEmpty()) { + this.mmFileSinks = new HashSet<>(); + this.mmFileSinks.addAll(mmFileSinks); + } } public Set getAcidSinks() { return acidFileSinks; @@ -242,6 +247,11 @@ public ParseContext( public boolean hasAcidWrite() { return !acidFileSinks.isEmpty(); } + + public Set getMmFileSinks() { + return mmFileSinks; + } + /** * @return the context */ diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 34f2ac4..05808de 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -75,7 +75,6 @@ import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.SQLForeignKey; import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -477,7 +476,7 @@ public ParseContext getParseContext() { listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, - analyzeRewrite, tableDesc, createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks); + analyzeRewrite, tableDesc, createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks, mmFileSinks); } public CompilationOpContext getOpContext() { @@ -6618,7 +6617,7 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb, } input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(), nullOrder.toString(), maxReducers, (AcidUtils.isFullAcidTable(dest_tab) ? 
- getAcidType(dest_tab, table_desc.getOutputFileFormatClass(), dest) : AcidUtils.Operation.NOT_ACID)); + getAcidType(table_desc.getOutputFileFormatClass(), dest) : AcidUtils.Operation.NOT_ACID)); reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator)input.getParentOperators().get(0)); ctx.setMultiFileSpray(multiFileSpray); ctx.setNumFiles(numFiles); @@ -6751,13 +6750,14 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) if (!isNonNativeTable) { AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID; if (destTableIsAcid) { - acidOp = getAcidType(dest_tab, table_desc.getOutputFileFormatClass(), dest); + acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); checkAcidConstraints(qb, table_desc, dest_tab, acidOp); } - try { - mmWriteId = getMmWriteId(dest_tab, isMmTable); - } catch (HiveException e) { - throw new SemanticException(e); + if (MetaStoreUtils.isInsertOnlyTable(table_desc.getProperties())) { + acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); + } + if (isMmTable) { + mmWriteId = 0l; //todo to be replaced by txnId in Driver } boolean isReplace = !qb.getParseInfo().isInsertIntoTable( dest_tab.getDbName(), dest_tab.getTableName()); @@ -6814,14 +6814,14 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) dest_part.isStoredAsSubDirectories(), conf); AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID; if (destTableIsAcid) { - acidOp = getAcidType(dest_tab, table_desc.getOutputFileFormatClass(), dest); + acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); checkAcidConstraints(qb, table_desc, dest_tab, acidOp); } - try { - mmWriteId = getMmWriteId(dest_tab, isMmTable); - } catch (HiveException e) { - // How is this a semantic exception? Stupid Java and signatures. - throw new SemanticException(e); + if (MetaStoreUtils.isInsertOnlyTable(dest_part.getTable().getParameters())) { + acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); + } + if (isMmTable) { + mmWriteId = 0l; //todo to be replaced by txnId in Driver } ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp, mmWriteId); ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), @@ -6857,9 +6857,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) destTableIsMaterialization = tblDesc.isMaterialization(); if (!destTableIsTemporary && MetaStoreUtils.isInsertOnlyTable(tblDesc.getTblProps(), true)) { isMmTable = isMmCtas = true; - // TODO# this should really get current ACID txn; assuming ACID works correctly the txn - // should have been opened to create the ACID table. For now use the first ID. - mmWriteId = 0l; + mmWriteId = 0l; //todo to be replaced by txnId in Driver tblDesc.setInitialMmWriteId(mmWriteId); } } else if (viewDesc != null) { @@ -7094,12 +7092,6 @@ private ColsAndTypes deriveFileSinkColTypes( return result; } - private static Long getMmWriteId(Table tbl, boolean isMmTable) throws HiveException { - if (!isMmTable) return null; - // Get the next write ID for this table. We will prefix files with this write ID. 
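The branch that routes a sink into the insert-only (MM) path hinges on MetaStoreUtils.isInsertOnlyTable. A minimal sketch, with the table properties hand-built to match the test tables in this patch; the expected result is an assumption about that helper, not something asserted by these hunks.

// Sketch: the property check assumed to send a FileSinkDesc into mmFileSinks with a
// NOT_ACID write type.
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;

public class InsertOnlyCheckSketch {
  public static void main(String[] args) {
    Map<String, String> props = new HashMap<>();
    props.put("transactional", "true");
    props.put("transactional_properties", "insert_only");
    System.out.println(MetaStoreUtils.isInsertOnlyTable(props)); // expected: true
  }
}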
- return Hive.get().getNextTableWriteId(tbl.getDbName(), tbl.getTableName()); - } - private FileSinkDesc createFileSinkDesc(String dest, TableDesc table_desc, Partition dest_part, Path dest_path, int currentTableId, boolean destTableIsAcid, boolean destTableIsTemporary, @@ -7119,7 +7111,12 @@ private FileSinkDesc createFileSinkDesc(String dest, TableDesc table_desc, MetaStoreUtils.isInsertOnlyTable(dest_part.getTable().getParameters())) || (table_desc != null && MetaStoreUtils.isInsertOnlyTable(table_desc.getProperties())); - if (destTableIsAcid && !isDestInsertOnly) { + if (isDestInsertOnly) { + fileSinkDesc.setWriteType(Operation.NOT_ACID); + mmFileSinks.add(fileSinkDesc); + } + + if (destTableIsAcid) { AcidUtils.Operation wt = updating(dest) ? AcidUtils.Operation.UPDATE : (deleting(dest) ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT); fileSinkDesc.setWriteType(wt); @@ -7331,7 +7328,7 @@ String fixCtasColumnName(String colName) { private void checkAcidConstraints(QB qb, TableDesc tableDesc, Table table, AcidUtils.Operation acidOp) throws SemanticException { String tableName = tableDesc.getTableName(); - if (!qb.getParseInfo().isInsertIntoTable(tableName) && !Operation.INSERT_ONLY.equals(acidOp)) { + if (!qb.getParseInfo().isInsertIntoTable(tableName)) { LOG.debug("Couldn't find table " + tableName + " in insertIntoTable"); throw new SemanticException(ErrorMsg.NO_INSERT_OVERWRITE_WITH_ACID.getMsg()); } @@ -7346,7 +7343,7 @@ These props are now enabled elsewhere (see commit diffs). It would be better in */ conf.set(AcidUtils.CONF_ACID_KEY, "true"); - if (!Operation.NOT_ACID.equals(acidOp) && !Operation.INSERT_ONLY.equals(acidOp)) { + if (!Operation.NOT_ACID.equals(acidOp)) { if (table.getNumBuckets() < 1) { throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, table.getTableName()); } @@ -11151,7 +11148,7 @@ void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticExce listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, - analyzeRewrite, tableDesc, createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks); + analyzeRewrite, tableDesc, createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks, mmFileSinks); // 5. 
Take care of view creation if (createVwDesc != null) { @@ -13404,12 +13401,9 @@ private boolean isAcidOutputFormat(Class of) { AcidUtils.Operation.INSERT); } - private AcidUtils.Operation getAcidType( - Table table, Class of, String dest) { + private AcidUtils.Operation getAcidType(Class of, String dest) { if (SessionState.get() == null || !SessionState.get().getTxnMgr().supportsAcid()) { return AcidUtils.Operation.NOT_ACID; - } else if (MetaStoreUtils.isInsertOnlyTable(table.getParameters())) { - return AcidUtils.Operation.INSERT_ONLY; } else if (isAcidOutputFormat(of)) { return getAcidType(dest); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 91c343c..ed19df1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -20,23 +20,18 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashSet; -import java.util.LinkedList; import java.util.List; -import java.util.Queue; import java.util.Set; -import java.util.Stack; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.HiveStatsUtils; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.Context; @@ -44,7 +39,6 @@ import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -56,7 +50,6 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; -import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; @@ -320,6 +313,7 @@ private void setLoadFileLocation( final ParseContext pCtx, LoadFileDesc lfd) throws SemanticException { // CTAS; make the movetask's destination directory the table's destination. Long mmWriteIdForCtas = null; + int stmtId = 0; // CTAS cannot be part of multi-txn stmt FileSinkDesc dataSinkForCtas = null; String loc = null; if (pCtx.getQueryProperties().isCTAS()) { @@ -333,7 +327,7 @@ private void setLoadFileLocation( Path location = (loc == null) ? 
getDefaultCtasLocation(pCtx) : new Path(loc); if (mmWriteIdForCtas != null) { dataSinkForCtas.setDirName(location); - location = new Path(location, ValidWriteIds.getMmFilePrefix(mmWriteIdForCtas)); + location = new Path(location, AcidUtils.deltaSubdir(mmWriteIdForCtas, mmWriteIdForCtas, stmtId)); lfd.setSourcePath(location); Utilities.LOG14535.info("Setting MM CTAS to " + location); } @@ -552,7 +546,7 @@ public ParseContext getParseContext(ParseContext pCtx, List sourceDirs, final List targetDi return srcDirs; } + public void setSourceDirs(List srcs) { + this.srcDirs = srcs; + } + + public void setTargetDirs(final List targetDir) { + this.targetDirs = targetDir; + } + @Explain(displayName = "hdfs directory") public boolean getIsDfsDir() { return isDfsDir; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java index 762d946..1ff1edd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java @@ -43,6 +43,7 @@ // insert, update, or delete. private AcidUtils.Operation writeType; private Long mmWriteId; + private int stmtId; // TODO: the below seems like they should just be combined into partitionDesc private org.apache.hadoop.hive.ql.plan.TableDesc table; @@ -207,6 +208,18 @@ public Long getMmWriteId() { return mmWriteId; } + public void setMmWriteId(Long mmWriteId) { + this.mmWriteId = mmWriteId; + } + + public int getStmtId() { + return stmtId; + } + + public void setStmtId(int stmtId) { + this.stmtId = stmtId; + } + public void setIntermediateInMmWrite(boolean b) { this.commitMmWriteId = !b; } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java index 4a13e1f..55b9da9 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java @@ -141,7 +141,7 @@ db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, src, true, true); db.createTable(src, cols, null, TextInputFormat.class, HiveIgnoreKeyTextOutputFormat.class); - db.loadTable(hadoopDataFile[i], src, false, true, false, false, false, null); + db.loadTable(hadoopDataFile[i], src, false, true, false, false, false, null, 0); i++; } diff --git ql/src/test/queries/clientpositive/mm_all.q ql/src/test/queries/clientpositive/mm_all.q index a6a7c8f..32942a9 100644 --- ql/src/test/queries/clientpositive/mm_all.q +++ ql/src/test/queries/clientpositive/mm_all.q @@ -33,7 +33,7 @@ drop table part_mm; drop table simple_mm; create table simple_mm(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only"); insert into table simple_mm select key from intermediate; -insert overwrite table simple_mm select key from intermediate; +-- insert overwrite table simple_mm select key from intermediate; select * from simple_mm order by key; insert into table simple_mm select key from intermediate; select * from simple_mm order by key; @@ -193,40 +193,40 @@ set hive.merge.mapredfiles=false; -- TODO: need to include merge+union+DP, but it's broken for now -drop table ctas0_mm; -create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate; -select * from ctas0_mm; -drop table ctas0_mm; +--drop table ctas0_mm; +--create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate; 
+--select * from ctas0_mm; +--drop table ctas0_mm; -drop table ctas1_mm; -create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as - select * from intermediate union all select * from intermediate; -select * from ctas1_mm; -drop table ctas1_mm; +--drop table ctas1_mm; +--create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as +-- select * from intermediate union all select * from intermediate; +--select * from ctas1_mm; +--drop table ctas1_mm; drop table iow0_mm; create table iow0_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); -insert overwrite table iow0_mm select key from intermediate; +--insert overwrite table iow0_mm select key from intermediate; insert into table iow0_mm select key + 1 from intermediate; select * from iow0_mm order by key; -insert overwrite table iow0_mm select key + 2 from intermediate; +--insert overwrite table iow0_mm select key + 2 from intermediate; select * from iow0_mm order by key; drop table iow0_mm; drop table iow1_mm; create table iow1_mm(key int) partitioned by (key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); -insert overwrite table iow1_mm partition (key2) +--insert overwrite table iow1_mm partition (key2) select key as k1, key from intermediate union all select key as k1, key from intermediate; insert into table iow1_mm partition (key2) select key + 1 as k1, key from intermediate union all select key as k1, key from intermediate; select * from iow1_mm order by key, key2; -insert overwrite table iow1_mm partition (key2) +--insert overwrite table iow1_mm partition (key2) select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate; select * from iow1_mm order by key, key2; -insert overwrite table iow1_mm partition (key2) +--insert overwrite table iow1_mm partition (key2) select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate; select * from iow1_mm order by key, key2; drop table iow1_mm; @@ -369,82 +369,82 @@ drop table multi0_2_mm; create table multi0_1_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); create table multi0_2_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); -from intermediate -insert overwrite table multi0_1_mm select key, p -insert overwrite table multi0_2_mm select p, key; - -select * from multi0_1_mm order by key, key2; -select * from multi0_2_mm order by key, key2; - -set hive.merge.mapredfiles=true; -set hive.merge.sparkfiles=true; -set hive.merge.tezfiles=true; - -from intermediate -insert into table multi0_1_mm select p, key -insert overwrite table multi0_2_mm select key, p; -select * from multi0_1_mm order by key, key2; -select * from multi0_2_mm order by key, key2; - -set hive.merge.mapredfiles=false; -set hive.merge.sparkfiles=false; -set hive.merge.tezfiles=false; - -drop table multi0_1_mm; -drop table multi0_2_mm; - - -drop table multi1_mm; -create table multi1_mm (key int, key2 int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); -from intermediate -insert into table multi1_mm partition(p=1) select p, key -insert into table multi1_mm partition(p=2) select key, p; -select * from multi1_mm order by key, key2, p; -from intermediate -insert into table multi1_mm partition(p=2) select p, key -insert 
overwrite table multi1_mm partition(p=1) select key, p; -select * from multi1_mm order by key, key2, p; - -from intermediate -insert into table multi1_mm partition(p) select p, key, p -insert into table multi1_mm partition(p=1) select key, p; -select key, key2, p from multi1_mm order by key, key2, p; - -from intermediate -insert into table multi1_mm partition(p) select p, key, 1 -insert into table multi1_mm partition(p=1) select key, p; -select key, key2, p from multi1_mm order by key, key2, p; -drop table multi1_mm; - - - - -set datanucleus.cache.collections=false; -set hive.stats.autogather=true; - -drop table stats_mm; -create table stats_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); -insert overwrite table stats_mm select key from intermediate; -desc formatted stats_mm; - -insert into table stats_mm select key from intermediate; -desc formatted stats_mm; -drop table stats_mm; - -drop table stats2_mm; -create table stats2_mm tblproperties("transactional"="true", "transactional_properties"="insert_only") as select array(key, value) from src; -desc formatted stats2_mm; -drop table stats2_mm; - - -set hive.optimize.skewjoin=true; -set hive.skewjoin.key=2; -set hive.optimize.metadataonly=false; - -CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only"); -FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value; -select count(distinct key) from skewjoin_mm; -drop table skewjoin_mm; +--from intermediate +--insert overwrite table multi0_1_mm select key, p +--insert overwrite table multi0_2_mm select p, key; +-- +--select * from multi0_1_mm order by key, key2; +--select * from multi0_2_mm order by key, key2; +-- +--set hive.merge.mapredfiles=true; +--set hive.merge.sparkfiles=true; +--set hive.merge.tezfiles=true; +-- +--from intermediate +--insert into table multi0_1_mm select p, key +--insert overwrite table multi0_2_mm select key, p; +--select * from multi0_1_mm order by key, key2; +--select * from multi0_2_mm order by key, key2; +-- +--set hive.merge.mapredfiles=false; +--set hive.merge.sparkfiles=false; +--set hive.merge.tezfiles=false; +-- +--drop table multi0_1_mm; +--drop table multi0_2_mm; +-- +-- +--drop table multi1_mm; +--create table multi1_mm (key int, key2 int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); +--from intermediate +--insert into table multi1_mm partition(p=1) select p, key +--insert into table multi1_mm partition(p=2) select key, p; +--select * from multi1_mm order by key, key2, p; +--from intermediate +--insert into table multi1_mm partition(p=2) select p, key +--insert overwrite table multi1_mm partition(p=1) select key, p; +--select * from multi1_mm order by key, key2, p; +-- +--from intermediate +--insert into table multi1_mm partition(p) select p, key, p +--insert into table multi1_mm partition(p=1) select key, p; +--select key, key2, p from multi1_mm order by key, key2, p; +-- +--from intermediate +--insert into table multi1_mm partition(p) select p, key, 1 +--insert into table multi1_mm partition(p=1) select key, p; +--select key, key2, p from multi1_mm order by key, key2, p; +--drop table multi1_mm; +-- +-- +-- +-- +--set datanucleus.cache.collections=false; +--set hive.stats.autogather=true; +-- +--drop table stats_mm; +--create table stats_mm(key int) tblproperties("transactional"="true", 
"transactional_properties"="insert_only"); +--insert overwrite table stats_mm select key from intermediate; +--desc formatted stats_mm; +-- +--insert into table stats_mm select key from intermediate; +--desc formatted stats_mm; +--drop table stats_mm; +-- +--drop table stats2_mm; +--create table stats2_mm tblproperties("transactional"="true", "transactional_properties"="insert_only") as select array(key, value) from src; +--desc formatted stats2_mm; +--drop table stats2_mm; +-- +-- +--set hive.optimize.skewjoin=true; +--set hive.skewjoin.key=2; +--set hive.optimize.metadataonly=false; +-- +--CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +--FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value; +--select count(distinct key) from skewjoin_mm; +--drop table skewjoin_mm; set hive.optimize.skewjoin=false; diff --git ql/src/test/queries/clientpositive/mm_conversions.q ql/src/test/queries/clientpositive/mm_conversions.q index 2dc7a74..3dc5a1f 100644 --- ql/src/test/queries/clientpositive/mm_conversions.q +++ ql/src/test/queries/clientpositive/mm_conversions.q @@ -4,7 +4,8 @@ set hive.fetch.task.conversion=none; set tez.grouping.min-size=1; set tez.grouping.max-size=2; set hive.exec.dynamic.partition.mode=nonstrict; - +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -- Force multiple writers when reading drop table intermediate; diff --git ql/src/test/queries/clientpositive/mm_insertonly_acid.q ql/src/test/queries/clientpositive/mm_insertonly_acid.q deleted file mode 100644 index 7da99c5..0000000 --- ql/src/test/queries/clientpositive/mm_insertonly_acid.q +++ /dev/null @@ -1,16 +0,0 @@ -set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.fetch.task.conversion=none; -set hive.exec.dynamic.partition.mode=nonstrict; -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; - - -drop table qtr_acid; -create table qtr_acid (key int) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only"); -insert into table qtr_acid partition(p='123') select distinct key from src where key > 0 order by key asc limit 10; -insert into table qtr_acid partition(p='456') select distinct key from src where key > 0 order by key desc limit 10; -explain -select * from qtr_acid order by key; -select * from qtr_acid order by key; -drop table qtr_acid; \ No newline at end of file diff --git ql/src/test/results/clientpositive/mm_all.q.out ql/src/test/results/clientpositive/mm_all.q.out index 116f2b1..fc792c2 100644 --- ql/src/test/results/clientpositive/mm_all.q.out +++ ql/src/test/results/clientpositive/mm_all.q.out @@ -288,21 +288,6 @@ POSTHOOK: Input: default@intermediate@p=456 POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@simple_mm POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: insert overwrite table simple_mm select key from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@simple_mm -POSTHOOK: query: insert overwrite table simple_mm select key from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: 
default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@simple_mm -POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select * from simple_mm order by key PREHOOK: type: QUERY PREHOOK: Input: default@simple_mm @@ -1236,102 +1221,6 @@ POSTHOOK: query: drop table merge1_mm POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@merge1_mm POSTHOOK: Output: default@merge1_mm -PREHOOK: query: drop table ctas0_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ctas0_mm -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: database:default -PREHOOK: Output: default@ctas0_mm -POSTHOOK: query: create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: database:default -POSTHOOK: Output: default@ctas0_mm -POSTHOOK: Lineage: ctas0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: ctas0_mm.p SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -PREHOOK: query: select * from ctas0_mm -PREHOOK: type: QUERY -PREHOOK: Input: default@ctas0_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from ctas0_mm -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ctas0_mm -#### A masked pattern was here #### -98 455 -97 455 -0 456 -10 456 -100 457 -103 457 -PREHOOK: query: drop table ctas0_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@ctas0_mm -PREHOOK: Output: default@ctas0_mm -POSTHOOK: query: drop table ctas0_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@ctas0_mm -POSTHOOK: Output: default@ctas0_mm -PREHOOK: query: drop table ctas1_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ctas1_mm -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as - select * from intermediate union all select * from intermediate -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: database:default -PREHOOK: Output: default@ctas1_mm -POSTHOOK: query: create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as - select * from intermediate union all select * from intermediate -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: database:default -POSTHOOK: Output: default@ctas1_mm -POSTHOOK: Lineage: ctas1_mm.key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, 
type:int, comment:null), ] -POSTHOOK: Lineage: ctas1_mm.p EXPRESSION [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -PREHOOK: query: select * from ctas1_mm -PREHOOK: type: QUERY -PREHOOK: Input: default@ctas1_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from ctas1_mm -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ctas1_mm -#### A masked pattern was here #### -98 455 -98 455 -97 455 -97 455 -0 456 -0 456 -10 456 -10 456 -100 457 -100 457 -103 457 -103 457 -PREHOOK: query: drop table ctas1_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@ctas1_mm -PREHOOK: Output: default@ctas1_mm -POSTHOOK: query: drop table ctas1_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@ctas1_mm -POSTHOOK: Output: default@ctas1_mm PREHOOK: query: drop table iow0_mm PREHOOK: type: DROPTABLE POSTHOOK: query: drop table iow0_mm @@ -1344,21 +1233,6 @@ POSTHOOK: query: create table iow0_mm(key int) tblproperties("transactional"="tr POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@iow0_mm -PREHOOK: query: insert overwrite table iow0_mm select key from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@iow0_mm -POSTHOOK: query: insert overwrite table iow0_mm select key from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@iow0_mm -POSTHOOK: Lineage: iow0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: insert into table iow0_mm select key + 1 from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate @@ -1382,33 +1256,12 @@ POSTHOOK: query: select * from iow0_mm order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@iow0_mm #### A masked pattern was here #### -0 1 -10 11 -97 -98 98 99 -100 101 -103 104 -PREHOOK: query: insert overwrite table iow0_mm select key + 2 from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@iow0_mm -POSTHOOK: query: insert overwrite table iow0_mm select key + 2 from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@iow0_mm -POSTHOOK: Lineage: iow0_mm.key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select * from iow0_mm order by key PREHOOK: type: QUERY PREHOOK: Input: default@iow0_mm @@ -1417,12 +1270,12 @@ POSTHOOK: query: select * from iow0_mm order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@iow0_mm #### A masked pattern was here #### -2 -12 +1 +11 +98 99 -100 -102 -105 +101 +104 PREHOOK: query: drop table iow0_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@iow0_mm @@ -1443,33 +1296,32 @@ POSTHOOK: query: create table iow1_mm(key int) partitioned by (key2 int) tblpro POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@iow1_mm -PREHOOK: query: insert overwrite table iow1_mm 
partition (key2) -select key as k1, key from intermediate union all select key as k1, key from intermediate +PREHOOK: query: select key as k1, key from intermediate union all select key as k1, key from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@iow1_mm -POSTHOOK: query: insert overwrite table iow1_mm partition (key2) -select key as k1, key from intermediate union all select key as k1, key from intermediate +#### A masked pattern was here #### +POSTHOOK: query: select key as k1, key from intermediate union all select key as k1, key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@iow1_mm@key2=0 -POSTHOOK: Output: default@iow1_mm@key2=10 -POSTHOOK: Output: default@iow1_mm@key2=100 -POSTHOOK: Output: default@iow1_mm@key2=103 -POSTHOOK: Output: default@iow1_mm@key2=97 -POSTHOOK: Output: default@iow1_mm@key2=98 -POSTHOOK: Lineage: iow1_mm PARTITION(key2=0).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=100).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=103).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=10).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=97).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=98).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +#### A masked pattern was here #### +98 98 +98 98 +97 97 +97 97 +0 0 +0 0 +10 10 +10 10 +100 100 +100 100 +103 103 +103 103 PREHOOK: query: insert into table iow1_mm partition (key2) select key + 1 as k1, key from intermediate union all select key as k1, key from intermediate PREHOOK: type: QUERY @@ -1518,56 +1370,43 @@ POSTHOOK: Input: default@iow1_mm@key2=97 POSTHOOK: Input: default@iow1_mm@key2=98 #### A masked pattern was here #### 0 0 -0 0 -0 0 1 0 10 10 -10 10 -10 10 11 10 97 97 -97 97 -97 97 98 97 98 98 -98 98 -98 98 99 98 100 100 -100 100 -100 100 101 100 103 103 -103 103 -103 103 104 103 -PREHOOK: query: insert overwrite table iow1_mm partition (key2) -select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate +PREHOOK: query: select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@iow1_mm -POSTHOOK: query: insert overwrite table iow1_mm partition (key2) -select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate +#### A masked pattern was here #### +POSTHOOK: query: select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 
POSTHOOK: Input: default@intermediate@p=456 POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@iow1_mm@key2=0 -POSTHOOK: Output: default@iow1_mm@key2=10 -POSTHOOK: Output: default@iow1_mm@key2=100 -POSTHOOK: Output: default@iow1_mm@key2=103 -POSTHOOK: Output: default@iow1_mm@key2=97 -POSTHOOK: Output: default@iow1_mm@key2=98 -POSTHOOK: Lineage: iow1_mm PARTITION(key2=0).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=100).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=103).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=10).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=97).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=98).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +#### A masked pattern was here #### +101 98 +102 98 +100 97 +101 97 +3 0 +4 0 +13 10 +14 10 +103 100 +104 100 +106 103 +107 103 PREHOOK: query: select * from iow1_mm order by key, key2 PREHOOK: type: QUERY PREHOOK: Input: default@iow1_mm @@ -1588,73 +1427,53 @@ POSTHOOK: Input: default@iow1_mm@key2=103 POSTHOOK: Input: default@iow1_mm@key2=97 POSTHOOK: Input: default@iow1_mm@key2=98 #### A masked pattern was here #### -3 0 -4 0 -13 10 -14 10 -100 97 -101 97 -101 98 -102 98 -103 100 -104 100 -106 103 -107 103 -PREHOOK: query: insert overwrite table iow1_mm partition (key2) -select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate +0 0 +1 0 +10 10 +11 10 +97 97 +98 97 +98 98 +99 98 +100 100 +101 100 +103 103 +104 103 +PREHOOK: query: select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@iow1_mm -POSTHOOK: query: insert overwrite table iow1_mm partition (key2) -select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate +#### A masked pattern was here #### +POSTHOOK: query: select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@iow1_mm@key2=100 -POSTHOOK: Output: default@iow1_mm@key2=101 -POSTHOOK: Output: default@iow1_mm@key2=102 -POSTHOOK: Output: default@iow1_mm@key2=103 -POSTHOOK: Output: default@iow1_mm@key2=105 -POSTHOOK: Output: default@iow1_mm@key2=106 -POSTHOOK: Output: default@iow1_mm@key2=12 -POSTHOOK: Output: default@iow1_mm@key2=13 -POSTHOOK: Output: default@iow1_mm@key2=2 -POSTHOOK: Output: default@iow1_mm@key2=3 -POSTHOOK: Output: default@iow1_mm@key2=99 -POSTHOOK: Lineage: iow1_mm PARTITION(key2=100).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=101).key EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=102).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=103).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=105).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=106).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=12).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=13).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=2).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=3).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=99).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +#### A masked pattern was here #### +101 101 +100 100 +100 100 +99 99 +3 3 +2 2 +13 13 +12 12 +103 103 +102 102 +106 106 +105 105 PREHOOK: query: select * from iow1_mm order by key, key2 PREHOOK: type: QUERY PREHOOK: Input: default@iow1_mm PREHOOK: Input: default@iow1_mm@key2=0 PREHOOK: Input: default@iow1_mm@key2=10 PREHOOK: Input: default@iow1_mm@key2=100 -PREHOOK: Input: default@iow1_mm@key2=101 -PREHOOK: Input: default@iow1_mm@key2=102 PREHOOK: Input: default@iow1_mm@key2=103 -PREHOOK: Input: default@iow1_mm@key2=105 -PREHOOK: Input: default@iow1_mm@key2=106 -PREHOOK: Input: default@iow1_mm@key2=12 -PREHOOK: Input: default@iow1_mm@key2=13 -PREHOOK: Input: default@iow1_mm@key2=2 -PREHOOK: Input: default@iow1_mm@key2=3 PREHOOK: Input: default@iow1_mm@key2=97 PREHOOK: Input: default@iow1_mm@key2=98 -PREHOOK: Input: default@iow1_mm@key2=99 #### A masked pattern was here #### POSTHOOK: query: select * from iow1_mm order by key, key2 POSTHOOK: type: QUERY @@ -1662,39 +1481,22 @@ POSTHOOK: Input: default@iow1_mm POSTHOOK: Input: default@iow1_mm@key2=0 POSTHOOK: Input: default@iow1_mm@key2=10 POSTHOOK: Input: default@iow1_mm@key2=100 -POSTHOOK: Input: default@iow1_mm@key2=101 -POSTHOOK: Input: default@iow1_mm@key2=102 POSTHOOK: Input: default@iow1_mm@key2=103 -POSTHOOK: Input: default@iow1_mm@key2=105 -POSTHOOK: Input: default@iow1_mm@key2=106 -POSTHOOK: Input: default@iow1_mm@key2=12 -POSTHOOK: Input: default@iow1_mm@key2=13 -POSTHOOK: Input: default@iow1_mm@key2=2 -POSTHOOK: Input: default@iow1_mm@key2=3 POSTHOOK: Input: default@iow1_mm@key2=97 POSTHOOK: Input: default@iow1_mm@key2=98 -POSTHOOK: Input: default@iow1_mm@key2=99 #### A masked pattern was here #### -2 2 -3 0 -3 3 -4 0 -12 12 -13 10 -13 13 -14 10 -99 99 -100 97 -100 100 +0 0 +1 0 +10 10 +11 10 +97 97 +98 97 +98 98 +99 98 100 100 -101 97 -101 98 -101 101 -102 98 -102 102 +101 100 103 103 -105 105 -106 106 +104 103 PREHOOK: query: drop table iow1_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@iow1_mm @@ -1765,7 +1567,7 @@ POSTHOOK: query: select count(1) from load0_mm POSTHOOK: type: QUERY POSTHOOK: Input: default@load0_mm #### A masked pattern was here #### -500 +1000 PREHOOK: query: drop table load0_mm PREHOOK: type: 
DROPTABLE PREHOOK: Input: default@load0_mm @@ -1916,7 +1718,7 @@ POSTHOOK: query: select count(1) from load1_mm POSTHOOK: type: QUERY POSTHOOK: Input: default@load1_mm #### A masked pattern was here #### -500 +1050 PREHOOK: query: drop table load1_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@load1_mm @@ -2541,649 +2343,6 @@ POSTHOOK: query: create table multi0_2_mm (key int, key2 int) tblproperties("tr POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@multi0_2_mm -PREHOOK: query: from intermediate -insert overwrite table multi0_1_mm select key, p -insert overwrite table multi0_2_mm select p, key -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi0_1_mm -PREHOOK: Output: default@multi0_2_mm -POSTHOOK: query: from intermediate -insert overwrite table multi0_1_mm select key, p -insert overwrite table multi0_2_mm select p, key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi0_1_mm -POSTHOOK: Output: default@multi0_2_mm -POSTHOOK: Lineage: multi0_1_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_1_mm.key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_2_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_2_mm.key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: select * from multi0_1_mm order by key, key2 -PREHOOK: type: QUERY -PREHOOK: Input: default@multi0_1_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from multi0_1_mm order by key, key2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi0_1_mm -#### A masked pattern was here #### -0 456 -10 456 -97 455 -98 455 -100 457 -103 457 -PREHOOK: query: select * from multi0_2_mm order by key, key2 -PREHOOK: type: QUERY -PREHOOK: Input: default@multi0_2_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from multi0_2_mm order by key, key2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi0_2_mm -#### A masked pattern was here #### -455 97 -455 98 -456 0 -456 10 -457 100 -457 103 -PREHOOK: query: from intermediate -insert into table multi0_1_mm select p, key -insert overwrite table multi0_2_mm select key, p -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi0_1_mm -PREHOOK: Output: default@multi0_2_mm -POSTHOOK: query: from intermediate -insert into table multi0_1_mm select p, key -insert overwrite table multi0_2_mm select key, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi0_1_mm -POSTHOOK: Output: default@multi0_2_mm -POSTHOOK: Lineage: multi0_1_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_1_mm.key2 SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_2_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_2_mm.key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -PREHOOK: query: select * from multi0_1_mm order by key, key2 -PREHOOK: type: QUERY -PREHOOK: Input: default@multi0_1_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from multi0_1_mm order by key, key2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi0_1_mm -#### A masked pattern was here #### -0 456 -10 456 -97 455 -98 455 -100 457 -103 457 -455 97 -455 98 -456 0 -456 10 -457 100 -457 103 -PREHOOK: query: select * from multi0_2_mm order by key, key2 -PREHOOK: type: QUERY -PREHOOK: Input: default@multi0_2_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from multi0_2_mm order by key, key2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi0_2_mm -#### A masked pattern was here #### -0 456 -10 456 -97 455 -98 455 -100 457 -103 457 -PREHOOK: query: drop table multi0_1_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@multi0_1_mm -PREHOOK: Output: default@multi0_1_mm -POSTHOOK: query: drop table multi0_1_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@multi0_1_mm -POSTHOOK: Output: default@multi0_1_mm -PREHOOK: query: drop table multi0_2_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@multi0_2_mm -PREHOOK: Output: default@multi0_2_mm -POSTHOOK: query: drop table multi0_2_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@multi0_2_mm -POSTHOOK: Output: default@multi0_2_mm -PREHOOK: query: drop table multi1_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table multi1_mm -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table multi1_mm (key int, key2 int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@multi1_mm -POSTHOOK: query: create table multi1_mm (key int, key2 int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@multi1_mm -PREHOOK: query: from intermediate -insert into table multi1_mm partition(p=1) select p, key -insert into table multi1_mm partition(p=2) select key, p -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi1_mm@p=1 -PREHOOK: Output: default@multi1_mm@p=2 -POSTHOOK: query: from intermediate -insert into table multi1_mm partition(p=1) select p, key -insert into table multi1_mm partition(p=2) select key, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: Output: default@multi1_mm@p=2 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -PREHOOK: query: select * from multi1_mm order by key, key2, p -PREHOOK: type: QUERY -PREHOOK: Input: default@multi1_mm -PREHOOK: Input: default@multi1_mm@p=1 -PREHOOK: Input: default@multi1_mm@p=2 -#### A masked pattern was here #### -POSTHOOK: query: select * from multi1_mm order by key, key2, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi1_mm -POSTHOOK: Input: default@multi1_mm@p=1 -POSTHOOK: Input: default@multi1_mm@p=2 -#### A masked pattern was here #### -0 456 2 -10 456 2 -97 455 2 -98 455 2 -100 457 2 -103 457 2 -455 97 1 -455 98 1 -456 0 1 -456 10 1 -457 100 1 -457 103 1 -PREHOOK: query: from intermediate -insert into table multi1_mm partition(p=2) select p, key -insert overwrite table multi1_mm partition(p=1) select key, p -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi1_mm@p=1 -PREHOOK: Output: default@multi1_mm@p=2 -POSTHOOK: query: from intermediate -insert into table multi1_mm partition(p=2) select p, key -insert overwrite table multi1_mm partition(p=1) select key, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: Output: default@multi1_mm@p=2 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: select * from multi1_mm order by key, key2, p -PREHOOK: type: QUERY -PREHOOK: Input: default@multi1_mm -PREHOOK: Input: default@multi1_mm@p=1 -PREHOOK: Input: default@multi1_mm@p=2 -#### A masked pattern was here #### -POSTHOOK: query: select * from multi1_mm order by key, key2, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi1_mm -POSTHOOK: Input: default@multi1_mm@p=1 -POSTHOOK: Input: default@multi1_mm@p=2 -#### A masked pattern was here #### -0 456 1 -0 456 2 -10 456 1 -10 456 2 -97 455 1 -97 455 2 -98 455 1 -98 455 2 -100 457 1 -100 457 2 -103 457 1 -103 457 2 -455 97 1 -455 97 2 -455 98 1 -455 98 2 -456 0 1 -456 0 2 -456 10 1 -456 10 2 -457 100 1 -457 100 2 -457 103 1 -457 103 2 -PREHOOK: query: from intermediate -insert into table multi1_mm partition(p) select p, key, p -insert into table multi1_mm partition(p=1) select key, p -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi1_mm -PREHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: query: from intermediate -insert into table multi1_mm partition(p) select p, key, p -insert into table multi1_mm partition(p=1) select key, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate 
-POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: Output: default@multi1_mm@p=455 -POSTHOOK: Output: default@multi1_mm@p=456 -POSTHOOK: Output: default@multi1_mm@p=457 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=455).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=456).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=457).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=457).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: select key, key2, p from multi1_mm order by key, key2, p -PREHOOK: type: QUERY -PREHOOK: Input: default@multi1_mm -PREHOOK: Input: default@multi1_mm@p=1 -PREHOOK: Input: default@multi1_mm@p=2 -PREHOOK: Input: default@multi1_mm@p=455 -PREHOOK: Input: default@multi1_mm@p=456 -PREHOOK: Input: default@multi1_mm@p=457 -#### A masked pattern was here #### -POSTHOOK: query: select key, key2, p from multi1_mm order by key, key2, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi1_mm -POSTHOOK: Input: default@multi1_mm@p=1 -POSTHOOK: Input: default@multi1_mm@p=2 -POSTHOOK: Input: default@multi1_mm@p=455 -POSTHOOK: Input: default@multi1_mm@p=456 -POSTHOOK: Input: default@multi1_mm@p=457 -#### A masked pattern was here #### -0 456 1 -0 456 1 -0 456 2 -10 456 1 -10 456 1 -10 456 2 -97 455 1 -97 455 1 -97 455 2 -98 455 1 -98 455 1 -98 455 2 -100 457 1 -100 457 1 -100 457 2 -103 457 1 -103 457 1 -103 457 2 -455 97 1 -455 97 2 -455 97 455 -455 98 1 -455 98 2 -455 98 455 -456 0 1 -456 0 2 -456 0 456 -456 10 1 -456 10 2 -456 10 456 -457 100 1 -457 100 2 -457 100 457 -457 103 1 -457 103 2 -457 103 457 -PREHOOK: query: from intermediate -insert into table multi1_mm partition(p) select p, key, 1 -insert into table multi1_mm partition(p=1) select key, p -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi1_mm -PREHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: query: from intermediate -insert into table multi1_mm partition(p) select p, key, 1 -insert into table multi1_mm partition(p=1) select key, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, 
type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: select key, key2, p from multi1_mm order by key, key2, p -PREHOOK: type: QUERY -PREHOOK: Input: default@multi1_mm -PREHOOK: Input: default@multi1_mm@p=1 -PREHOOK: Input: default@multi1_mm@p=2 -PREHOOK: Input: default@multi1_mm@p=455 -PREHOOK: Input: default@multi1_mm@p=456 -PREHOOK: Input: default@multi1_mm@p=457 -#### A masked pattern was here #### -POSTHOOK: query: select key, key2, p from multi1_mm order by key, key2, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi1_mm -POSTHOOK: Input: default@multi1_mm@p=1 -POSTHOOK: Input: default@multi1_mm@p=2 -POSTHOOK: Input: default@multi1_mm@p=455 -POSTHOOK: Input: default@multi1_mm@p=456 -POSTHOOK: Input: default@multi1_mm@p=457 -#### A masked pattern was here #### -0 456 1 -0 456 1 -0 456 1 -0 456 2 -10 456 1 -10 456 1 -10 456 1 -10 456 2 -97 455 1 -97 455 1 -97 455 1 -97 455 2 -98 455 1 -98 455 1 -98 455 1 -98 455 2 -100 457 1 -100 457 1 -100 457 1 -100 457 2 -103 457 1 -103 457 1 -103 457 1 -103 457 2 -455 97 1 -455 97 1 -455 97 2 -455 97 455 -455 98 1 -455 98 1 -455 98 2 -455 98 455 -456 0 1 -456 0 1 -456 0 2 -456 0 456 -456 10 1 -456 10 1 -456 10 2 -456 10 456 -457 100 1 -457 100 1 -457 100 2 -457 100 457 -457 103 1 -457 103 1 -457 103 2 -457 103 457 -PREHOOK: query: drop table multi1_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@multi1_mm -PREHOOK: Output: default@multi1_mm -POSTHOOK: query: drop table multi1_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@multi1_mm -POSTHOOK: Output: default@multi1_mm -PREHOOK: query: drop table stats_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table stats_mm -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table stats_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@stats_mm -POSTHOOK: query: create table stats_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only") -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats_mm -PREHOOK: query: insert overwrite table stats_mm select key from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@stats_mm -POSTHOOK: query: insert overwrite table stats_mm select key from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@stats_mm -POSTHOOK: Lineage: stats_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: desc formatted stats_mm -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_mm -POSTHOOK: query: desc formatted stats_mm -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_mm -# col_name data_type comment - -key int - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table 
Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 6 - rawDataSize 13 - totalSize 19 - transactional true - transactional_properties insert_only -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: insert into table stats_mm select key from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@stats_mm -POSTHOOK: query: insert into table stats_mm select key from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@stats_mm -POSTHOOK: Lineage: stats_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: desc formatted stats_mm -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_mm -POSTHOOK: query: desc formatted stats_mm -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_mm -# col_name data_type comment - -key int - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 2 - numRows 12 - rawDataSize 26 - totalSize 38 - transactional true - transactional_properties insert_only -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table stats_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats_mm -PREHOOK: Output: default@stats_mm -POSTHOOK: query: drop table stats_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@stats_mm -POSTHOOK: Output: default@stats_mm -PREHOOK: query: drop table stats2_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table stats2_mm -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table stats2_mm tblproperties("transactional"="true", "transactional_properties"="insert_only") as select array(key, value) from src -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@stats2_mm -POSTHOOK: query: create table stats2_mm tblproperties("transactional"="true", "transactional_properties"="insert_only") as select array(key, value) from src -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats2_mm -POSTHOOK: Lineage: stats2_mm._c0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: desc formatted stats2_mm -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats2_mm -POSTHOOK: query: desc 
formatted stats2_mm -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats2_mm -# col_name data_type comment - -_c0 array - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 500 - rawDataSize 5312 - totalSize 5812 - transactional true - transactional_properties insert_only -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table stats2_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats2_mm -PREHOOK: Output: default@stats2_mm -POSTHOOK: query: drop table stats2_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@stats2_mm -POSTHOOK: Output: default@stats2_mm -PREHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@skewjoin_mm -POSTHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only") -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@skewjoin_mm -PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@skewjoin_mm -POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@skewjoin_mm -POSTHOOK: Lineage: skewjoin_mm.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: skewjoin_mm.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select count(distinct key) from skewjoin_mm -PREHOOK: type: QUERY -PREHOOK: Input: default@skewjoin_mm -#### A masked pattern was here #### -POSTHOOK: query: select count(distinct key) from skewjoin_mm -POSTHOOK: type: QUERY -POSTHOOK: Input: default@skewjoin_mm -#### A masked pattern was here #### -309 -PREHOOK: query: drop table skewjoin_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@skewjoin_mm -PREHOOK: Output: default@skewjoin_mm -POSTHOOK: query: drop table skewjoin_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@skewjoin_mm -POSTHOOK: Output: default@skewjoin_mm PREHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only") PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git ql/src/test/results/clientpositive/mm_insertonly_acid.q.out ql/src/test/results/clientpositive/mm_insertonly_acid.q.out deleted file mode 100644 index 22bdc93..0000000 --- ql/src/test/results/clientpositive/mm_insertonly_acid.q.out +++ /dev/null @@ -1,115 +0,0 @@ -PREHOOK: query: drop table qtr_acid -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table qtr_acid -POSTHOOK: type: 
DROPTABLE -PREHOOK: query: create table qtr_acid (key int) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@qtr_acid -POSTHOOK: query: create table qtr_acid (key int) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@qtr_acid -PREHOOK: query: insert into table qtr_acid partition(p='123') select distinct key from src where key > 0 order by key asc limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@qtr_acid@p=123 -POSTHOOK: query: insert into table qtr_acid partition(p='123') select distinct key from src where key > 0 order by key asc limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@qtr_acid@p=123 -POSTHOOK: Lineage: qtr_acid PARTITION(p=123).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: insert into table qtr_acid partition(p='456') select distinct key from src where key > 0 order by key desc limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@qtr_acid@p=456 -POSTHOOK: query: insert into table qtr_acid partition(p='456') select distinct key from src where key > 0 order by key desc limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@qtr_acid@p=456 -POSTHOOK: Lineage: qtr_acid PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: explain -select * from qtr_acid order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain -select * from qtr_acid order by key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: qtr_acid - Statistics: Num rows: 20 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), p (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 20 Data size: 47 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 47 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 47 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from qtr_acid order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@qtr_acid -PREHOOK: Input: default@qtr_acid@p=123 -PREHOOK: Input: default@qtr_acid@p=456 -#### A masked pattern was here #### -POSTHOOK: query: select * from qtr_acid order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@qtr_acid -POSTHOOK: Input: default@qtr_acid@p=123 -POSTHOOK: Input: default@qtr_acid@p=456 -#### 
A masked pattern was here #### -9 456 -10 123 -11 123 -85 456 -86 456 -87 456 -90 456 -92 456 -95 456 -96 456 -97 456 -98 456 -100 123 -103 123 -104 123 -105 123 -111 123 -113 123 -114 123 -116 123 -PREHOOK: query: drop table qtr_acid -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@qtr_acid -PREHOOK: Output: default@qtr_acid -POSTHOOK: query: drop table qtr_acid -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@qtr_acid -POSTHOOK: Output: default@qtr_acid
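
Note on the CTAS path changed above: for an insert-only (MM) CTAS, TaskCompiler now points the final location at an ACID-style delta subdirectory derived from the table's write ID and statement ID, and LoadTableDesc carries both IDs to the move/load stage. The sketch below is illustrative only — the class, method names, and sample path are made up — but the AcidUtils.deltaSubdir call and the LoadTableDesc setters are the ones used/added in this patch.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;

final class MmCtasTargetSketch {
  // Compute where an MM CTAS write lands: a delta_<writeId>_<writeId>_<stmtId>
  // subdirectory under the table location (stmtId is 0 for CTAS in this patch,
  // since CTAS cannot be part of a multi-statement transaction).
  static Path mmCtasTarget(Path tableLocation, long mmWriteId) {
    int stmtId = 0;
    return new Path(tableLocation, AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId));
  }

  // Propagate the write ID and statement ID onto an existing LoadTableDesc so the
  // downstream move/load task can reconstruct the same directory name.
  static void tagLoadDesc(LoadTableDesc ltd, long mmWriteId, int stmtId) {
    ltd.setMmWriteId(mmWriteId);
    ltd.setStmtId(stmtId);
  }

  public static void main(String[] args) {
    // Example with a made-up warehouse path and write ID 1.
    System.out.println(mmCtasTarget(new Path("/warehouse/ctas0_mm"), 1L));
  }
}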
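
Note on the write-type change in SemanticAnalyzer: insert-only destinations are no longer modeled as a separate ACID operation (INSERT_ONLY is dropped from getAcidType and checkAcidConstraints); they are written as Operation.NOT_ACID and their FileSinkDescs are collected in mmFileSinks, while full-ACID destinations keep UPDATE/DELETE/INSERT. The helper below is a condensed restatement for readers, not patch code: parameter names are made up, it folds the two independent checks into a single return, and it omits the bookkeeping that adds the insert-only sink to mmFileSinks.

import org.apache.hadoop.hive.ql.io.AcidUtils;

final class WriteTypeSketch {
  // Condensed view of the branching createFileSinkDesc now performs.
  static AcidUtils.Operation chooseWriteType(boolean destIsInsertOnly, boolean destIsFullAcid,
      boolean updating, boolean deleting) {
    if (destIsInsertOnly) {
      return AcidUtils.Operation.NOT_ACID; // MM writes are plain file writes
    }
    if (destIsFullAcid) {
      return updating ? AcidUtils.Operation.UPDATE
          : (deleting ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT);
    }
    return AcidUtils.Operation.NOT_ACID;
  }
}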