diff --git common/src/java/org/apache/hadoop/hive/common/JavaUtils.java common/src/java/org/apache/hadoop/hive/common/JavaUtils.java index 3916fe3..28490e2 100644 --- common/src/java/org/apache/hadoop/hive/common/JavaUtils.java +++ common/src/java/org/apache/hadoop/hive/common/JavaUtils.java @@ -28,6 +28,8 @@ import java.util.Arrays; import java.util.List; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,6 +39,10 @@ */ public final class JavaUtils { + public static final String DELTA_PREFIX = "delta"; + public static final String DELTA_DIGITS = "%07d"; + public static final int DELTA_DIGITS_LEN = 7; + public static final String STATEMENT_DIGITS = "%04d"; private static final Logger LOG = LoggerFactory.getLogger(JavaUtils.class); private static final Method SUN_MISC_UTIL_RELEASE; @@ -158,4 +164,65 @@ public static String txnIdsToString(List txnIds) { private JavaUtils() { // prevent instantiation } + + public static Long extractTxnId(Path file) { + String fileName = file.getName(); + String[] parts = fileName.split("_", 4); // e.g. delta_0000001_0000001_0000 + if (parts.length < 4 || !DELTA_PREFIX.equals(parts[0])) { + LOG.debug("Cannot extract transaction ID for a MM table: " + file + + " (" + Arrays.toString(parts) + ")"); + return null; + } + long writeId = -1; + try { + writeId = Long.parseLong(parts[1]); + } catch (NumberFormatException ex) { + LOG.debug("Cannot extract transaction ID for a MM table: " + file + + "; parsing " + parts[1] + " got " + ex.getMessage()); + return null; + } + return writeId; + } + + public static class IdPathFilter implements PathFilter { + private final String mmDirName; + private final boolean isMatch, isIgnoreTemp; + public IdPathFilter(long writeId, int stmtId, boolean isMatch) { + this(writeId, stmtId, isMatch, false); + } + public IdPathFilter(long writeId, int stmtId, boolean isMatch, boolean isIgnoreTemp) { + this.mmDirName = DELTA_PREFIX + "_" + String.format(DELTA_DIGITS, writeId) + "_" + + String.format(DELTA_DIGITS, writeId) + "_" + String.format(STATEMENT_DIGITS, stmtId); + this.isMatch = isMatch; + this.isIgnoreTemp = isIgnoreTemp; + } + + @Override + public boolean accept(Path path) { + String name = path.getName(); + if (name.equals(mmDirName)) { + return isMatch; + } + if (isIgnoreTemp && name.length() > 0) { + char c = name.charAt(0); + if (c == '.' || c == '_') return false; // Regardless of isMatch, ignore this. + } + return !isMatch; + } + } + + public static class AnyIdDirFilter implements PathFilter { + @Override + public boolean accept(Path path) { + String name = path.getName(); + if (!name.startsWith(DELTA_PREFIX + "_")) return false; + String idStr = name.substring(DELTA_PREFIX.length() + 1, DELTA_PREFIX.length() + 1 + DELTA_DIGITS_LEN); + try { + Long.parseLong(idStr); + } catch (NumberFormatException ex) { + return false; + } + return true; + } + } } diff --git common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java deleted file mode 100644 index 4cbeb89..0000000 --- common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java +++ /dev/null @@ -1,218 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.common; - -import java.util.Arrays; -import java.util.HashSet; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.hive.conf.HiveConf; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class ValidWriteIds { - public static final ValidWriteIds NO_WRITE_IDS = new ValidWriteIds(-1, -1, false, null); - - public static final String MM_PREFIX = "mm"; - private static final String CURRENT_SUFFIX = ".current"; - - private final static Logger LOG = LoggerFactory.getLogger(ValidWriteIds.class); - - private static final String VALID_WRITEIDS_PREFIX = "hive.valid.write.ids."; - private final long lowWatermark, highWatermark; - private final boolean areIdsValid; - private final HashSet ids; - private String source = null; - - public ValidWriteIds( - long lowWatermark, long highWatermark, boolean areIdsValid, HashSet ids) { - this.lowWatermark = lowWatermark; - this.highWatermark = highWatermark; - this.areIdsValid = areIdsValid; - this.ids = ids; - } - - public static ValidWriteIds createFromConf(Configuration conf, String dbName, String tblName) { - return createFromConf(conf, dbName + "." + tblName); - } - - public static ValidWriteIds createFromConf(Configuration conf, String fullTblName) { - String key = createConfKey(fullTblName); - String idStr = conf.get(key, null); - String current = conf.get(key + CURRENT_SUFFIX, null); - if (idStr == null || idStr.isEmpty()) return null; - return new ValidWriteIds(idStr, current); - } - - private static String createConfKey(String dbName, String tblName) { - return createConfKey(dbName + "." + tblName); - } - - private static String createConfKey(String fullName) { - return VALID_WRITEIDS_PREFIX + fullName; - } - - private ValidWriteIds(String src, String current) { - // TODO: lifted from ACID config implementation... optimize if needed? e.g. 
ranges, base64 - String[] values = src.split(":"); - highWatermark = Long.parseLong(values[0]); - lowWatermark = Long.parseLong(values[1]); - if (values.length > 2) { - areIdsValid = Long.parseLong(values[2]) > 0; - ids = new HashSet(); - for(int i = 3; i < values.length; ++i) { - ids.add(Long.parseLong(values[i])); - } - if (current != null) { - long currentId = Long.parseLong(current); - if (areIdsValid) { - ids.add(currentId); - } else { - ids.remove(currentId); - } - } - } else if (current != null) { - long currentId = Long.parseLong(current); - areIdsValid = true; - ids = new HashSet(); - ids.add(currentId); - } else { - areIdsValid = false; - ids = null; - } - } - - public static void addCurrentToConf( - Configuration conf, String dbName, String tblName, long mmWriteId) { - String key = createConfKey(dbName, tblName) + CURRENT_SUFFIX; - if (LOG.isDebugEnabled()) { - LOG.debug("Setting " + key + " => " + mmWriteId); - } - conf.set(key, Long.toString(mmWriteId)); - } - - public void addToConf(Configuration conf, String dbName, String tblName) { - if (source == null) { - source = toString(); - } - String key = createConfKey(dbName, tblName); - if (LOG.isDebugEnabled()) { - LOG.debug("Setting " + key + " => " + source - + " (old value was " + conf.get(key, null) + ")"); - } - conf.set(key, source); - } - - public static void clearConf(Configuration conf, String dbName, String tblName) { - if (LOG.isDebugEnabled()) { - LOG.debug("Unsetting " + createConfKey(dbName, tblName)); - } - conf.unset(createConfKey(dbName, tblName)); - } - - public String toString() { - // TODO: lifted from ACID config implementation... optimize if needed? e.g. ranges, base64 - StringBuilder buf = new StringBuilder(); - buf.append(highWatermark); - buf.append(':'); - buf.append(lowWatermark); - if (ids != null) { - buf.append(':'); - buf.append(areIdsValid ? 1 : 0); - for (long id : ids) { - buf.append(':'); - buf.append(id); - } - } - return buf.toString(); - } - - public boolean isValid(long writeId) { - if (writeId < 0) throw new RuntimeException("Incorrect write ID " + writeId); - if (writeId <= lowWatermark) return true; - if (writeId >= highWatermark) return false; - return ids != null && (areIdsValid == ids.contains(writeId)); - } - - public static String getMmFilePrefix(long mmWriteId) { - return MM_PREFIX + "_" + mmWriteId; - } - - - public static class IdPathFilter implements PathFilter { - private final String mmDirName; - private final boolean isMatch, isIgnoreTemp; - public IdPathFilter(long writeId, boolean isMatch) { - this(writeId, isMatch, false); - } - public IdPathFilter(long writeId, boolean isMatch, boolean isIgnoreTemp) { - this.mmDirName = ValidWriteIds.getMmFilePrefix(writeId); - this.isMatch = isMatch; - this.isIgnoreTemp = isIgnoreTemp; - } - - @Override - public boolean accept(Path path) { - String name = path.getName(); - if (name.equals(mmDirName)) { - return isMatch; - } - if (isIgnoreTemp && name.length() > 0) { - char c = name.charAt(0); - if (c == '.' || c == '_') return false; // Regardless of isMatch, ignore this. 
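// Illustrative sketch, not part of the patch: the "mm_<writeId>" prefix produced by the
// deleted getMmFilePrefix() above is replaced by ACID-style delta directory names built from
// the new JavaUtils constants, and JavaUtils.extractTxnId() parses the ID back out of such a
// name (the warehouse path below is made up).
String deltaDir = JavaUtils.DELTA_PREFIX + "_"
    + String.format(JavaUtils.DELTA_DIGITS, 123L) + "_"
    + String.format(JavaUtils.DELTA_DIGITS, 123L) + "_"
    + String.format(JavaUtils.STATEMENT_DIGITS, 0);                 // "delta_0000123_0000123_0000"
Long txnId = JavaUtils.extractTxnId(new Path("/warehouse/t/" + deltaDir)); // 123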
- } - return !isMatch; - } - } - - public static class AnyIdDirFilter implements PathFilter { - @Override - public boolean accept(Path path) { - String name = path.getName(); - if (!name.startsWith(MM_PREFIX + "_")) return false; - String idStr = name.substring(MM_PREFIX.length() + 1); - try { - Long.parseLong(idStr); - } catch (NumberFormatException ex) { - return false; - } - return true; - } - } - public static Long extractWriteId(Path file) { - String fileName = file.getName(); - String[] parts = fileName.split("_", 3); - if (parts.length < 2 || !MM_PREFIX.equals(parts[0])) { - LOG.info("Cannot extract write ID for a MM table: " + file - + " (" + Arrays.toString(parts) + ")"); - return null; - } - long writeId = -1; - try { - writeId = Long.parseLong(parts[1]); - } catch (NumberFormatException ex) { - LOG.info("Cannot extract write ID for a MM table: " + file - + "; parsing " + parts[1] + " got " + ex.getMessage()); - return null; - } - return writeId; - } - -} \ No newline at end of file diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java index 0c51a68..c70925a 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java @@ -103,7 +103,7 @@ protected void setUp() { db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, src, true, true); db.createTable(src, cols, null, TextInputFormat.class, IgnoreKeyTextOutputFormat.class); - db.loadTable(hadoopDataFile[i], src, false, false, false, false, false, null); + db.loadTable(hadoopDataFile[i], src, false, false, false, false, false, null, 0); i++; } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 915bce3..f589ec4 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -7425,7 +7425,6 @@ public void run() { startCompactorInitiator(conf); startCompactorWorkers(conf); startCompactorCleaner(conf); - startMmHousekeepingThread(conf); startHouseKeeperService(conf); } catch (Throwable e) { LOG.error("Failure when starting the compactor, compactions may not happen, " + @@ -7467,16 +7466,6 @@ private static void startCompactorCleaner(HiveConf conf) throws Exception { } } - private static void startMmHousekeepingThread(HiveConf conf) throws Exception { - long intervalMs = HiveConf.getTimeVar(conf, - ConfVars.HIVE_METASTORE_MM_THREAD_SCAN_INTERVAL, TimeUnit.MILLISECONDS); - if (intervalMs > 0) { - MetaStoreThread thread = new MmCleanerThread(intervalMs); - initializeAndStartThread(thread, conf); - } - } - - private static MetaStoreThread instantiateThread(String classname) throws Exception { Class c = Class.forName(classname); Object o = c.newInstance(); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MmCleanerThread.java metastore/src/java/org/apache/hadoop/hive/metastore/MmCleanerThread.java deleted file mode 100644 index d99b0d7..0000000 --- metastore/src/java/org/apache/hadoop/hive/metastore/MmCleanerThread.java +++ /dev/null @@ -1,397 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.ListIterator; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.ValidWriteIds; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.RawStore.FullTableName; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.model.MTableWrite; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Supplier; - -public class MmCleanerThread extends Thread implements MetaStoreThread { - private final static Logger LOG = LoggerFactory.getLogger(MmCleanerThread.class); - private HiveConf conf; - private int threadId; - private AtomicBoolean stop; - private long intervalMs; - private long heartbeatTimeoutMs, absTimeoutMs, abortedGraceMs; - /** Time override for tests. Only used for MM timestamp logic, not for the thread timing. */ - private Supplier timeOverride = null; - - public MmCleanerThread(long intervalMs) { - this.intervalMs = intervalMs; - } - - @VisibleForTesting - void overrideTime(Supplier timeOverride) { - this.timeOverride = timeOverride; - } - - private long getTimeMs() { - return timeOverride == null ? System.currentTimeMillis() : timeOverride.get(); - } - - @Override - public void setHiveConf(HiveConf conf) { - this.conf = conf; - heartbeatTimeoutMs = HiveConf.getTimeVar( - conf, ConfVars.HIVE_METASTORE_MM_HEARTBEAT_TIMEOUT, TimeUnit.MILLISECONDS); - absTimeoutMs = HiveConf.getTimeVar( - conf, ConfVars.HIVE_METASTORE_MM_ABSOLUTE_TIMEOUT, TimeUnit.MILLISECONDS); - abortedGraceMs = HiveConf.getTimeVar( - conf, ConfVars.HIVE_METASTORE_MM_ABORTED_GRACE_PERIOD, TimeUnit.MILLISECONDS); - if (heartbeatTimeoutMs > absTimeoutMs) { - throw new RuntimeException("Heartbeat timeout " + heartbeatTimeoutMs - + " cannot be larger than the absolute timeout " + absTimeoutMs); - } - } - - @Override - public void setThreadId(int threadId) { - this.threadId = threadId; - } - - @Override - public void init(AtomicBoolean stop, AtomicBoolean looped) throws MetaException { - this.stop = stop; - setPriority(MIN_PRIORITY); - setDaemon(true); - } - - @Override - public void run() { - // Only get RS here, when we are already on the thread. 
- RawStore rs = getRs(); - while (true) { - if (checkStop()) return; - long endTimeNs = System.nanoTime() + intervalMs * 1000000L; - - runOneIteration(rs); - - if (checkStop()) return; - long waitTimeMs = (endTimeNs - System.nanoTime()) / 1000000L; - if (waitTimeMs <= 0) continue; - try { - Thread.sleep(waitTimeMs); - } catch (InterruptedException e) { - LOG.error("Thread was interrupted and will now exit"); - return; - } - } - } - - private RawStore getRs() { - try { - return RawStoreProxy.getProxy(conf, conf, - conf.getVar(HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL), threadId); - } catch (MetaException e) { - LOG.error("Failed to get RawStore; the thread will now die", e); - throw new RuntimeException(e); - } - } - - private boolean checkStop() { - if (!stop.get()) return false; - LOG.info("Stopping due to an external request"); - return true; - } - - @VisibleForTesting - void runOneIteration(RawStore rs) { - // We only get the names here; we want to get and process each table in a separate DB txn. - List mmTables = null; - try { - mmTables = rs.getAllMmTablesForCleanup(); - } catch (MetaException e) { - LOG.error("Failed to get tables", e); - return; - } - for (FullTableName tableName : mmTables) { - try { - processOneTable(tableName, rs); - } catch (MetaException e) { - LOG.error("Failed to process " + tableName, e); - } - } - } - - private void processOneTable(FullTableName table, RawStore rs) throws MetaException { - // 1. Time out writes that have been running for a while. - // a) Heartbeat timeouts (not enabled right now as heartbeat is not implemented). - // b) Absolute timeouts. - // c) Gaps that have the next ID and the derived absolute timeout. This is a small special - // case that can happen if we increment next ID but fail to insert the write ID record, - // which we do in separate txns to avoid making the conflict-prone increment txn longer. - LOG.info("Processing table " + table); - Table t = rs.getTable(table.dbName, table.tblName); - HashSet removeWriteIds = new HashSet<>(), cleanupOnlyWriteIds = new HashSet<>(); - getWritesThatReadyForCleanUp(t, table, rs, removeWriteIds, cleanupOnlyWriteIds); - - // 2. Delete the aborted writes' files from the FS. - deleteAbortedWriteIdFiles(table, rs, t, removeWriteIds); - deleteAbortedWriteIdFiles(table, rs, t, cleanupOnlyWriteIds); - // removeWriteIds-s now only contains the writes that were fully cleaned up after. - - // 3. Advance the watermark. - advanceWatermark(table, rs, removeWriteIds); - } - - private void getWritesThatReadyForCleanUp(Table t, FullTableName table, RawStore rs, - HashSet removeWriteIds, HashSet cleanupOnlyWriteIds) throws MetaException { - // We will generally ignore errors here. First, we expect some conflicts; second, we will get - // the final view of things after we do (or try, at any rate) all the updates. - long watermarkId = t.isSetMmWatermarkWriteId() ? t.getMmWatermarkWriteId() : -1, - nextWriteId = t.isSetMmNextWriteId() ? 
t.getMmNextWriteId() : 0; - long now = getTimeMs(), earliestOkHeartbeatMs = now - heartbeatTimeoutMs, - earliestOkCreateMs = now - absTimeoutMs, latestAbortedMs = now - abortedGraceMs; - - List writes = rs.getTableWrites( - table.dbName, table.tblName, watermarkId, nextWriteId); - ListIterator iter = writes.listIterator(writes.size()); - long expectedId = -1, nextCreated = -1; - // We will go in reverse order and add aborted writes for the gaps that have a following - // write ID that would imply that the previous one (created earlier) would have already - // expired, had it been open and not updated. - while (iter.hasPrevious()) { - MTableWrite write = iter.previous(); - addTimedOutMissingWriteIds(rs, table.dbName, table.tblName, write.getWriteId(), - nextCreated, expectedId, earliestOkHeartbeatMs, cleanupOnlyWriteIds, now); - expectedId = write.getWriteId() - 1; - nextCreated = write.getCreated(); - char state = write.getState().charAt(0); - if (state == HiveMetaStore.MM_WRITE_ABORTED) { - if (write.getLastHeartbeat() < latestAbortedMs) { - removeWriteIds.add(write.getWriteId()); - } else { - cleanupOnlyWriteIds.add(write.getWriteId()); - } - } else if (state == HiveMetaStore.MM_WRITE_OPEN && write.getCreated() < earliestOkCreateMs) { - // TODO: also check for heartbeat here. - if (expireTimedOutWriteId(rs, table.dbName, table.tblName, write.getWriteId(), - now, earliestOkCreateMs, earliestOkHeartbeatMs, cleanupOnlyWriteIds)) { - cleanupOnlyWriteIds.add(write.getWriteId()); - } - } - } - addTimedOutMissingWriteIds(rs, table.dbName, table.tblName, watermarkId, - nextCreated, expectedId, earliestOkHeartbeatMs, cleanupOnlyWriteIds, now); - } - - private void advanceWatermark( - FullTableName table, RawStore rs, HashSet cleanedUpWriteIds) { - if (!rs.openTransaction()) { - LOG.error("Cannot open transaction"); - return; - } - boolean success = false; - try { - Table t = rs.getTable(table.dbName, table.tblName); - if (t == null) { - return; - } - long watermarkId = t.getMmWatermarkWriteId(); - List writeIds = rs.getTableWriteIds(table.dbName, table.tblName, watermarkId, - t.getMmNextWriteId(), HiveMetaStore.MM_WRITE_COMMITTED); - long expectedId = watermarkId + 1; - boolean hasGap = false; - Iterator idIter = writeIds.iterator(); - while (idIter.hasNext()) { - long next = idIter.next(); - if (next < expectedId) continue; - while (next > expectedId) { - if (!cleanedUpWriteIds.contains(expectedId)) { - hasGap = true; - break; - } - ++expectedId; - } - if (hasGap) break; - ++expectedId; - } - // Make sure we also advance over the trailing aborted ones. - if (!hasGap) { - while (cleanedUpWriteIds.contains(expectedId)) { - ++expectedId; - } - } - long newWatermarkId = expectedId - 1; - if (newWatermarkId > watermarkId) { - t.setMmWatermarkWriteId(newWatermarkId); - rs.alterTable(table.dbName, table.tblName, t); - rs.deleteTableWrites(table.dbName, table.tblName, -1, expectedId); - } - success = true; - } catch (Exception ex) { - // TODO: should we try a couple times on conflicts? Aborted writes cannot be unaborted. 
- LOG.error("Failed to advance watermark", ex); - rs.rollbackTransaction(); - } - if (success) { - tryCommit(rs); - } - } - - private void deleteAbortedWriteIdFiles( - FullTableName table, RawStore rs, Table t, HashSet cleanUpWriteIds) { - if (cleanUpWriteIds.isEmpty()) return; - if (t.getPartitionKeysSize() > 0) { - for (String location : rs.getAllPartitionLocations(table.dbName, table.tblName)) { - deleteAbortedWriteIdFiles(location, cleanUpWriteIds); - } - } else { - deleteAbortedWriteIdFiles(t.getSd().getLocation(), cleanUpWriteIds); - } - } - - private void deleteAbortedWriteIdFiles(String location, HashSet abortedWriteIds) { - LOG.info("Looking for " + abortedWriteIds.size() + " aborted write output in " + location); - Path path = new Path(location); - FileSystem fs; - FileStatus[] files; - try { - fs = path.getFileSystem(conf); - if (!fs.exists(path)) { - LOG.warn(path + " does not exist; assuming that the cleanup is not needed."); - return; - } - // TODO# this doesn't account for list bucketing. Do nothing now, ACID will solve all problems. - files = fs.listStatus(path); - } catch (Exception ex) { - LOG.error("Failed to get files for " + path + "; cannot ensure cleanup for any writes"); - abortedWriteIds.clear(); - return; - } - for (FileStatus file : files) { - Path childPath = file.getPath(); - if (!file.isDirectory()) { - LOG.warn("Skipping a non-directory file " + childPath); - continue; - } - Long writeId = ValidWriteIds.extractWriteId(childPath); - if (writeId == null) { - LOG.warn("Skipping an unknown directory " + childPath); - continue; - } - if (!abortedWriteIds.contains(writeId.longValue())) continue; - try { - if (!fs.delete(childPath, true)) throw new IOException("delete returned false"); - } catch (Exception ex) { - LOG.error("Couldn't delete " + childPath + "; not cleaning up " + writeId, ex); - abortedWriteIds.remove(writeId.longValue()); - } - } - } - - private boolean expireTimedOutWriteId(RawStore rs, String dbName, - String tblName, long writeId, long now, long earliestOkCreatedMs, - long earliestOkHeartbeatMs, HashSet cleanupOnlyWriteIds) { - if (!rs.openTransaction()) { - return false; - } - try { - MTableWrite tw = rs.getTableWrite(dbName, tblName, writeId); - if (tw == null) { - // The write have been updated since the time when we thought it has expired. - tryCommit(rs); - return true; - } - char state = tw.getState().charAt(0); - if (state != HiveMetaStore.MM_WRITE_OPEN - || (tw.getCreated() > earliestOkCreatedMs - && tw.getLastHeartbeat() > earliestOkHeartbeatMs)) { - tryCommit(rs); - return true; // The write has been updated since the time when we thought it has expired. - } - tw.setState(String.valueOf(HiveMetaStore.MM_WRITE_ABORTED)); - tw.setLastHeartbeat(now); - rs.updateTableWrite(tw); - } catch (Exception ex) { - LOG.error("Failed to update an expired table write", ex); - rs.rollbackTransaction(); - return false; - } - boolean result = tryCommit(rs); - if (result) { - cleanupOnlyWriteIds.add(writeId); - } - return result; - } - - private boolean tryCommit(RawStore rs) { - try { - return rs.commitTransaction(); - } catch (Exception ex) { - LOG.error("Failed to commit transaction", ex); - return false; - } - } - - private boolean addTimedOutMissingWriteIds(RawStore rs, String dbName, String tblName, - long foundPrevId, long nextCreated, long expectedId, long earliestOkHeartbeatMs, - HashSet cleanupOnlyWriteIds, long now) throws MetaException { - // Assume all missing ones are created at the same time as the next present write ID. 
- // We also assume missing writes never had any heartbeats. - if (nextCreated >= earliestOkHeartbeatMs || expectedId < 0) return true; - Table t = null; - List localCleanupOnlyWriteIds = new ArrayList<>(); - while (foundPrevId < expectedId) { - if (t == null && !rs.openTransaction()) { - LOG.error("Cannot open transaction; skipping"); - return false; - } - try { - if (t == null) { - t = rs.getTable(dbName, tblName); - } - // We don't need to double check if the write exists; the unique index will cause an error. - rs.createTableWrite(t, expectedId, HiveMetaStore.MM_WRITE_ABORTED, now); - } catch (Exception ex) { - // TODO: don't log conflict exceptions?.. although we barely ever expect them. - LOG.error("Failed to create a missing table write", ex); - rs.rollbackTransaction(); - return false; - } - localCleanupOnlyWriteIds.add(expectedId); - --expectedId; - } - boolean result = (t == null || tryCommit(rs)); - if (result) { - cleanupOnlyWriteIds.addAll(localCleanupOnlyWriteIds); - } - return result; - } -} diff --git metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java index aaa03fb..15a1a07 100644 --- metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java +++ metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java @@ -187,151 +187,6 @@ public void testTableOps() throws MetaException, InvalidObjectException, NoSuchO objectStore.dropDatabase(DB1); } - - - /** - * Test table operations - */ - @Test - public void testMmCleaner() throws Exception { - HiveConf conf = new HiveConf(); - conf.set(ConfVars.HIVE_METASTORE_MM_HEARTBEAT_TIMEOUT.varname, "3ms"); - conf.set(ConfVars.HIVE_METASTORE_MM_ABSOLUTE_TIMEOUT.varname, "20ms"); - conf.set(ConfVars.HIVE_METASTORE_MM_ABORTED_GRACE_PERIOD.varname, "5ms"); - conf.set("fs.mock.impl", MockFileSystem.class.getName()); - - MockFileSystem mfs = (MockFileSystem)(new Path("mock:///").getFileSystem(conf)); - mfs.clear(); - mfs.allowDelete = true; - // Don't add the files just yet... - MockFile[] files = new MockFile[9]; - for (int i = 0; i < files.length; ++i) { - files[i] = new MockFile("mock:/foo/mm_" + i + "/1", 0, new byte[0]); - } - - LongSupplier time = new LongSupplier(); - - MmCleanerThread mct = new MmCleanerThread(0); - mct.setHiveConf(conf); - mct.overrideTime(time); - - Database db1 = new Database(DB1, "description", "locationurl", null); - objectStore.createDatabase(db1); - StorageDescriptor sd = createFakeSd("mock:/foo"); - HashMap params = new HashMap(); - params.put("EXTERNAL", "false"); - params.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true"); - params.put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, "insert_only"); - Table tbl = new Table(TABLE1, DB1, "owner", 1, 2, 3, sd, - null, params, null, null, "MANAGED_TABLE"); - objectStore.createTable(tbl); - - // Add write #0 so the watermark wouldn't advance; skip write #1, add #2 at 0, skip #3 - createCompleteTableWrite(mfs, files, 0, time, tbl, HiveMetaStore.MM_WRITE_OPEN); - mfs.addFile(files[1]); - createCompleteTableWrite(mfs, files, 2, time, tbl, HiveMetaStore.MM_WRITE_OPEN); - mfs.addFile(files[3]); - tbl.setMmNextWriteId(4); - objectStore.alterTable(DB1, TABLE1, tbl); - - mct.runOneIteration(objectStore); - List writes = getAbortedWrites(); - assertEquals(0, writes.size()); // Missing write is not aborted before timeout. - time.value = 4; // Advance time. 
- mct.runOneIteration(objectStore); - writes = getAbortedWrites(); - assertEquals(1, writes.size()); // Missing write is aborted after timeout. - assertEquals(1L, writes.get(0).longValue()); - checkDeletedSet(files, 1); - // However, write #3 was not aborted as we cannot determine when it will time out. - createCompleteTableWrite(mfs, files, 4, time, tbl, HiveMetaStore.MM_WRITE_OPEN); - time.value = 8; - // It will now be aborted, since we have a following write. - mct.runOneIteration(objectStore); - writes = getAbortedWrites(); - assertEquals(2, writes.size()); - assertTrue(writes.contains(Long.valueOf(3))); - checkDeletedSet(files, 1, 3); - - // Commit #0 and #2 and confirm that the watermark advances. - // It will only advance over #1, since #3 was aborted at 8 and grace period has not passed. - time.value = 10; - MTableWrite tw = objectStore.getTableWrite(DB1, TABLE1, 0); - tw.setState(String.valueOf(HiveMetaStore.MM_WRITE_COMMITTED)); - objectStore.updateTableWrite(tw); - tw = objectStore.getTableWrite(DB1, TABLE1, 2); - tw.setState(String.valueOf(HiveMetaStore.MM_WRITE_COMMITTED)); - objectStore.updateTableWrite(tw); - mct.runOneIteration(objectStore); - writes = getAbortedWrites(); - assertEquals(1, writes.size()); - assertEquals(3L, writes.get(0).longValue()); - tbl = objectStore.getTable(DB1, TABLE1); - assertEquals(2L, tbl.getMmWatermarkWriteId()); - - // Now advance the time and see that watermark also advances over #3. - time.value = 16; - mct.runOneIteration(objectStore); - writes = getAbortedWrites(); - assertEquals(0, writes.size()); - tbl = objectStore.getTable(DB1, TABLE1); - assertEquals(3L, tbl.getMmWatermarkWriteId()); - - // Check that the open write gets aborted after some time; then the watermark advances. - time.value = 25; - mct.runOneIteration(objectStore); - writes = getAbortedWrites(); - assertEquals(1, writes.size()); - assertEquals(4L, writes.get(0).longValue()); - time.value = 31; - mct.runOneIteration(objectStore); - tbl = objectStore.getTable(DB1, TABLE1); - assertEquals(4L, tbl.getMmWatermarkWriteId()); - checkDeletedSet(files, 1, 3, 4); // The other two should still be deleted. - - // Finally check that we cannot advance watermark if cleanup fails for some file. - createCompleteTableWrite(mfs, files, 5, time, tbl, HiveMetaStore.MM_WRITE_ABORTED); - createCompleteTableWrite(mfs, files, 6, time, tbl, HiveMetaStore.MM_WRITE_ABORTED); - createCompleteTableWrite(mfs, files, 7, time, tbl, HiveMetaStore.MM_WRITE_COMMITTED); - createCompleteTableWrite(mfs, files, 8, time, tbl, HiveMetaStore.MM_WRITE_ABORTED); - time.value = 37; // Skip the grace period. - files[6].cannotDelete = true; - mct.runOneIteration(objectStore); - checkDeletedSet(files, 1, 3, 4, 5, 8); // The other two should still be deleted. - tbl = objectStore.getTable(DB1, TABLE1); - assertEquals(5L, tbl.getMmWatermarkWriteId()); // Watermark only goes up to 5. - files[6].cannotDelete = false; - mct.runOneIteration(objectStore); - checkDeletedSet(files, 1, 3, 4, 5, 6, 8); - tbl = objectStore.getTable(DB1, TABLE1); - assertEquals(8L, tbl.getMmWatermarkWriteId()); // Now it advances all the way. 
- - objectStore.dropTable(DB1, TABLE1); - objectStore.dropDatabase(DB1); - } - - private void createCompleteTableWrite(MockFileSystem mfs, MockFile[] files, - int id, LongSupplier time, Table tbl, char state) throws MetaException, InvalidObjectException { - objectStore.createTableWrite(tbl, id, state, time.value); - mfs.addFile(files[id]); - tbl.setMmNextWriteId(id + 1); - objectStore.alterTable(DB1, TABLE1, tbl); - } - - private void checkDeletedSet(MockFile[] files, int... deleted) { - for (int id : deleted) { - assertTrue("File " + id + " not deleted", files[id].isDeleted); - } - int count = 0; - for (MockFile file : files) { - if (file.isDeleted) ++count; - } - assertEquals(deleted.length, count); // Make sure nothing else is deleted. - } - - private List getAbortedWrites() throws MetaException { - return objectStore.getTableWriteIds(DB1, TABLE1, -1, 10, HiveMetaStore.MM_WRITE_ABORTED); - } private StorageDescriptor createFakeSd(String location) { return new StorageDescriptor(null, location, null, null, false, 0, diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java index 0b615cd..791813a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -34,15 +34,15 @@ import java.util.Map; import java.util.Queue; import java.util.Set; +import java.util.Stack; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantLock; import com.google.common.collect.Iterables; import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.ValidTxnList; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.common.metrics.common.Metrics; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; @@ -50,21 +50,10 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.conf.HiveVariableSource; import org.apache.hadoop.hive.conf.VariableSubstitution; -import org.apache.hadoop.hive.metastore.LockComponentBuilder; import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.DataOperationType; import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.LockComponent; import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hadoop.hive.ql.exec.ConditionalTask; -import org.apache.hadoop.hive.ql.exec.ExplainTask; -import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.TaskResult; -import org.apache.hadoop.hive.ql.exec.TaskRunner; -import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; import org.apache.hadoop.hive.ql.hooks.Entity; import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext; @@ -80,6 +69,7 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; import org.apache.hadoop.hive.ql.lockmgr.HiveLock; import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; import 
org.apache.hadoop.hive.ql.lockmgr.LockException; @@ -96,20 +86,18 @@ import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl; import org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ParseContext; -import org.apache.hadoop.hive.ql.parse.ParseDriver; import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory; -import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; -import org.apache.hadoop.hive.ql.plan.FileSinkDesc; -import org.apache.hadoop.hive.ql.plan.HiveOperation; -import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.parse.SplitSample; +import org.apache.hadoop.hive.ql.plan.*; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; @@ -1128,6 +1116,8 @@ private int acquireLocksAndOpenTxn(boolean startTxnImplicitly) { else { readOnlyQueryInAutoCommit = txnMgr.getAutoCommit() && plan.getOperation() == HiveOperation.QUERY && !haveAcidWrite(); } + + boolean hasMmTable = false; // Set the transaction id in all of the acid file sinks if (haveAcidWrite()) { for (FileSinkDesc desc : acidSinks) { @@ -1135,8 +1125,18 @@ private int acquireLocksAndOpenTxn(boolean startTxnImplicitly) { //it's possible to have > 1 FileSink writing to the same table/partition //e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes desc.setStatementId(txnMgr.getWriteIdAndIncrement()); + + if (desc.isMmTable()) { + hasMmTable = true; + } } } + + if (hasMmTable) { + // Set up the framework for replacement of mmId with txnId + revisitMmWriteId(this.getPlan(), txnMgr.getCurrentTxnId()); + } + /*Note, we have to record snapshot after lock acquisition to prevent lost update problem consider 2 concurrent "update table T set x = x + 1". 1st will get the locks and the 2nd will block until 1st one commits and only then lock in the snapshot, i.e. it will @@ -1528,13 +1528,6 @@ else if(!plan.getAutoCommitValue() && txnManager.getAutoCommit()) { return rollback(createProcessorResponse(ret)); } } - - try { - acquireWriteIds(plan, conf); - } catch (HiveException e) { - return handleHiveException(e, 1); - } - ret = execute(true); if (ret != 0) { //if needRequireLock is false, the release here will do nothing because there is no lock @@ -1595,46 +1588,202 @@ else if(plan.getOperation() == HiveOperation.ROLLBACK) { } } - - private static void acquireWriteIds(QueryPlan plan, HiveConf conf) throws HiveException { - // Output IDs are put directly into FileSinkDesc; here, we only need to take care of inputs. 
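// Illustrative sketch, not part of the patch: the per-table write-ID entries that the deleted
// acquireWriteIds() here used to push into the conf are superseded by the standard
// ValidTxnList snapshot, which readers rebuild from the job conf the same way the updated
// DDLTask and FetchOperator code later in this patch does (deltaDir below is made up).
String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY);
ValidTxnList validTxnList =
    txnString == null ? new ValidReadTxnList() : new ValidReadTxnList(txnString);
Path deltaDir = new Path("/warehouse/t/delta_0000123_0000123_0000");
Long writeId = JavaUtils.extractTxnId(deltaDir);                    // null for non-delta paths
boolean readable = writeId != null && validTxnList.isTxnValid(writeId);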
- Configuration fetchConf = null; - if (plan.getFetchTask() != null) { - fetchConf = plan.getFetchTask().getFetchConf(); - } - for (ReadEntity input : plan.getInputs()) { - Utilities.LOG14535.debug("Looking at " + input); - Table t = extractTable(input); - if (t == null) continue; - Utilities.LOG14535.info("Checking " + t.getTableName() + " for being a MM table: " + t.getParameters()); - if (!MetaStoreUtils.isInsertOnlyTable(t.getParameters())) { - ValidWriteIds.clearConf(conf, t.getDbName(), t.getTableName()); - if (fetchConf != null) { - ValidWriteIds.clearConf(fetchConf, t.getDbName(), t.getTableName()); + /** + * Traverse the plan tasks, and make following changes if applicable + * 1. for the 4 types of descriptors which have mmId defined, update the mmId with the newMmId + * 2. since in parsing phase the mmId has already been used to set up paths, need to go thru every + * impacted descriptor, and update the paths (db/tbl/mm_0) with the newMmId + */ + private static void revisitMmWriteId(QueryPlan plan, long txnId) { + + Stack taskList = new Stack<>(); + // As we walk thru the plan, we add all the tasks into taskList, and examine them one by one + taskList.addAll(plan.getRootTasks()); + while (!taskList.isEmpty()) { + // examine one task at a time + Task task = taskList.pop(); + Serializable work = task.getWork(); + + // Deal with different scenarios + if (work instanceof MapredWork) { + // MapWork has several maps to update: pathToAliases, pathToPartitionInfo, aliasToWork, aliasToPartnInfo and nameToSplitSample + MapWork mapWork = ((MapredWork) work).getMapWork(); + if (mapWork != null) { + if (mapWork.getPathToAliases() != null && !mapWork.getPathToAliases().isEmpty()) { + mapWork.setPathToAliases(updatePathToAlias(mapWork.getPathToAliases(), txnId)); + } + if (mapWork.getPathToPartitionInfo() != null && !mapWork.getPathToPartitionInfo().isEmpty()) { + mapWork.setPathToPartitionInfo(updatePathToPartitionInfo(mapWork.getPathToPartitionInfo(), txnId)); + } + if (mapWork.getAliasToWork() != null && !mapWork.getAliasToWork().isEmpty()) { + mapWork.setAliasToWork(updateAliasToWork(mapWork.getAliasToWork(), txnId)); + } + if (mapWork.getAliasToPartnInfo() != null && !mapWork.getAliasToPartnInfo().isEmpty()) { + mapWork.setAliasToPartnInfo(updateAliasToPartnInfo(mapWork.getAliasToPartnInfo(), txnId)); + } + if (mapWork.getNameToSplitSample() != null && !mapWork.getNameToSplitSample().isEmpty()) { + mapWork.setNameToSplitSample(updateNameToSplitSample(mapWork.getNameToSplitSample(), txnId)); + } + } + } else if (work instanceof MergeFileWork) { + MergeFileWork mergeFileWork = (MergeFileWork) work; + mergeFileWork.setInputPaths(getNewPaths(mergeFileWork.getInputPaths(), txnId)); + mergeFileWork.setOutputDir(replacePathWithTxnId(mergeFileWork.getOutputDir(), txnId)); + // Since MergeFileWork extends MapWork + if (mergeFileWork.getPathToAliases() != null && !mergeFileWork.getPathToAliases().isEmpty()) { + mergeFileWork.setPathToAliases(updatePathToAlias(mergeFileWork.getPathToAliases(), txnId)); + } + if (mergeFileWork.getPathToPartitionInfo() != null && !mergeFileWork.getPathToPartitionInfo().isEmpty()) { + mergeFileWork.setPathToPartitionInfo(updatePathToPartitionInfo(mergeFileWork.getPathToPartitionInfo(), txnId)); + } + if (mergeFileWork.getAliasToWork() != null && !mergeFileWork.getAliasToWork().isEmpty()) { + mergeFileWork.setAliasToWork(updateAliasToWork(mergeFileWork.getAliasToWork(), txnId)); + } + if (mergeFileWork.getAliasToPartnInfo() != null && 
!mergeFileWork.getAliasToPartnInfo().isEmpty()) { + mergeFileWork.setAliasToPartnInfo(updateAliasToPartnInfo(mergeFileWork.getAliasToPartnInfo(), txnId)); + } + if (mergeFileWork.getNameToSplitSample() != null && !mergeFileWork.getNameToSplitSample().isEmpty()) { + mergeFileWork.setNameToSplitSample(updateNameToSplitSample(mergeFileWork.getNameToSplitSample(), txnId)); + } + } else if (work instanceof MoveWork) { + MoveWork moveWork = (MoveWork) work; + if (moveWork.getLoadFileWork() != null) { + moveWork.getLoadFileWork().setSourcePath(replacePathWithTxnId(moveWork.getLoadFileWork().getSourcePath(), txnId)); + moveWork.getLoadFileWork().setTargetDir(replacePathWithTxnId(moveWork.getLoadFileWork().getTargetDir(), txnId)); + } else if (moveWork.getLoadTableWork() != null) { + moveWork.getLoadTableWork().setTxnId(txnId); // reset mmWriteId + moveWork.getLoadTableWork().setSourcePath(replacePathWithTxnId(moveWork.getLoadTableWork().getSourcePath(), txnId)); + } else if (moveWork.getLoadMultiFilesWork() != null) { + moveWork.getLoadMultiFilesWork().setSourceDirs(getNewPaths(moveWork.getLoadMultiFilesWork().getSourceDirs(), txnId)); + moveWork.getLoadMultiFilesWork().setTargetDirs(getNewPaths(moveWork.getLoadMultiFilesWork().getTargetDirs(), txnId)); + } + } else if (work instanceof DDLWork) { + DDLWork ddlWork = (DDLWork) work; + if (ddlWork.getCreateTblDesc() != null) { + ddlWork.getCreateTblDesc().setInitialMmWriteId(txnId); // reset mmWriteId + } + } + + // add more to taskList for processing if any + if (task.getNumChild() > 0) { + for (Object childTask : task.getChildTasks()) { + if (childTask instanceof ConditionalTask) { + taskList.addAll(((ConditionalTask) childTask).getListTasks()); + } else { + taskList.push((Task) childTask); + } } - continue; } - ValidWriteIds ids = Hive.get().getValidWriteIdsForTable(t.getDbName(), t.getTableName()); - ids.addToConf(conf, t.getDbName(), t.getTableName()); - if (fetchConf != null) { - ids.addToConf(fetchConf, t.getDbName(), t.getTableName()); + } + } + + /** + * Given the String form of a Path, update the transactionId for the delta directory name. 
+ * For example, given: + * pfile:/Users/wzheng/HIVE-16063/hive/itests/qtest/target/warehouse/union_mm/delta_0000000_0000000_0000 + * if txnId is 123, below String will be returned: + * pfile:/Users/wzheng/HIVE-16063/hive/itests/qtest/target/warehouse/union_mm/delta_0000123_0000123_0000 + * @param oldString original String form for the Path + * @param txnId transaction ID used for replacement + * @return String form of Path with transaction ID replaced + */ + private static String replaceStringWithTxnId(String oldString, long txnId) { + if (oldString == null || oldString.isEmpty()) { + return oldString; + } + + // extract the prefix and delta dir name + int slash = oldString.lastIndexOf("/"); + String prefix = oldString.substring(0, slash + 1); // pfile:/Users/../../warehouse/table_name/ + String deltaDir = oldString.substring(slash + 1); // delta_0000000_0000000_0000 + + // If the format of the directory name doesn't conform to "delta_0000000_0000000_0000" format, skip + if (!deltaDir.startsWith(AcidUtils.DELTA_PREFIX) || deltaDir.split("_").length != 4) { + return oldString; + } + + // get the last section which is stmtId (which in most cases is 0000) + int underscore = deltaDir.lastIndexOf("_"); + int stmtId = Integer.valueOf(deltaDir.substring(underscore + 1)); // 0000 + + // we're assuming for mmId, the minTxn and maxTxn are the same + return prefix + AcidUtils.deltaSubdir(txnId, txnId, stmtId); + } + + private static Path replacePathWithTxnId(Path oldPath, long txnId) { + String oldPathString = oldPath.toString(); + String newPathString = replaceStringWithTxnId(oldPathString, txnId); + if (oldPathString.equals(newPathString)) { + return oldPath; + } else { + return new Path(newPathString); + } + } + + private static List getNewPaths(List oldPaths, long txnId) { + List newPaths = new ArrayList<>(); + for (Path path : oldPaths) { + newPaths.add(replacePathWithTxnId(path, txnId)); + } + return newPaths; + } + + private static LinkedHashMap> updatePathToAlias(Map> pathToAliases, long txnId) { + // Given a hashmap, update the key (i.e. 
Path) for each entry, then put it into the new hashmap + LinkedHashMap> newMap = new LinkedHashMap<>(); + for (Path oldPath : pathToAliases.keySet()) { + List value = pathToAliases.get(oldPath); + Path newPath = replacePathWithTxnId(oldPath, txnId); + newMap.put(newPath, (ArrayList) value); + } + return newMap; + } + + private static LinkedHashMap updatePathToPartitionInfo(Map pathToPartitionInfo, long txnId) { + LinkedHashMap newMap = new LinkedHashMap<>(); + for (Path oldPath : pathToPartitionInfo.keySet()) { + PartitionDesc value = pathToPartitionInfo.get(oldPath); + Path newPath = replacePathWithTxnId(oldPath, txnId); + newMap.put(newPath, value); + } + return newMap; + } + + private static LinkedHashMap> updateAliasToWork(Map> aliasToWork, long txnId) { + LinkedHashMap> newMap = new LinkedHashMap<>(); + for (String oldString : aliasToWork.keySet()) { + Operator operator = aliasToWork.get(oldString); + String newString = replaceStringWithTxnId(oldString, txnId); + + if (operator instanceof AbstractFileMergeOperator) { + FileMergeDesc fileMergeDesc = (FileMergeDesc) operator.getConf(); + fileMergeDesc.setTxnId(txnId); // reset mmWriteId + fileMergeDesc.setStmtId(0); } + + newMap.put(newString, operator); } + return newMap; } - private static Table extractTable(ReadEntity input) { - Table t = null; - switch (input.getType()) { - case TABLE: - t = input.getTable(); - break; - case DUMMYPARTITION: - case PARTITION: - t = input.getPartition().getTable(); - break; - default: return null; + private static LinkedHashMap updateAliasToPartnInfo(Map aliasToPartnInfo, long txnId) { + LinkedHashMap newMap = new LinkedHashMap<>(); + for (String oldString : aliasToPartnInfo.keySet()) { + PartitionDesc value = aliasToPartnInfo.get(oldString); + String newString = replaceStringWithTxnId(oldString, txnId); + newMap.put(newString, value); + } + return newMap; + } + + private static HashMap updateNameToSplitSample(Map aliasToPartnInfo, long txnId) { + HashMap newMap = new LinkedHashMap<>(); + for (String oldString : aliasToPartnInfo.keySet()) { + SplitSample value = aliasToPartnInfo.get(oldString); + String newString = replaceStringWithTxnId(oldString, txnId); + newMap.put(newString, value); } - return (t != null && !t.isTemporary()) ? t : null; + return newMap; } private CommandProcessorResponse rollback(CommandProcessorResponse cpr) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java index 1315b99..7ef4f49 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java @@ -89,7 +89,7 @@ public void initializeOp(Configuration hconf) throws HiveException { .isListBucketingAlterTableConcatenate(); listBucketingDepth = conf.getListBucketingDepth(); Path specPath = conf.getOutputPath(); - isMmTable = conf.getMmWriteId() != null; + isMmTable = conf.getTxnId() != null; if (isMmTable) { updatePaths(specPath, null); } else { @@ -246,7 +246,7 @@ public void closeOp(boolean abort) throws HiveException { // There's always just one file that we have merged. // The union/DP/etc. should already be account for in the path. 
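// Illustrative sketch, not part of the patch: the txnId/stmtId written into the commit
// manifest below are the values that Driver.replaceStringWithTxnId() earlier in this patch
// substituted into the pre-compiled placeholder delta paths; that substitution, expressed
// with AcidUtils.deltaSubdir(), looks roughly like this (warehouse path made up).
Path compiled = new Path("/warehouse/union_mm/delta_0000000_0000000_0000");
String dirName = compiled.getName();
int stmtId = Integer.parseInt(dirName.substring(dirName.lastIndexOf('_') + 1)); // 0
Path rewritten = new Path(compiled.getParent(),
    AcidUtils.deltaSubdir(123L, 123L, stmtId));      // .../union_mm/delta_0000123_0000123_0000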
Utilities.writeMmCommitManifest(Lists.newArrayList(outPath), - tmpPath.getParent(), fs, taskId, conf.getMmWriteId(), null); + tmpPath.getParent(), fs, taskId, conf.getTxnId(), conf.getStmtId(), null); LOG.info("Merged into " + finalPath + "(" + fss.getLen() + " bytes)."); } } @@ -280,7 +280,8 @@ public void jobCloseOp(Configuration hconf, boolean success) try { Path outputDir = conf.getOutputPath(); FileSystem fs = outputDir.getFileSystem(hconf); - Long mmWriteId = conf.getMmWriteId(); + Long mmWriteId = conf.getTxnId(); + int stmtId = conf.getStmtId(); if (mmWriteId == null) { Path backupPath = backupOutputPath(fs, outputDir); Utilities.mvFileToFinalPath( @@ -297,7 +298,7 @@ public void jobCloseOp(Configuration hconf, boolean success) // We don't expect missing buckets from mere (actually there should be no buckets), // so just pass null as bucketing context. Union suffix should also be accounted for. Utilities.handleMmTableFinalPath(outputDir.getParent(), null, hconf, success, - dpLevels, lbLevels, null, mmWriteId, reporter, false); + dpLevels, lbLevels, null, mmWriteId, stmtId, reporter, false); } } catch (IOException e) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java index e8526f6..14ab6da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java @@ -18,12 +18,12 @@ package org.apache.hadoop.hive.ql.exec; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.hive.common.JavaUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileStatus; @@ -31,10 +31,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.DriverContext; -import org.apache.hadoop.hive.ql.parse.LoadSemanticAnalyzer; import org.apache.hadoop.hive.ql.plan.CopyWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.util.StringUtils; @@ -113,7 +111,7 @@ protected int copyOnePath(Path fromPath, Path toPath) { if (!fs.exists(path)) return null; if (!isSourceMm) return matchFilesOneDir(fs, path, null); // TODO: this doesn't handle list bucketing properly. Does the original exim do that? 
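// Illustrative sketch, not part of the patch: JavaUtils.AnyIdDirFilter, substituted for
// ValidWriteIds.AnyIdDirFilter just below, accepts any delta directory regardless of its
// transaction ID and rejects everything else (paths below are made up).
PathFilter anyDelta = new JavaUtils.AnyIdDirFilter();
anyDelta.accept(new Path("/staging/t/delta_0000123_0000123_0000")); // true
anyDelta.accept(new Path("/staging/t/_tmp.some_file"));             // false
anyDelta.accept(new Path("/staging/t/base_0000123"));               // false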
- FileStatus[] mmDirs = fs.listStatus(path, new ValidWriteIds.AnyIdDirFilter()); + FileStatus[] mmDirs = fs.listStatus(path, new JavaUtils.AnyIdDirFilter()); if (mmDirs == null || mmDirs.length == 0) return null; List allFiles = new ArrayList(); for (FileStatus mmDir : mmDirs) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index a1a0862..4603d9e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -57,8 +57,10 @@ import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidWriteIds; +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; @@ -98,7 +100,6 @@ import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.TxnInfo; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.Context; @@ -4025,7 +4026,8 @@ private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition + " to false for this query if you want to force the conversion."); } Hive db = getHive(); - ValidWriteIds ids = db.getValidWriteIdsForTable(tbl.getDbName(), tbl.getTableName()); + String value = conf.get(ValidTxnList.VALID_TXNS_KEY); + ValidTxnList validTxnList = value == null ? new ValidReadTxnList() : new ValidReadTxnList(value); if (tbl.getPartitionKeys().size() > 0) { PartitionIterable parts = new PartitionIterable(db, tbl, null, HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); @@ -4033,15 +4035,15 @@ private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition while (partIter.hasNext()) { Partition part = partIter.next(); checkMmLb(part); - handleRemoveMm(part.getDataLocation(), ids, allMmDirs); + handleRemoveMm(part.getDataLocation(), validTxnList, allMmDirs); } } else { checkMmLb(tbl); - handleRemoveMm(tbl.getDataLocation(), ids, allMmDirs); + handleRemoveMm(tbl.getDataLocation(), validTxnList, allMmDirs); } List targetPaths = new ArrayList<>(allMmDirs.size()); List targetPrefix = new ArrayList<>(allMmDirs.size()); - int prefixLen = ValidWriteIds.MM_PREFIX.length(); + int prefixLen = JavaUtils.DELTA_PREFIX.length(); for (int i = 0; i < allMmDirs.size(); ++i) { Path src = allMmDirs.get(i); Path tgt = src.getParent(); @@ -4072,7 +4074,7 @@ private void checkMmLb(Partition part) throws HiveException { } private void handleRemoveMm( - Path path, ValidWriteIds ids, List result) throws HiveException { + Path path, ValidTxnList validTxnList, List result) throws HiveException { // Note: doesn't take LB into account; that is not presently supported here (throws above). 
try { FileSystem fs = path.getFileSystem(conf); @@ -4082,10 +4084,10 @@ private void handleRemoveMm( ensureDelete(fs, childPath, "a non-directory file"); continue; } - Long writeId = ValidWriteIds.extractWriteId(childPath); + Long writeId = JavaUtils.extractTxnId(childPath); if (writeId == null) { ensureDelete(fs, childPath, "an unknown directory"); - } else if (!ids.isValid(writeId)) { + } else if (!validTxnList.isTxnValid(writeId)) { // Assume no concurrent active writes - we rely on locks here. We could check and fail. ensureDelete(fs, childPath, "an uncommitted directory"); } else { @@ -4112,9 +4114,19 @@ private static void ensureDelete(FileSystem fs, Path path, String what) throws I // We will move all the files in the table/partition directories into the first MM // directory, then commit the first write ID. List srcs = new ArrayList<>(), tgts = new ArrayList<>(); + long mmWriteId = 0; + try { + HiveTxnManager txnManager = SessionState.get().getTxnMgr(); + mmWriteId = txnManager.openTxn(new Context(conf), conf.getUser()); + txnManager.commitTxn(); + } catch (Exception e) { + String errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage(); + console.printError(errorMessage, "\n" + + org.apache.hadoop.util.StringUtils.stringifyException(e)); + } + int stmtId = 0; //todo + String mmDir = AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId); Hive db = getHive(); - long mmWriteId = db.getNextTableWriteId(tbl.getDbName(), tbl.getTableName()); - String mmDir = ValidWriteIds.getMmFilePrefix(mmWriteId); if (tbl.getPartitionKeys().size() > 0) { PartitionIterable parts = new PartitionIterable(db, tbl, null, HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); @@ -4137,15 +4149,7 @@ private static void ensureDelete(FileSystem fs, Path path, String what) throws I // Don't set inputs and outputs - the locks have already been taken so it's pointless. MoveWork mw = new MoveWork(null, null, null, null, false); mw.setMultiFilesDesc(new LoadMultiFilesDesc(srcs, tgts, true, null, null)); - ImportCommitWork icw = new ImportCommitWork(tbl.getDbName(), tbl.getTableName(), mmWriteId); - // TODO# this is hacky and will be gone with ACID. The problem is getting the write ID above - // modifies the table, but the table object above is preserved and modified without - // getting this change, so saving it will overwrite write ID. Ideally, when we save - // only specific fields, and not overwrite write ID every time we alter table. - // There's probably some way in DN to achieve that, but for now let's just update the - // original object here. This is safe due to DDL lock and the fact that converting - // the table to MM here from non-MM should mean no concurrent write ID updates. - tbl.setMmNextWriteId(mmWriteId + 1); + ImportCommitWork icw = new ImportCommitWork(tbl.getDbName(), tbl.getTableName(), mmWriteId, stmtId); Task mv = TaskFactory.get(mw, conf), ic = TaskFactory.get(icw, conf); mv.addDependentTask(ic); return Lists.>newArrayList(mv); @@ -4558,20 +4562,6 @@ private int createTable(Hive db, CreateTableDesc crtTbl) throws HiveException { Long mmWriteId = crtTbl.getInitialMmWriteId(); if (crtTbl.isCTAS() || mmWriteId != null) { Table createdTable = db.getTable(tbl.getDbName(), tbl.getTableName()); - if (mmWriteId != null) { - // TODO# this would be retrieved via ACID before the query runs; for now we rely on it - // being zero at start; we can't create a write ID before we create the table here. 
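// Illustrative sketch, not part of the patch: the conversion code above moves all existing
// files under one delta directory named by AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId);
// the JavaUtils.IdPathFilter added at the top of this patch is the filter that matches exactly
// that directory (or, with isMatch == false, everything except it). Paths are made up.
PathFilter onlyThisWrite = new JavaUtils.IdPathFilter(123L, 0, true);
onlyThisWrite.accept(new Path("/warehouse/t/delta_0000123_0000123_0000")); // true
onlyThisWrite.accept(new Path("/warehouse/t/delta_0000007_0000007_0000")); // false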
- long initialWriteId = db.getNextTableWriteId(tbl.getDbName(), tbl.getTableName()); - if (initialWriteId != mmWriteId) { - throw new HiveException("Initial write ID mismatch - expected " + mmWriteId - + " but got " + initialWriteId); - } - // CTAS create the table on a directory that already exists; import creates the table - // first (in parallel with copies?), then commits after all the loads. - if (crtTbl.isCTAS()) { - db.commitMmTableWrite(tbl, initialWriteId); - } - } if (crtTbl.isCTAS()) { DataContainer dc = new DataContainer(createdTable.getTTable()); SessionState.get().getLineageState().setLineage( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index 4102d02..e743108 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -35,10 +35,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.common.ValidTxnList; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader; @@ -128,7 +127,6 @@ private transient StructObjectInspector outputOI; private transient Object[] row; - private transient Map writeIdMap; public FetchOperator(FetchWork work, JobConf job) throws HiveException { this(work, job, null, null); @@ -275,7 +273,7 @@ private boolean getNextPath() throws Exception { } FileSystem fs = currPath.getFileSystem(job); if (fs.exists(currPath)) { - if (extractWriteIdsForCurrentTable() != null) { + if (extractValidTxnList() != null) { return true; } for (FileStatus fStat : listStatusUnderPath(fs, currPath)) { @@ -403,12 +401,12 @@ private String processCurrPathForMmWriteIds(InputFormat inputFormat) throws IOEx if (inputFormat instanceof HiveInputFormat) { return StringUtils.escapeString(currPath.toString()); // No need to process here. } - ValidWriteIds ids = extractWriteIdsForCurrentTable(); - if (ids != null) { - Utilities.LOG14535.info("Observing " + currDesc.getTableName() + ": " + ids); + ValidTxnList validTxnList = extractValidTxnList(); + if (validTxnList != null) { + Utilities.LOG14535.info("Observing " + currDesc.getTableName() + ": " + validTxnList); } - Path[] dirs = HiveInputFormat.processPathsForMmRead(Lists.newArrayList(currPath), job, ids); + Path[] dirs = HiveInputFormat.processPathsForMmRead(Lists.newArrayList(currPath), job, validTxnList); if (dirs == null || dirs.length == 0) { return null; // No valid inputs. This condition is logged inside the call. } @@ -419,11 +417,16 @@ private String processCurrPathForMmWriteIds(InputFormat inputFormat) throws IOEx return str.toString(); } - private ValidWriteIds extractWriteIdsForCurrentTable() { - if (writeIdMap == null) { - writeIdMap = new HashMap(); + private ValidTxnList extractValidTxnList() { + ValidTxnList validTxnList; + if (org.apache.commons.lang.StringUtils.isBlank(currDesc.getTableName())) { + validTxnList = null; // i.e. not fetching from a table directly but from a temp location + } else { + String txnString = job.get(ValidTxnList.VALID_TXNS_KEY); + validTxnList = txnString == null ? 
new ValidReadTxnList() : + new ValidReadTxnList(txnString); } - return HiveInputFormat.extractWriteIds(writeIdMap, job, currDesc.getTableName()); + return validTxnList; } private FetchInputFormatSplit[] splitSampling(SplitSample splitSample, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java index bd822df..f6d27fb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java @@ -193,9 +193,4 @@ public void clearFetch() throws HiveException { fetch.clearFetchContext(); } } - - public Configuration getFetchConf() { - return fetch.getJobConf(); - } - } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 3ad1733..62a5f6b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -33,16 +33,11 @@ import java.util.Set; import com.google.common.collect.Lists; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.CompilationOpContext; @@ -176,6 +171,8 @@ int acidLastBucket = -1; int acidFileOffset = -1; private boolean isMmTable; + private Long txnId; + private int stmtId; public FSPaths(Path specPath, boolean isMmTable) { this.isMmTable = isMmTable; @@ -185,6 +182,8 @@ public FSPaths(Path specPath, boolean isMmTable) { } else { tmpPath = specPath; taskOutputTempPath = null; // Should not be used. + txnId = conf.getTransactionId(); + stmtId = conf.getStatementId(); } Utilities.LOG14535.info("new FSPaths for " + numFiles + " files, dynParts = " + bDynParts + ": tmpPath " + tmpPath + ", task path " + taskOutputTempPath @@ -327,7 +326,7 @@ public void initializeBucketPaths(int filesIdx, String taskId, boolean isNativeT } outPaths[filesIdx] = getTaskOutPath(taskId); } else { - String subdirPath = ValidWriteIds.getMmFilePrefix(conf.getMmWriteId()); + String subdirPath = AcidUtils.deltaSubdir(txnId, txnId, stmtId); if (unionPath != null) { // Create the union directory inside the MM directory. 
subdirPath += Path.SEPARATOR + unionPath; @@ -735,10 +734,9 @@ protected void createBucketForFileIdx(FSPaths fsp, int filesIdx) Utilities.copyTableJobPropertiesToConf(conf.getTableInfo(), jc); // only create bucket files only if no dynamic partitions, // buckets of dynamic partitions will be created for each newly created partition - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || - conf.getWriteType() == AcidUtils.Operation.INSERT_ONLY) { + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || conf.isMmTable()) { Path outPath = fsp.outPaths[filesIdx]; - if ((conf.getWriteType() == AcidUtils.Operation.INSERT_ONLY || conf.isMmTable()) + if (conf.isMmTable() && inheritPerms && !FileUtils.mkdir(fs, outPath.getParent(), inheritPerms, hconf)) { LOG.warn("Unable to create directory with inheritPerms: " + outPath); } @@ -884,8 +882,7 @@ public void process(Object row, int tag) throws HiveException { // for a given operator branch prediction should work quite nicely on it. // RecordUpdateer expects to get the actual row, not a serialized version of it. Thus we // pass the row rather than recordValue. - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || - conf.getWriteType() == AcidUtils.Operation.INSERT_ONLY) { + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || conf.isMmTable()) { rowOutWriters[writerOffset].write(recordValue); } else if (conf.getWriteType() == AcidUtils.Operation.INSERT) { fpaths.updaters[writerOffset].insert(conf.getTransactionId(), row); @@ -929,8 +926,7 @@ public void process(Object row, int tag) throws HiveException { protected boolean areAllTrue(boolean[] statsFromRW) { // If we are doing an acid operation they will always all be true as RecordUpdaters always // collect stats - if (conf.getWriteType() != AcidUtils.Operation.NOT_ACID && - conf.getWriteType() != AcidUtils.Operation.INSERT_ONLY) { + if (conf.getWriteType() != AcidUtils.Operation.NOT_ACID && !conf.isMmTable()) { return true; } for(boolean b : statsFromRW) { @@ -1074,8 +1070,7 @@ protected FSPaths getDynOutPaths(List row, String lbDirName) throws Hive // stats from the record writer and store in the previous fsp that is cached if (conf.isGatherStats() && isCollectRWStats) { SerDeStats stats = null; - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || - conf.getWriteType() == AcidUtils.Operation.INSERT_ONLY) { + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || conf.isMmTable()) { RecordWriter outWriter = prevFsp.outWriters[0]; if (outWriter != null) { stats = ((StatsProvidingRecordWriter) outWriter).getStats(); @@ -1177,8 +1172,7 @@ public void closeOp(boolean abort) throws HiveException { // record writer already gathers the statistics, it can simply return the // accumulated statistics which will be aggregated in case of spray writers if (conf.isGatherStats() && isCollectRWStats) { - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || - conf.getWriteType() == AcidUtils.Operation.INSERT_ONLY) { + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || conf.isMmTable()) { for (int idx = 0; idx < fsp.outWriters.length; idx++) { RecordWriter outWriter = fsp.outWriters[idx]; if (outWriter != null) { @@ -1208,7 +1202,7 @@ public void closeOp(boolean abort) throws HiveException { } if (conf.getMmWriteId() != null) { Utilities.writeMmCommitManifest( - commitPaths, specPath, fs, taskId, conf.getMmWriteId(), unionPath); + commitPaths, specPath, fs, taskId, conf.getMmWriteId(), conf.getStatementId(), unionPath); } // Only publish stats if this operator's flag 
was set to gather stats if (conf.isGatherStats()) { @@ -1264,7 +1258,7 @@ public void jobCloseOp(Configuration hconf, boolean success) MissingBucketsContext mbc = new MissingBucketsContext( conf.getTableInfo(), numBuckets, conf.getCompressed()); Utilities.handleMmTableFinalPath(specPath, unionSuffix, hconf, success, - dpLevels, lbLevels, mbc, conf.getMmWriteId(), reporter, conf.isMmCtas()); + dpLevels, lbLevels, mbc, conf.getMmWriteId(), conf.getStatementId(), reporter, conf.isMmCtas()); } } } catch (IOException e) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java index ba009b9..27db9a4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java @@ -19,8 +19,6 @@ package org.apache.hadoop.hive.ql.exec; import org.apache.hadoop.hive.ql.DriverContext; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.util.StringUtils; @@ -35,16 +33,13 @@ public ImportCommitTask() { @Override public int execute(DriverContext driverContext) { - Utilities.LOG14535.info("Executing ImportCommit for " + work.getMmWriteId()); + Utilities.LOG14535.info("Executing ImportCommit for " + work.getTxnId()); try { if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) { Utilities.LOG14535.info("Exiting due to explain"); return 0; } - Hive db = getHive(); - Table tbl = db.getTable(work.getDbName(), work.getTblName()); - db.commitMmTableWrite(tbl, work.getMmWriteId()); return 0; } catch (Exception e) { console.printError("Failed with exception " + e.getMessage(), "\n" diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java index f62d237..5b59635 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java @@ -26,16 +26,22 @@ public class ImportCommitWork implements Serializable { private static final long serialVersionUID = 1L; private String dbName, tblName; - private long mmWriteId; + private long txnId; + private int stmtId; - public ImportCommitWork(String dbName, String tblName, long mmWriteId) { - this.mmWriteId = mmWriteId; + public ImportCommitWork(String dbName, String tblName, long txnId, int stmtId) { + this.txnId = txnId; + this.stmtId = stmtId; this.dbName = dbName; this.tblName = tblName; } - public long getMmWriteId() { - return mmWriteId; + public long getTxnId() { + return txnId; + } + + public int getStmtId() { + return stmtId; } public String getDbName() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index 29b72a0..1e56ade 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -38,7 +38,6 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.hive.metastore.model.MMasterKey; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.DriverContext; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; @@ -343,10 +342,10 @@ public int 
execute(DriverContext driverContext) { checkFileFormats(db, tbd, table); - boolean isAcid = work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.INSERT_ONLY; - if (tbd.isMmTable() && isAcid) { - throw new HiveException("ACID and MM are not supported"); + boolean isFullAcidOp = work.getLoadTableWork().getWriteType() == AcidUtils.Operation.UPDATE || + work.getLoadTableWork().getWriteType() == AcidUtils.Operation.DELETE; + if (tbd.isMmTable() && isFullAcidOp) { + throw new HiveException("UPDATE and DELETE operations are not supported for MM table"); } // Create a data container @@ -359,8 +358,8 @@ public int execute(DriverContext driverContext) { "Only single-partition LoadTableDesc can skip commiting write ID"); } db.loadTable(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getReplace(), - work.isSrcLocal(), isSkewedStoredAsDirs(tbd), isAcid, hasFollowingStatsTask(), - tbd.getMmWriteId()); + work.isSrcLocal(), isSkewedStoredAsDirs(tbd), isFullAcidOp, hasFollowingStatsTask(), + tbd.getTxnId(), tbd.getStmtId()); if (work.getOutputs() != null) { DDLTask.addIfAbsentByName(new WriteEntity(table, getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs()); @@ -417,13 +416,12 @@ private DataContainer handleStaticParts(Hive db, Table table, LoadTableDesc tbd, db.validatePartitionNameCharacters(partVals); Utilities.LOG14535.info("loadPartition called from " + tbd.getSourcePath() + " into " + tbd.getTable().getTableName()); - boolean isCommitMmWrite = tbd.isCommitMmWrite(); db.loadSinglePartition(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getReplace(), tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(), - (work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.INSERT_ONLY), - hasFollowingStatsTask(), tbd.getMmWriteId(), isCommitMmWrite); + work.getLoadTableWork().getWriteType() == AcidUtils.Operation.UPDATE || + work.getLoadTableWork().getWriteType() == AcidUtils.Operation.DELETE, + hasFollowingStatsTask(), tbd.getTxnId(), tbd.getStmtId()); Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); // See the comment inside updatePartitionBucketSortColumns. @@ -467,11 +465,10 @@ private DataContainer handleDynParts(Hive db, Table table, LoadTableDesc tbd, tbd.getReplace(), dpCtx.getNumDPCols(), (tbd.getLbCtx() == null) ? 
0 : tbd.getLbCtx().calculateListBucketingLevel(), - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.INSERT_ONLY, - SessionState.get().getTxnMgr().getCurrentTxnId(), hasFollowingStatsTask(), - work.getLoadTableWork().getWriteType(), - tbd.getMmWriteId()); + work.getLoadTableWork().getWriteType() == AcidUtils.Operation.UPDATE || + work.getLoadTableWork().getWriteType() == AcidUtils.Operation.DELETE, + SessionState.get().getTxnMgr().getCurrentTxnId(), tbd.getStmtId(), hasFollowingStatsTask(), + work.getLoadTableWork().getWriteType()); // publish DP columns to its subscribers if (dps != null && dps.size() > 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 5b5ddc3..9cff5ad 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -67,9 +67,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.zip.Deflater; @@ -99,8 +96,8 @@ import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.common.StringInternUtils; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStoreUtils; @@ -203,67 +200,14 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.Shell; import org.apache.hive.common.util.ACLConfigurationParser; import org.apache.hive.common.util.ReflectionUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.esotericsoftware.kryo.Kryo; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import java.beans.DefaultPersistenceDelegate; -import java.beans.Encoder; -import java.beans.Expression; -import java.beans.Statement; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInput; -import java.io.EOFException; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.Serializable; -import java.net.URI; -import java.net.URL; -import java.net.URLClassLoader; -import java.net.URLDecoder; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.SQLFeatureNotSupportedException; -import java.sql.SQLTransientException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Collection; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; 
-import java.util.Random; -import java.util.Set; -import java.util.UUID; import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.zip.Deflater; -import java.util.zip.DeflaterOutputStream; -import java.util.zip.InflaterInputStream; /** @@ -1590,7 +1534,7 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I int dpLevels = dpCtx == null ? 0 : dpCtx.getNumDPCols(), numBuckets = (conf != null && conf.getTable() != null) ? conf.getTable().getNumBuckets() : 0; - return removeTempOrDuplicateFiles(fs, fileStats, dpLevels, numBuckets, hconf, null); + return removeTempOrDuplicateFiles(fs, fileStats, dpLevels, numBuckets, hconf, null, 0); } private static boolean removeEmptyDpDirectory(FileSystem fs, Path path) throws IOException { @@ -1606,7 +1550,7 @@ private static boolean removeEmptyDpDirectory(FileSystem fs, Path path) throws I } public static List removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats, - int dpLevels, int numBuckets, Configuration hconf, Long mmWriteId) throws IOException { + int dpLevels, int numBuckets, Configuration hconf, Long txnId, int stmtId) throws IOException { if (fileStats == null) { return null; } @@ -1625,9 +1569,9 @@ private static boolean removeEmptyDpDirectory(FileSystem fs, Path path) throws I } FileStatus[] items = fs.listStatus(path); - if (mmWriteId != null) { + if (txnId != null) { Path mmDir = parts[i].getPath(); - if (!mmDir.getName().equals(ValidWriteIds.getMmFilePrefix(mmWriteId))) { + if (!mmDir.getName().equals(AcidUtils.deltaSubdir(txnId, txnId, stmtId))) { throw new IOException("Unexpected non-MM directory name " + mmDir); } Utilities.LOG14535.info("removeTempOrDuplicateFiles processing files in MM directory " + mmDir); @@ -1642,14 +1586,14 @@ private static boolean removeEmptyDpDirectory(FileSystem fs, Path path) throws I if (items.length == 0) { return result; } - if (mmWriteId == null) { + if (txnId == null) { taskIDToFile = removeTempOrDuplicateFilesNonMm(items, fs); } else { if (items.length > 1) { throw new IOException("Unexpected directories for non-DP MM: " + Arrays.toString(items)); } Path mmDir = items[0].getPath(); - if (!mmDir.getName().equals(ValidWriteIds.getMmFilePrefix(mmWriteId))) { + if (!mmDir.getName().equals(AcidUtils.deltaSubdir(txnId, txnId, stmtId))) { throw new IOException("Unexpected non-MM directory " + mmDir); } Utilities.LOG14535.info( @@ -3993,10 +3937,10 @@ private static void tryDelete(FileSystem fs, Path path) { } public static Path[] getMmDirectoryCandidates(FileSystem fs, Path path, int dpLevels, - int lbLevels, PathFilter filter, long mmWriteId, Configuration conf) throws IOException { + int lbLevels, PathFilter filter, long txnId, int stmtId, Configuration conf) throws IOException { int skipLevels = dpLevels + lbLevels; if (filter == null) { - filter = new ValidWriteIds.IdPathFilter(mmWriteId, true); + filter = new JavaUtils.IdPathFilter(txnId, stmtId, true); } if (skipLevels == 0) { return statusToPath(fs.listStatus(path, filter)); @@ -4004,7 +3948,7 @@ private static void tryDelete(FileSystem fs, Path path) { if (HiveConf.getBoolVar(conf, ConfVars.HIVE_MM_AVOID_GLOBSTATUS_ON_S3) && isS3(fs)) { return getMmDirectoryCandidatesRecursive(fs, path, skipLevels, filter); } - return 
getMmDirectoryCandidatesGlobStatus(fs, path, skipLevels, filter, mmWriteId); + return getMmDirectoryCandidatesGlobStatus(fs, path, skipLevels, filter, txnId, stmtId); } private static boolean isS3(FileSystem fs) { @@ -4072,22 +4016,22 @@ private static boolean isS3(FileSystem fs) { } private static Path[] getMmDirectoryCandidatesGlobStatus(FileSystem fs, - Path path, int skipLevels, PathFilter filter, long mmWriteId) throws IOException { + Path path, int skipLevels, PathFilter filter, long txnId, int stmtId) throws IOException { StringBuilder sb = new StringBuilder(path.toUri().getPath()); for (int i = 0; i < skipLevels; i++) { sb.append(Path.SEPARATOR).append("*"); } - sb.append(Path.SEPARATOR).append(ValidWriteIds.getMmFilePrefix(mmWriteId)); + sb.append(Path.SEPARATOR).append(AcidUtils.deltaSubdir(txnId, txnId, stmtId)); Path pathPattern = new Path(path, sb.toString()); Utilities.LOG14535.info("Looking for files via: " + pathPattern); return statusToPath(fs.globStatus(pathPattern, filter)); } private static void tryDeleteAllMmFiles(FileSystem fs, Path specPath, Path manifestDir, - int dpLevels, int lbLevels, String unionSuffix, ValidWriteIds.IdPathFilter filter, - long mmWriteId, Configuration conf) throws IOException { + int dpLevels, int lbLevels, JavaUtils.IdPathFilter filter, + long txnId, int stmtId, Configuration conf) throws IOException { Path[] files = getMmDirectoryCandidates( - fs, specPath, dpLevels, lbLevels, filter, mmWriteId, conf); + fs, specPath, dpLevels, lbLevels, filter, txnId, stmtId, conf); if (files != null) { for (Path path : files) { Utilities.LOG14535.info("Deleting " + path + " on failure"); @@ -4100,10 +4044,10 @@ private static void tryDeleteAllMmFiles(FileSystem fs, Path specPath, Path manif public static void writeMmCommitManifest(List commitPaths, Path specPath, FileSystem fs, - String taskId, Long mmWriteId, String unionSuffix) throws HiveException { + String taskId, Long txnId, int stmtId, String unionSuffix) throws HiveException { if (commitPaths.isEmpty()) return; // We assume one FSOP per task (per specPath), so we create it in specPath. - Path manifestPath = getManifestDir(specPath, mmWriteId, unionSuffix); + Path manifestPath = getManifestDir(specPath, txnId, stmtId, unionSuffix); manifestPath = new Path(manifestPath, taskId + MANIFEST_EXTENSION); Utilities.LOG14535.info("Writing manifest to " + manifestPath + " with " + commitPaths); try { @@ -4122,8 +4066,8 @@ public static void writeMmCommitManifest(List commitPaths, Path specPath, } } - private static Path getManifestDir(Path specPath, long mmWriteId, String unionSuffix) { - Path manifestPath = new Path(specPath, "_tmp." + ValidWriteIds.getMmFilePrefix(mmWriteId)); + private static Path getManifestDir(Path specPath, long txnId, int stmtId, String unionSuffix) { + Path manifestPath = new Path(specPath, "_tmp." + AcidUtils.deltaSubdir(txnId, txnId, stmtId)); return (unionSuffix == null) ? 
manifestPath : new Path(manifestPath, unionSuffix); } @@ -4139,18 +4083,18 @@ public MissingBucketsContext(TableDesc tableInfo, int numBuckets, boolean isComp } public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Configuration hconf, - boolean success, int dpLevels, int lbLevels, MissingBucketsContext mbc, long mmWriteId, + boolean success, int dpLevels, int lbLevels, MissingBucketsContext mbc, long txnId, int stmtId, Reporter reporter, boolean isMmCtas) throws IOException, HiveException { FileSystem fs = specPath.getFileSystem(hconf); - Path manifestDir = getManifestDir(specPath, mmWriteId, unionSuffix); + Path manifestDir = getManifestDir(specPath, txnId, stmtId, unionSuffix); if (!success) { - ValidWriteIds.IdPathFilter filter = new ValidWriteIds.IdPathFilter(mmWriteId, true); + JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(txnId, stmtId, true); tryDeleteAllMmFiles(fs, specPath, manifestDir, dpLevels, lbLevels, - unionSuffix, filter, mmWriteId, hconf); + filter, txnId, stmtId, hconf); return; } - Utilities.LOG14535.info("Looking for manifests in: " + manifestDir + " (" + mmWriteId + ")"); + Utilities.LOG14535.info("Looking for manifests in: " + manifestDir + " (" + txnId + ")"); // TODO# may be wrong if there are no splits (empty insert/CTAS) List manifests = new ArrayList<>(); if (fs.exists(manifestDir)) { @@ -4170,14 +4114,14 @@ public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Con } Utilities.LOG14535.info("Looking for files in: " + specPath); - ValidWriteIds.IdPathFilter filter = new ValidWriteIds.IdPathFilter(mmWriteId, true); + JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(txnId, stmtId, true); if (isMmCtas && !fs.exists(specPath)) { // TODO: do we also need to do this when creating an empty partition from select? Utilities.LOG14535.info("Creating table directory for CTAS with no output at " + specPath); FileUtils.mkdir(fs, specPath, hconf); } Path[] files = getMmDirectoryCandidates( - fs, specPath, dpLevels, lbLevels, filter, mmWriteId, hconf); + fs, specPath, dpLevels, lbLevels, filter, txnId, stmtId, hconf); ArrayList mmDirectories = new ArrayList<>(); if (files != null) { for (Path path : files) { @@ -4233,7 +4177,7 @@ public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Con finalResults[i] = new PathOnlyFileStatus(mmDirectories.get(i)); } List emptyBuckets = Utilities.removeTempOrDuplicateFiles( - fs, finalResults, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf, mmWriteId); + fs, finalResults, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf, txnId, stmtId); // create empty buckets if necessary if (emptyBuckets.size() > 0) { assert mbc != null; @@ -4284,7 +4228,7 @@ private static void deleteUncommitedFile(Path childPath, FileSystem fs) * if the entire directory is valid (has no uncommitted/temporary files). */ public static List getValidMmDirectoriesFromTableOrPart(Path path, Configuration conf, - ValidWriteIds ids, int lbLevels) throws IOException { + ValidTxnList validTxnList, int lbLevels) throws IOException { Utilities.LOG14535.info("Looking for valid MM paths under " + path); // NULL means this directory is entirely valid. 
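Several helpers above (getManifestDir, getMmDirectoryCandidatesGlobStatus, writeMmCommitManifest) agree on one directory layout but never show it in one place. The sketch below renders it with illustrative paths; the printed names assume the usual zero-padded delta_<txnId>_<txnId>_<stmtId> form returned by AcidUtils.deltaSubdir.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidUtils;

public class MmPathNamesSketch {
  // Manifest directory used by writeMmCommitManifest/handleMmTableFinalPath: a "_tmp."
  // sibling of the query's delta directory under the spec path.
  static Path manifestDir(Path specPath, long txnId, int stmtId) {
    return new Path(specPath, "_tmp." + AcidUtils.deltaSubdir(txnId, txnId, stmtId));
  }

  // Glob assembled by getMmDirectoryCandidatesGlobStatus: one "*" per dynamic-partition
  // or list-bucketing level to skip, then the delta directory name itself.
  static Path candidateGlob(Path tableOrPartPath, int dpLevels, int lbLevels, long txnId, int stmtId) {
    StringBuilder sb = new StringBuilder(tableOrPartPath.toUri().getPath());
    for (int i = 0; i < dpLevels + lbLevels; i++) {
      sb.append(Path.SEPARATOR).append("*");
    }
    sb.append(Path.SEPARATOR).append(AcidUtils.deltaSubdir(txnId, txnId, stmtId));
    return new Path(sb.toString());
  }

  public static void main(String[] args) {
    Path table = new Path("/warehouse/t");
    // With the usual zero-padded naming these print (illustrative txn 17, statement 0):
    //   /warehouse/t/_tmp.delta_0000017_0000017_0000
    //   /warehouse/t/*/*/delta_0000017_0000017_0000
    System.out.println(manifestDir(table, 17, 0));
    System.out.println(candidateGlob(table, 2, 0, 17, 0));
  }
}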
List result = null; @@ -4294,8 +4238,8 @@ private static void deleteUncommitedFile(Path childPath, FileSystem fs) for (int i = 0; i < children.length; ++i) { FileStatus file = children[i]; Path childPath = file.getPath(); - Long writeId = ValidWriteIds.extractWriteId(childPath); - if (!file.isDirectory() || writeId == null || !ids.isValid(writeId)) { + Long txnId = JavaUtils.extractTxnId(childPath); + if (!file.isDirectory() || txnId == null || !validTxnList.isTxnValid(txnId)) { Utilities.LOG14535.info("Skipping path " + childPath); if (result == null) { result = new ArrayList<>(children.length - 1); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index 740488c..902caa3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -276,9 +276,8 @@ static long parseBase(Path path) { return result; } - // INSERT_ONLY is a special operation which we only support INSERT operations, no UPDATE/DELETE public enum Operation { - NOT_ACID, INSERT, UPDATE, DELETE, INSERT_ONLY + NOT_ACID, INSERT, UPDATE, DELETE } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index c697407..2d71ee4 100755 --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -20,11 +20,9 @@ import java.io.DataInput; import java.io.DataOutput; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; @@ -34,8 +32,11 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.Map.Entry; -import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StringInternUtils; +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,8 +45,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil; @@ -424,12 +423,11 @@ protected void init(JobConf job) { */ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, JobConf conf, InputFormat inputFormat, Class inputFormatClass, int splits, - TableDesc table, Map writeIdMap, List result) + TableDesc table, List result) throws IOException { - ValidWriteIds writeIds = extractWriteIds(writeIdMap, conf, table.getTableName()); - if (writeIds != null) { - Utilities.LOG14535.info("Observing " + table.getTableName() + ": " + writeIds); - } + String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); + ValidTxnList validTxnList = txnString == null ? 
new ValidReadTxnList() : + new ValidReadTxnList(txnString); Utilities.copyTablePropertiesToConf(table, conf); @@ -437,7 +435,7 @@ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, Job pushFilters(conf, tableScan); } - Path[] finalDirs = processPathsForMmRead(dirs, conf, writeIds); + Path[] finalDirs = processPathsForMmRead(dirs, conf, validTxnList); if (finalDirs == null) { return; // No valid inputs. } @@ -462,13 +460,13 @@ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, Job } public static Path[] processPathsForMmRead(List dirs, JobConf conf, - ValidWriteIds writeIds) throws IOException { - if (writeIds == null) { + ValidTxnList validTxnList) throws IOException { + if (validTxnList == null) { return dirs.toArray(new Path[dirs.size()]); } else { List finalPaths = new ArrayList<>(dirs.size()); for (Path dir : dirs) { - processForWriteIds(dir, conf, writeIds, finalPaths); + processForWriteIds(dir, conf, validTxnList, finalPaths); } if (finalPaths.isEmpty()) { LOG.warn("No valid inputs found in " + dirs); @@ -479,7 +477,7 @@ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, Job } private static void processForWriteIds(Path dir, JobConf conf, - ValidWriteIds writeIds, List finalPaths) throws IOException { + ValidTxnList validTxnList, List finalPaths) throws IOException { FileSystem fs = dir.getFileSystem(conf); Utilities.LOG14535.warn("Checking " + dir + " (root) for inputs"); // Ignore nullscan-optimized paths. @@ -490,17 +488,17 @@ private static void processForWriteIds(Path dir, JobConf conf, FileStatus[] files = fs.listStatus(dir); // TODO: batch? LinkedList subdirs = new LinkedList<>(); for (FileStatus file : files) { - handleNonMmDirChild(file, writeIds, subdirs, finalPaths); + handleNonMmDirChild(file, validTxnList, subdirs, finalPaths); } while (!subdirs.isEmpty()) { Path subdir = subdirs.poll(); for (FileStatus file : fs.listStatus(subdir)) { - handleNonMmDirChild(file, writeIds, subdirs, finalPaths); + handleNonMmDirChild(file, validTxnList, subdirs, finalPaths); } } } - private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, + private static void handleNonMmDirChild(FileStatus file, ValidTxnList validTxnList, LinkedList subdirs, List finalPaths) { Path path = file.getPath(); Utilities.LOG14535.warn("Checking " + path + " for inputs"); @@ -508,12 +506,12 @@ private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, Utilities.LOG14535.warn("Ignoring a file not in MM directory " + path); return; } - Long writeId = ValidWriteIds.extractWriteId(path); - if (writeId == null) { + Long txnId = JavaUtils.extractTxnId(path); + if (txnId == null) { subdirs.add(path); return; } - if (!writeIds.isValid(writeId)) { + if (!validTxnList.isTxnValid(txnId)) { Utilities.LOG14535.warn("Ignoring an uncommitted directory " + path); return; } @@ -565,7 +563,6 @@ private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, StringBuilder readColumnNamesBuffer = new StringBuilder(newjob. get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "")); // for each dir, get the InputFormat, and do getSplits. 
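The read path above is split across processPathsForMmRead, processForWriteIds and handleNonMmDirChild. Condensed into one method, and stripped of the logging and the nullscan check, the walk looks like the sketch below; it leans on the JavaUtils.extractTxnId helper introduced by this patch, and the class name is illustrative.

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.ValidTxnList;

public class MmInputWalkSketch {
  // Breadth-first walk of a table or partition directory: keep delta directories whose
  // txn is committed per the ValidTxnList, recurse into anything that is not a delta
  // directory, and drop uncommitted deltas and stray files.
  static List<Path> committedDeltaDirs(FileSystem fs, Path root, ValidTxnList validTxnList)
      throws IOException {
    List<Path> finalPaths = new ArrayList<>();
    LinkedList<Path> pending = new LinkedList<>();
    pending.add(root);
    while (!pending.isEmpty()) {
      for (FileStatus file : fs.listStatus(pending.poll())) {
        Path path = file.getPath();
        if (!file.isDirectory()) {
          continue;                                // a file not inside an MM delta directory - ignore it
        }
        Long txnId = JavaUtils.extractTxnId(path);
        if (txnId == null) {
          pending.add(path);                       // not a delta directory - look inside it
        } else if (validTxnList.isTxnValid(txnId)) {
          finalPaths.add(path);                    // committed delta - a valid input
        }                                          // else: uncommitted delta - skip it
      }
    }
    return finalPaths;
  }
}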
- Map writeIdMap = new HashMap<>(); for (Path dir : dirs) { PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir); Class inputFormatClass = part.getInputFileFormatClass(); @@ -616,7 +613,7 @@ private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, addSplitsForGroup(currentDirs, currentTableScan, newjob, getInputFormatFromCache(currentInputFormatClass, job), currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length), - currentTable, writeIdMap, result); + currentTable, result); } currentDirs.clear(); @@ -638,7 +635,7 @@ private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, addSplitsForGroup(currentDirs, currentTableScan, newjob, getInputFormatFromCache(currentInputFormatClass, job), currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length), - currentTable, writeIdMap, result); + currentTable, result); } Utilities.clearWorkMapForConf(job); @@ -649,19 +646,6 @@ private static void handleNonMmDirChild(FileStatus file, ValidWriteIds writeIds, return result.toArray(new HiveInputSplit[result.size()]); } - public static ValidWriteIds extractWriteIds(Map writeIdMap, - JobConf newjob, String tableName) { - if (StringUtils.isBlank(tableName)) return null; - ValidWriteIds writeIds = writeIdMap.get(tableName); - if (writeIds == null) { - writeIds = ValidWriteIds.createFromConf(newjob, tableName); - writeIdMap.put(tableName, writeIds != null ? writeIds : ValidWriteIds.NO_WRITE_IDS); - } else if (writeIds == ValidWriteIds.NO_WRITE_IDS) { - writeIds = null; - } - return writeIds; - } - private void pushProjection(final JobConf newjob, final StringBuilder readColumnsBuffer, final StringBuilder readColumnNamesBuffer) { String readColIds = readColumnsBuffer.toString(); diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index ea87cb4..fffca90 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -32,7 +32,6 @@ import java.io.PrintStream; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -52,7 +51,6 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.ConcurrentHashMap; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; @@ -70,9 +68,9 @@ import org.apache.hadoop.hive.common.BlobStorageUtils; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.HiveStatsUtils; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable; import org.apache.hadoop.hive.conf.HiveConf; @@ -104,7 +102,6 @@ import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse; import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest; import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse; -import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsResult; import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; import 
org.apache.hadoop.hive.metastore.api.HiveObjectRef; import org.apache.hadoop.hive.metastore.api.HiveObjectType; @@ -157,7 +154,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -1581,27 +1577,13 @@ public Database getDatabaseCurrent() throws HiveException { public void loadSinglePartition(Path loadPath, String tableName, Map partSpec, boolean replace, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, boolean isSrcLocal, boolean isAcid, - boolean hasFollowingStatsTask, Long mmWriteId, boolean isCommitMmWrite) + boolean hasFollowingStatsTask, Long txnId, int stmtId) throws HiveException { Table tbl = getTable(tableName); - boolean isMmTableWrite = (mmWriteId != null); + boolean isMmTableWrite = (txnId != null); Preconditions.checkState(isMmTableWrite == MetaStoreUtils.isInsertOnlyTable(tbl.getParameters())); loadPartition(loadPath, tbl, partSpec, replace, inheritTableSpecs, - isSkewedStoreAsSubdir, isSrcLocal, isAcid, hasFollowingStatsTask, mmWriteId); - if (isMmTableWrite && isCommitMmWrite) { - // The assumption behind committing here is that this partition is the only one outputted. - commitMmTableWrite(tbl, mmWriteId); - } - } - - - public void commitMmTableWrite(Table tbl, Long mmWriteId) - throws HiveException { - try { - getMSC().finalizeTableWrite(tbl.getDbName(), tbl.getTableName(), mmWriteId, true); - } catch (TException e) { - throw new HiveException(e); - } + isSkewedStoreAsSubdir, isSrcLocal, isAcid, hasFollowingStatsTask, txnId, stmtId); } /** @@ -1627,7 +1609,7 @@ public void commitMmTableWrite(Table tbl, Long mmWriteId) */ public Partition loadPartition(Path loadPath, Table tbl, Map partSpec, boolean replace, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, - boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask, Long mmWriteId) + boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask, Long txnId, int stmtId) throws HiveException { Path tblDataLocationPath = tbl.getDataLocation(); try { @@ -1669,32 +1651,32 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin("MoveTask", "FileMoves"); // TODO: this assumes both paths are qualified; which they are, currently. - if (mmWriteId != null && loadPath.equals(newPartPath)) { + if (txnId != null && loadPath.equals(newPartPath)) { // MM insert query, move itself is a no-op. Utilities.LOG14535.info("not moving " + loadPath + " to " + newPartPath + " (MM)"); assert !isAcid; if (areEventsForDmlNeeded(tbl, oldPart)) { - newFiles = listFilesCreatedByQuery(loadPath, mmWriteId); + newFiles = listFilesCreatedByQuery(loadPath, txnId, stmtId); } Utilities.LOG14535.info("maybe deleting stuff from " + oldPartPath + " (new " + newPartPath + ") for replace"); if (replace && oldPartPath != null) { deleteOldPathForReplace(newPartPath, oldPartPath, getConf(), - new ValidWriteIds.IdPathFilter(mmWriteId, false, true), mmWriteId != null, + new JavaUtils.IdPathFilter(txnId, stmtId, false, true), true, tbl.isStoredAsSubDirectories() ? tbl.getSkewedColNames().size() : 0); } } else { // Either a non-MM query, or a load into MM table from an external source. 
PathFilter filter = FileUtils.HIDDEN_FILES_PATH_FILTER; Path destPath = newPartPath; - if (mmWriteId != null) { + if (txnId != null) { // We will load into MM directory, and delete from the parent if needed. - destPath = new Path(destPath, ValidWriteIds.getMmFilePrefix(mmWriteId)); - filter = replace ? new ValidWriteIds.IdPathFilter(mmWriteId, false, true) : filter; + destPath = new Path(destPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId)); + filter = replace ? new JavaUtils.IdPathFilter(txnId, stmtId, false, true) : filter; } Utilities.LOG14535.info("moving " + loadPath + " to " + destPath); if (replace || (oldPart == null && !isAcid)) { replaceFiles(tbl.getPath(), loadPath, destPath, oldPartPath, getConf(), - isSrcLocal, filter, mmWriteId != null); + isSrcLocal, filter, txnId != null); } else { if (areEventsForDmlNeeded(tbl, oldPart)) { newFiles = Collections.synchronizedList(new ArrayList()); @@ -1779,9 +1761,9 @@ private boolean areEventsForDmlNeeded(Table tbl, Partition oldPart) { return conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && oldPart != null; } - private List listFilesCreatedByQuery(Path loadPath, long mmWriteId) throws HiveException { + private List listFilesCreatedByQuery(Path loadPath, long txnId, int stmtId) throws HiveException { List newFiles = new ArrayList(); - final String filePrefix = ValidWriteIds.getMmFilePrefix(mmWriteId); + final String filePrefix = AcidUtils.deltaSubdir(txnId, txnId, stmtId); FileStatus[] srcs; FileSystem srcFs; try { @@ -1944,11 +1926,11 @@ private void constructOneLBLocationMap(FileStatus fSta, * @throws HiveException */ private Set getValidPartitionsInPath( - int numDP, int numLB, Path loadPath, Long mmWriteId) throws HiveException { + int numDP, int numLB, Path loadPath, Long txnId, int stmtId) throws HiveException { Set validPartitions = new HashSet(); try { FileSystem fs = loadPath.getFileSystem(conf); - if (mmWriteId == null) { + if (txnId == null) { FileStatus[] leafStatus = HiveStatsUtils.getFileStatusRecurse(loadPath, numDP, fs); // Check for empty partitions for (FileStatus s : leafStatus) { @@ -1963,7 +1945,7 @@ private void constructOneLBLocationMap(FileStatus fSta, // The non-MM path only finds new partitions, as it is looking at the temp path. // To produce the same effect, we will find all the partitions affected by this write ID. Path[] leafStatus = Utilities.getMmDirectoryCandidates( - fs, loadPath, numDP, numLB, null, mmWriteId, conf); + fs, loadPath, numDP, numLB, null, txnId, stmtId, conf); for (Path p : leafStatus) { Path dpPath = p.getParent(); // Skip the MM directory that we have found. 
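The getParent() call above skips the query's delta directory, and the loop that follows is there to peel off any list-bucketing levels in the same way, so each candidate's partition directory falls out of a short walk back up the path. A small illustration with a made-up warehouse location:

import org.apache.hadoop.fs.Path;

public class PartitionFromDeltaSketch {
  // One level up for the delta_<txn>_<txn>_<stmt> directory itself, plus one per
  // list-bucketing level, recovers the partition directory touched by this write.
  static Path partitionDir(Path deltaCandidate, int lbLevels) {
    Path dpPath = deltaCandidate.getParent();   // skip the delta directory
    for (int i = 0; i < lbLevels; i++) {
      dpPath = dpPath.getParent();              // skip list-bucketing directories, if any
    }
    return dpPath;
  }

  public static void main(String[] args) {
    Path candidate = new Path("/warehouse/t/ds=2017-01-01/delta_0000012_0000012_0000");
    System.out.println(partitionDir(candidate, 0)); // /warehouse/t/ds=2017-01-01
  }
}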
for (int i = 0; i < numLB; ++i) { @@ -2009,8 +1991,8 @@ private void constructOneLBLocationMap(FileStatus fSta, */ public Map, Partition> loadDynamicPartitions(final Path loadPath, final String tableName, final Map partSpec, final boolean replace, - final int numDP, final int numLB, final boolean isAcid, final long txnId, - final boolean hasFollowingStatsTask, final AcidUtils.Operation operation, final Long mmWriteId) + final int numDP, final int numLB, final boolean isAcid, final long txnId, final int stmtId, + final boolean hasFollowingStatsTask, final AcidUtils.Operation operation) throws HiveException { final Map, Partition> partitionsMap = @@ -2025,7 +2007,7 @@ private void constructOneLBLocationMap(FileStatus fSta, // Get all valid partition paths and existing partitions for them (if any) final Table tbl = getTable(tableName); - final Set validPartitions = getValidPartitionsInPath(numDP, numLB, loadPath, mmWriteId); + final Set validPartitions = getValidPartitionsInPath(numDP, numLB, loadPath, txnId, stmtId); final int partsToLoad = validPartitions.size(); final AtomicInteger partitionsLoaded = new AtomicInteger(0); @@ -2059,7 +2041,7 @@ public Void call() throws Exception { Utilities.LOG14535.info("loadPartition called for DPP from " + partPath + " to " + tbl.getTableName()); Partition newPartition = loadPartition(partPath, tbl, fullPartSpec, replace, true, numLB > 0, - false, isAcid, hasFollowingStatsTask, mmWriteId); + false, isAcid, hasFollowingStatsTask, txnId, stmtId); partitionsMap.put(fullPartSpec, newPartition); if (inPlaceEligible) { @@ -2091,10 +2073,6 @@ public Void call() throws Exception { for (Future future : futures) { future.get(); } - if (mmWriteId != null) { - // Commit after we have processed all the partitions. - commitMmTableWrite(tbl, mmWriteId); - } } catch (InterruptedException | ExecutionException e) { LOG.debug("Cancelling " + futures.size() + " dynamic loading tasks"); //cancel other futures @@ -2145,8 +2123,7 @@ public Void call() throws Exception { */ public void loadTable(Path loadPath, String tableName, boolean replace, boolean isSrcLocal, boolean isSkewedStoreAsSubdir, boolean isAcid, boolean hasFollowingStatsTask, - Long mmWriteId) throws HiveException { - + Long txnId, int stmtId) throws HiveException { List newFiles = null; Table tbl = getTable(tableName); HiveConf sessionConf = SessionState.getSessionConf(); @@ -2154,28 +2131,28 @@ public void loadTable(Path loadPath, String tableName, boolean replace, boolean newFiles = Collections.synchronizedList(new ArrayList()); } // TODO: this assumes both paths are qualified; which they are, currently. - if (mmWriteId != null && loadPath.equals(tbl.getPath())) { + if (txnId != null && loadPath.equals(tbl.getPath())) { Utilities.LOG14535.info("not moving " + loadPath + " to " + tbl.getPath()); if (replace) { Path tableDest = tbl.getPath(); deleteOldPathForReplace(tableDest, tableDest, sessionConf, - new ValidWriteIds.IdPathFilter(mmWriteId, false, true), mmWriteId != null, + new JavaUtils.IdPathFilter(txnId, stmtId, false, true), true, tbl.isStoredAsSubDirectories() ? tbl.getSkewedColNames().size() : 0); } - newFiles = listFilesCreatedByQuery(loadPath, mmWriteId); + newFiles = listFilesCreatedByQuery(loadPath, txnId, stmtId); } else { // Either a non-MM query, or a load into MM table from an external source. 
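The loadTable branch above mirrors the loadPartition branch earlier in this file; pulled together, the destination choice reduces to the decision tree sketched below (method and variable names are illustrative, and the delta naming comes from AcidUtils.deltaSubdir as in the patch):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidUtils;

public class MmLoadDestinationSketch {
  // Consolidated view of the branching in Hive.loadPartition/loadTable after this patch.
  static Path destination(Path loadPath, Path targetPath, Long txnId, int stmtId) {
    if (txnId == null) {
      // Non-MM load: files are moved straight into the table/partition directory.
      return targetPath;
    }
    if (loadPath.equals(targetPath)) {
      // MM insert query: the data already sits in its delta directory under the target,
      // so the move itself is a no-op.
      return loadPath;
    }
    // Load into an MM table from an external source: place the files in this query's
    // delta directory; on replace, the rest of the parent is cleaned up around it.
    return new Path(targetPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId));
  }
}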
Path tblPath = tbl.getPath(), destPath = tblPath; PathFilter filter = FileUtils.HIDDEN_FILES_PATH_FILTER; - if (mmWriteId != null) { + if (txnId != null) { // We will load into MM directory, and delete from the parent if needed. - destPath = new Path(destPath, ValidWriteIds.getMmFilePrefix(mmWriteId)); - filter = replace ? new ValidWriteIds.IdPathFilter(mmWriteId, false, true) : filter; + destPath = new Path(destPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId)); + filter = replace ? new JavaUtils.IdPathFilter(txnId, stmtId, false, true) : filter; } Utilities.LOG14535.info("moving " + loadPath + " to " + tblPath + " (replace = " + replace + ")"); if (replace) { replaceFiles(tblPath, loadPath, destPath, tblPath, - sessionConf, isSrcLocal, filter, mmWriteId != null); + sessionConf, isSrcLocal, filter, txnId != null); } else { try { FileSystem fs = tbl.getDataLocation().getFileSystem(sessionConf); @@ -2216,11 +2193,6 @@ public void loadTable(Path loadPath, String tableName, boolean replace, boolean } catch (InvalidOperationException e) { throw new HiveException(e); } - - if (mmWriteId != null) { - commitMmTableWrite(tbl, mmWriteId); - } - fireInsertEvent(tbl, null, newFiles); } @@ -4267,25 +4239,4 @@ public void addForeignKey(List foreignKeyCols) throw new HiveException(e); } } - - public long getNextTableWriteId(String dbName, String tableName) throws HiveException { - try { - return getMSC().getNextTableWriteId(dbName, tableName); - } catch (Exception e) { - throw new HiveException(e); - } - } - - public ValidWriteIds getValidWriteIdsForTable( - String dbName, String tableName) throws HiveException { - try { - // TODO: decode ID ranges here if we use that optimization - GetValidWriteIdsResult result = getMSC().getValidWriteIds(dbName, tableName); - return new ValidWriteIds(result.getLowWatermarkId(), result.getHighWatermarkId(), - result.isSetAreIdsValid() && result.isAreIdsValid(), - result.isSetIds() ? new HashSet(result.getIds()) : null); - } catch (Exception e) { - throw new HiveException(e); - } - } -}; +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 38157a6..05df611 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -1640,7 +1640,8 @@ public static MapWork createMergeTask(FileSinkDesc fsInputDesc, Path finalName, } else { fmd = new OrcFileMergeDesc(); } - fmd.setMmWriteId(fsInputDesc.getMmWriteId()); + fmd.setTxnId(fsInputDesc.getMmWriteId()); + fmd.setStmtId(fsInputDesc.getStatementId()); fmd.setDpCtx(fsInputDesc.getDynPartCtx()); fmd.setOutputPath(finalName); fmd.setHasDynamicPartitions(work.hasDynamicPartitions()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java index 64db005..b50f664 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java @@ -86,7 +86,7 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) ParseContext pc = physicalContext.getParseContext(); if (pc.getLoadTableWork() != null) { for (LoadTableDesc ltd : pc.getLoadTableWork()) { - if (ltd.getMmWriteId() == null) continue; + if (ltd.getTxnId() == null) continue; // See the path in FSOP that calls fs.exists on finalPath. 
LOG.debug("Not using skew join because the destination table " + ltd.getTable().getTableName() + " is an insert_only table"); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index f762fee..110a03f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -116,7 +116,8 @@ */ protected Set acidFileSinks = new HashSet(); - // whether any ACID table is involved in a query + // whether any ACID table or Insert-only (mm) table is involved in a query + // They both require DbTxnManager and both need to recordValidTxns when acquiring locks in Driver protected boolean acidInQuery; public static int HIVE_COLUMN_ORDER_ASC = 1; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java index b5820d6..f33252c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java @@ -18,15 +18,8 @@ package org.apache.hadoop.hive.ql.parse; -import org.apache.hadoop.hive.ql.metadata.HiveException; - -import org.apache.hadoop.hive.common.ValidWriteIds; - -import java.util.List; - -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.ql.metadata.HiveException; import java.io.FileNotFoundException; import java.io.IOException; @@ -41,13 +34,17 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.ReplCopyTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.metadata.Hive; @@ -212,8 +209,6 @@ public static void prepareExport( int lbLevels = isMmTable && ts.tableHandle.isStoredAsSubDirectories() ? ts.tableHandle.getSkewedColNames().size() : 0; - ValidWriteIds ids = isMmTable ? db.getValidWriteIdsForTable( - ts.tableHandle.getDbName(), ts.tableHandle.getTableName()) : null; if (ts.tableHandle.isPartitioned()) { for (Partition partition : partitions) { Path fromPath = partition.getDataLocation(); @@ -227,7 +222,7 @@ public static void prepareExport( } copyTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toPartPath, conf); } else { - CopyWork cw = createCopyWork(isMmTable, lbLevels, ids, fromPath, toPartPath, conf); + CopyWork cw = createCopyWork(isMmTable, lbLevels, new ValidReadTxnList(), fromPath, toPartPath, conf); copyTask = TaskFactory.get(cw, conf); } rootTasks.add(copyTask); @@ -246,7 +241,7 @@ public static void prepareExport( copyTask = ReplCopyTask.getDumpCopyTask(replicationSpec, fromPath, toDataPath, conf); } else { // TODO# master merge - did master remove this path or did it never exit? 
we need it for MM - CopyWork cw = createCopyWork(isMmTable, lbLevels, ids, fromPath, toDataPath, conf); + CopyWork cw = createCopyWork(isMmTable, lbLevels, new ValidReadTxnList(), fromPath, toDataPath, conf); copyTask = TaskFactory.get(cw, conf); } rootTasks.add(copyTask); @@ -258,14 +253,14 @@ public static void prepareExport( } } - private static CopyWork createCopyWork(boolean isMmTable, int lbLevels, ValidWriteIds ids, + private static CopyWork createCopyWork(boolean isMmTable, int lbLevels, ValidTxnList validTxnList, Path fromPath, Path toDataPath, Configuration conf) throws IOException { List validPaths = null; if (isMmTable) { fromPath = fromPath.getFileSystem(conf).makeQualified(fromPath); - validPaths = Utilities.getValidMmDirectoriesFromTableOrPart(fromPath, conf, ids, lbLevels); + validPaths = Utilities.getValidMmDirectoriesFromTableOrPart(fromPath, conf, validTxnList, lbLevels); } - if (validPaths == null) { + if (validPaths == null || validPaths.isEmpty()) { return new CopyWork(fromPath, toDataPath, false); // Not MM, or no need to skip anything. } else { return createCopyWorkForValidPaths(fromPath, toDataPath, validPaths); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index f4fe6ac..674f20e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -25,7 +25,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; -import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; @@ -34,13 +33,11 @@ import org.antlr.runtime.tree.Tree; import org.apache.commons.lang.ObjectUtils; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.TableType; @@ -58,6 +55,7 @@ import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -297,27 +295,28 @@ public static boolean prepareImport( tableExists = true; } - Long mmWriteId = null; + Long txnId = null; + int stmtId = 0; if (table != null && MetaStoreUtils.isInsertOnlyTable(table.getParameters())) { - mmWriteId = x.getHive().getNextTableWriteId(table.getDbName(), table.getTableName()); + txnId = 0l; //todo it will be replaced with txnId in Driver } else if (table == null && isSourceMm) { // We could import everything as is - directories and IDs, but that won't work with ACID // txn ids in future. So, let's import everything into the new MM directory with ID == 0. 
- mmWriteId = 0l; + txnId = 0l; } - if (mmWriteId != null) { - tblDesc.setInitialMmWriteId(mmWriteId); + if (txnId != null) { + tblDesc.setInitialMmWriteId(txnId); } if (!replicationSpec.isInReplicationScope()) { createRegularImportTasks( tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table, - fromURI, fs, wh, x, mmWriteId, isSourceMm); + fromURI, fs, wh, x, txnId, stmtId, isSourceMm); } else { createReplImportTasks( tblDesc, partitionDescs, isPartSpecSet, replicationSpec, waitOnPrecursor, table, - fromURI, fs, wh, x, mmWriteId, isSourceMm); + fromURI, fs, wh, x, txnId, stmtId, isSourceMm); } return tableExists; } @@ -378,17 +377,17 @@ private static CreateTableDesc getBaseCreateTableDescFromTable(String dbName, private static Task loadTable(URI fromURI, Table table, boolean replace, Path tgtPath, ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x, - Long mmWriteId, boolean isSourceMm) { + Long txnId, int stmtId, boolean isSourceMm) { Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME); - Path destPath = mmWriteId == null ? x.getCtx().getExternalTmpPath(tgtPath) - : new Path(tgtPath, ValidWriteIds.getMmFilePrefix(mmWriteId)); + Path destPath = txnId == null ? x.getCtx().getExternalTmpPath(tgtPath) + : new Path(tgtPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId)); Utilities.LOG14535.info("adding import work for table with source location: " + dataPath + "; table: " + tgtPath + "; copy destination " + destPath + "; mm " - + mmWriteId + " (src " + isSourceMm + ") for " + (table == null ? "a new table" : table.getTableName())); + + txnId + " (src " + isSourceMm + ") for " + (table == null ? "a new table" : table.getTableName())); Task copyTask = null; if (replicationSpec.isInReplicationScope()) { - if (isSourceMm || mmWriteId != null) { + if (isSourceMm || txnId != null) { // TODO: ReplCopyTask is completely screwed. Need to support when it's not as screwed. throw new RuntimeException( "Not supported right now because Replication is completely screwed"); @@ -401,7 +400,9 @@ private static CreateTableDesc getBaseCreateTableDescFromTable(String dbName, } LoadTableDesc loadTableWork = new LoadTableDesc(destPath, - Utilities.getTableDesc(table), new TreeMap(), replace, mmWriteId); + Utilities.getTableDesc(table), new TreeMap(), replace, txnId); + loadTableWork.setTxnId(txnId); + loadTableWork.setStmtId(stmtId); MoveWork mv = new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false); Task loadTableTask = TaskFactory.get(mv, x.getConf()); copyTask.addDependentTask(loadTableTask); @@ -457,7 +458,7 @@ private static CreateTableDesc getBaseCreateTableDescFromTable(String dbName, private static Task addSinglePartition(URI fromURI, FileSystem fs, CreateTableDesc tblDesc, Table table, Warehouse wh, AddPartitionDesc addPartitionDesc, ReplicationSpec replicationSpec, - EximUtil.SemanticAnalyzerWrapperContext x, Long mmWriteId, boolean isSourceMm, + EximUtil.SemanticAnalyzerWrapperContext x, Long txnId, int stmtId, boolean isSourceMm, Task commitTask) throws MetaException, IOException, HiveException { AddPartitionDesc.OnePartitionDesc partSpec = addPartitionDesc.getPartition(0); @@ -476,17 +477,17 @@ private static CreateTableDesc getBaseCreateTableDescFromTable(String dbName, + partSpecToString(partSpec.getPartSpec()) + " with source location: " + srcLocation); Path tgtLocation = new Path(partSpec.getLocation()); - Path destPath = mmWriteId == null ? 
x.getCtx().getExternalTmpPath(tgtLocation) - : new Path(tgtLocation, ValidWriteIds.getMmFilePrefix(mmWriteId)); - Path moveTaskSrc = mmWriteId == null ? destPath : tgtLocation; + Path destPath = txnId == null ? x.getCtx().getExternalTmpPath(tgtLocation) + : new Path(tgtLocation, AcidUtils.deltaSubdir(txnId, txnId, stmtId)); + Path moveTaskSrc = txnId == null ? destPath : tgtLocation; Utilities.LOG14535.info("adding import work for partition with source location: " + srcLocation + "; target: " + tgtLocation + "; copy dest " + destPath + "; mm " - + mmWriteId + " (src " + isSourceMm + ") for " + partSpecToString(partSpec.getPartSpec())); + + txnId + " (src " + isSourceMm + ") for " + partSpecToString(partSpec.getPartSpec())); Task copyTask = null; if (replicationSpec.isInReplicationScope()) { - if (isSourceMm || mmWriteId != null) { + if (isSourceMm || txnId != null) { // TODO: ReplCopyTask is completely screwed. Need to support when it's not as screwed. throw new RuntimeException( "Not supported right now because Replication is completely screwed"); @@ -502,11 +503,13 @@ private static CreateTableDesc getBaseCreateTableDescFromTable(String dbName, Task addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc), x.getConf()); LoadTableDesc loadTableWork = new LoadTableDesc(moveTaskSrc, Utilities.getTableDesc(table), - partSpec.getPartSpec(), true, mmWriteId); + partSpec.getPartSpec(), true, txnId); + loadTableWork.setTxnId(txnId); + loadTableWork.setStmtId(stmtId); loadTableWork.setInheritTableSpecs(false); // Do not commit the write ID from each task; need to commit once. // TODO: we should just change the import to use a single MoveTask, like dynparts. - loadTableWork.setIntermediateInMmWrite(mmWriteId != null); + loadTableWork.setIntermediateInMmWrite(txnId != null); Task loadPartTask = TaskFactory.get(new MoveWork( x.getInputs(), x.getOutputs(), loadTableWork, null, false), x.getConf()); copyTask.addDependentTask(loadPartTask); @@ -802,21 +805,21 @@ private static String checkParams(Map map1, private static void createRegularImportTasks( CreateTableDesc tblDesc, List partitionDescs, boolean isPartSpecSet, ReplicationSpec replicationSpec, Table table, URI fromURI, FileSystem fs, Warehouse wh, - EximUtil.SemanticAnalyzerWrapperContext x, Long mmWriteId, boolean isSourceMm) + EximUtil.SemanticAnalyzerWrapperContext x, Long txnId, int stmtId, boolean isSourceMm) throws HiveException, URISyntaxException, IOException, MetaException { if (table != null) { if (table.isPartitioned()) { x.getLOG().debug("table partitioned"); Task ict = createImportCommitTask( - table.getDbName(), table.getTableName(), mmWriteId, x.getConf()); + table.getDbName(), table.getTableName(), txnId, stmtId, x.getConf()); for (AddPartitionDesc addPartitionDesc : partitionDescs) { Map partSpec = addPartitionDesc.getPartition(0).getPartSpec(); org.apache.hadoop.hive.ql.metadata.Partition ptn = null; if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) { x.getTasks().add(addSinglePartition( - fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, isSourceMm, ict)); + fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, txnId, stmtId, isSourceMm, ict)); } else { throw new SemanticException( ErrorMsg.PARTITION_EXISTS.getMsg(partSpecToString(partSpec))); @@ -828,7 +831,7 @@ private static void createRegularImportTasks( Path tgtPath = new Path(table.getDataLocation().toString()); FileSystem tgtFs = FileSystem.get(tgtPath.toUri(), 
x.getConf()); checkTargetLocationEmpty(tgtFs, tgtPath, replicationSpec, x); - loadTable(fromURI, table, false, tgtPath, replicationSpec, x, mmWriteId, isSourceMm); + loadTable(fromURI, table, false, tgtPath, replicationSpec, x, txnId, stmtId, isSourceMm); } // Set this to read because we can't overwrite any existing partitions x.getOutputs().add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK)); @@ -846,10 +849,10 @@ private static void createRegularImportTasks( if (isPartitioned(tblDesc)) { Task ict = createImportCommitTask( - tblDesc.getDatabaseName(), tblDesc.getTableName(), mmWriteId, x.getConf()); + tblDesc.getDatabaseName(), tblDesc.getTableName(), txnId, stmtId, x.getConf()); for (AddPartitionDesc addPartitionDesc : partitionDescs) { t.addDependentTask(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, - replicationSpec, x, mmWriteId, isSourceMm, ict)); + replicationSpec, x, txnId, stmtId, isSourceMm, ict)); } } else { x.getLOG().debug("adding dependent CopyWork/MoveWork for table"); @@ -866,7 +869,7 @@ private static void createRegularImportTasks( } FileSystem tgtFs = FileSystem.get(tablePath.toUri(), x.getConf()); checkTargetLocationEmpty(tgtFs, tablePath, replicationSpec,x); - t.addDependentTask(loadTable(fromURI, table, false, tablePath, replicationSpec, x, mmWriteId, isSourceMm)); + t.addDependentTask(loadTable(fromURI, table, false, tablePath, replicationSpec, x, txnId, stmtId, isSourceMm)); } } x.getTasks().add(t); @@ -874,10 +877,10 @@ private static void createRegularImportTasks( } private static Task createImportCommitTask( - String dbName, String tblName, Long mmWriteId, HiveConf conf) { + String dbName, String tblName, Long txnId, int stmtId, HiveConf conf) { @SuppressWarnings("unchecked") - Task ict = (mmWriteId == null) ? null : TaskFactory.get( - new ImportCommitWork(dbName, tblName, mmWriteId), conf); + Task ict = (txnId == null) ? 
null : TaskFactory.get( + new ImportCommitWork(dbName, tblName, txnId, stmtId), conf); return ict; } @@ -889,7 +892,7 @@ private static void createReplImportTasks( List partitionDescs, boolean isPartSpecSet, ReplicationSpec replicationSpec, boolean waitOnPrecursor, Table table, URI fromURI, FileSystem fs, Warehouse wh, - EximUtil.SemanticAnalyzerWrapperContext x, Long mmWriteId, boolean isSourceMm) + EximUtil.SemanticAnalyzerWrapperContext x, Long txnId, int stmtId, boolean isSourceMm) throws HiveException, URISyntaxException, IOException, MetaException { Task dr = null; @@ -958,15 +961,15 @@ private static void createReplImportTasks( if (!replicationSpec.isMetadataOnly()) { if (isPartitioned(tblDesc)) { Task ict = createImportCommitTask( - tblDesc.getDatabaseName(), tblDesc.getTableName(), mmWriteId, x.getConf()); + tblDesc.getDatabaseName(), tblDesc.getTableName(), txnId, stmtId, x.getConf()); for (AddPartitionDesc addPartitionDesc : partitionDescs) { addPartitionDesc.setReplicationSpec(replicationSpec); t.addDependentTask( - addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, isSourceMm, ict)); + addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, txnId, stmtId, isSourceMm, ict)); } } else { x.getLOG().debug("adding dependent CopyWork/MoveWork for table"); - t.addDependentTask(loadTable(fromURI, table, true, new Path(tblDesc.getLocation()), replicationSpec, x, mmWriteId, isSourceMm)); + t.addDependentTask(loadTable(fromURI, table, true, new Path(tblDesc.getLocation()), replicationSpec, x, txnId, stmtId, isSourceMm)); } } if (dr == null){ @@ -986,11 +989,11 @@ private static void createReplImportTasks( Map partSpec = addPartitionDesc.getPartition(0).getPartSpec(); org.apache.hadoop.hive.ql.metadata.Partition ptn = null; Task ict = replicationSpec.isMetadataOnly() ? 
null : createImportCommitTask( - tblDesc.getDatabaseName(), tblDesc.getTableName(), mmWriteId, x.getConf()); + tblDesc.getDatabaseName(), tblDesc.getTableName(), txnId, stmtId, x.getConf()); if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) { if (!replicationSpec.isMetadataOnly()){ x.getTasks().add(addSinglePartition( - fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, isSourceMm, ict)); + fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, txnId, stmtId, isSourceMm, ict)); } } else { // If replicating, then the partition already existing means we need to replace, maybe, if @@ -998,7 +1001,7 @@ private static void createReplImportTasks( if (replicationSpec.allowReplacementInto(ptn)){ if (!replicationSpec.isMetadataOnly()){ x.getTasks().add(addSinglePartition( - fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, mmWriteId, isSourceMm, ict)); + fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, txnId, stmtId, isSourceMm, ict)); } else { x.getTasks().add(alterSinglePartition( fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, ptn, x)); @@ -1027,7 +1030,7 @@ private static void createReplImportTasks( if (!replicationSpec.isMetadataOnly()) { // repl-imports are replace-into unless the event is insert-into loadTable(fromURI, table, !replicationSpec.isInsert(), new Path(fromURI), - replicationSpec, x, mmWriteId, isSourceMm); + replicationSpec, x, txnId, stmtId, isSourceMm); } else { x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec)); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java index d3b4da1..f31775e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java @@ -20,7 +20,6 @@ import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.ql.Driver; @@ -44,7 +43,6 @@ public class IndexUpdater { private List loadTableWork; private HiveConf conf; - private Configuration parentConf; // Assumes one instance of this + single-threaded compilation for each query. 
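// --- Editor's note: illustrative sketch, not part of the patch -----------------------------------
// With the mmWriteId/ValidWriteIds plumbing removed from IndexUpdater, the rebuild path is driven
// purely by the LoadTableDesc list: the updater compiles "ALTER INDEX ... REBUILD" statements and
// hands their root tasks back to the caller. A minimal caller might look roughly like the lines
// below; generateUpdateTasks() is the entry point as recalled, so treat the name (and the
// caller-side rootTasks list) as assumptions rather than something this patch defines.
IndexUpdater updater = new IndexUpdater(loadTableWork, inputs, conf);
List<Task<? extends Serializable>> rebuildTasks = updater.generateUpdateTasks(); // may throw HiveException
rootTasks.addAll(rebuildTasks);  // hypothetical caller-side task list
// Because the rebuild no longer needs to push a write id into the parent Configuration, the
// parentConf field removed below was dead state.
// --------------------------------------------------------------------------------------------------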
private Hive hive; private List> tasks; @@ -54,7 +52,6 @@ public IndexUpdater(List loadTableWork, Set inputs, Configuration conf) { this.loadTableWork = loadTableWork; this.inputs = inputs; - this.parentConf = conf; this.conf = new HiveConf(conf, IndexUpdater.class); this.tasks = new LinkedList>(); } @@ -63,7 +60,6 @@ public IndexUpdater(LoadTableDesc loadTableWork, Set inputs, Configuration conf) { this.loadTableWork = new LinkedList(); this.loadTableWork.add(loadTableWork); - this.parentConf = conf; this.conf = new HiveConf(conf, IndexUpdater.class); this.tasks = new LinkedList>(); this.inputs = inputs; @@ -79,15 +75,15 @@ public IndexUpdater(LoadTableDesc loadTableWork, Set inputs, Map partSpec = ltd.getPartitionSpec(); if (partSpec == null || partSpec.size() == 0) { //unpartitioned table, update whole index - doIndexUpdate(tblIndexes, ltd.getMmWriteId()); + doIndexUpdate(tblIndexes); } else { - doIndexUpdate(tblIndexes, partSpec, ltd.getMmWriteId()); + doIndexUpdate(tblIndexes, partSpec); } } return tasks; } - private void doIndexUpdate(List tblIndexes, Long mmWriteId) throws HiveException { + private void doIndexUpdate(List tblIndexes) throws HiveException { for (Index idx : tblIndexes) { StringBuilder sb = new StringBuilder(); sb.append("ALTER INDEX "); @@ -96,21 +92,20 @@ private void doIndexUpdate(List tblIndexes, Long mmWriteId) throws HiveEx sb.append(idx.getDbName()).append('.'); sb.append(idx.getOrigTableName()); sb.append(" REBUILD"); - compileRebuild(sb.toString(), idx, mmWriteId); + compileRebuild(sb.toString()); } } private void doIndexUpdate(List tblIndexes, Map - partSpec, Long mmWriteId) throws HiveException { + partSpec) throws HiveException { for (Index index : tblIndexes) { if (containsPartition(index, partSpec)) { - doIndexUpdate(index, partSpec, mmWriteId); + doIndexUpdate(index, partSpec); } } } - private void doIndexUpdate(Index index, Map partSpec, Long mmWriteId) - throws HiveException { + private void doIndexUpdate(Index index, Map partSpec) { StringBuilder ps = new StringBuilder(); boolean first = true; ps.append("("); @@ -134,18 +129,12 @@ private void doIndexUpdate(Index index, Map partSpec, Long mmWri sb.append(" PARTITION "); sb.append(ps.toString()); sb.append(" REBUILD"); - compileRebuild(sb.toString(), index, mmWriteId); + compileRebuild(sb.toString()); } - private void compileRebuild(String query, Index index, Long mmWriteId) - throws HiveException { + private void compileRebuild(String query) { Driver driver = new Driver(this.conf); driver.compile(query, false); - if (mmWriteId != null) { - // TODO: this is rather fragile - ValidWriteIds.addCurrentToConf( - parentConf, index.getDbName(), index.getOrigTableName(), mmWriteId); - } tasks.addAll(driver.getPlan().getRootTasks()); inputs.addAll(driver.getPlan().getInputs()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index 04e8cac..5ef77f5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -20,8 +20,6 @@ import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; - import java.io.IOException; import java.io.Serializable; import java.net.URI; @@ -271,19 +269,18 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { } } - Long mmWriteId = null; + Long txnId = null; + int stmtId = 0; Table tbl = ts.tableHandle; if 
(MetaStoreUtils.isInsertOnlyTable(tbl.getParameters())) { - try { - mmWriteId = db.getNextTableWriteId(tbl.getDbName(), tbl.getTableName()); - } catch (HiveException e) { - throw new SemanticException(e); - } + txnId = 0l; //todo to be replaced with txnId in Driver } LoadTableDesc loadTableWork; loadTableWork = new LoadTableDesc(new Path(fromURI), - Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite, mmWriteId); + Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite, txnId); + loadTableWork.setTxnId(txnId); + loadTableWork.setStmtId(stmtId); if (preservePartitionSpecs){ // Note : preservePartitionSpecs=true implies inheritTableSpecs=false but // but preservePartitionSpecs=false(default) here is not sufficient enough diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 34f2ac4..85254ce 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -75,7 +75,6 @@ import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.SQLForeignKey; import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -6618,7 +6617,7 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb, } input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(), nullOrder.toString(), maxReducers, (AcidUtils.isFullAcidTable(dest_tab) ? - getAcidType(dest_tab, table_desc.getOutputFileFormatClass(), dest) : AcidUtils.Operation.NOT_ACID)); + getAcidType(table_desc.getOutputFileFormatClass(), dest) : AcidUtils.Operation.NOT_ACID)); reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator)input.getParentOperators().get(0)); ctx.setMultiFileSpray(multiFileSpray); ctx.setNumFiles(numFiles); @@ -6697,7 +6696,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) ListBucketingCtx lbCtx = null; Map partSpec = null; boolean isMmTable = false, isMmCtas = false; - Long mmWriteId = null; + Long txnId = null; switch (dest_type.intValue()) { case QBMetaData.DEST_TABLE: { @@ -6751,17 +6750,18 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) if (!isNonNativeTable) { AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID; if (destTableIsAcid) { - acidOp = getAcidType(dest_tab, table_desc.getOutputFileFormatClass(), dest); + acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); checkAcidConstraints(qb, table_desc, dest_tab, acidOp); } - try { - mmWriteId = getMmWriteId(dest_tab, isMmTable); - } catch (HiveException e) { - throw new SemanticException(e); + if (MetaStoreUtils.isInsertOnlyTable(table_desc.getProperties())) { + acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); + } + if (isMmTable) { + txnId = 0l; //todo to be replaced by actual txnId } boolean isReplace = !qb.getParseInfo().isInsertIntoTable( dest_tab.getDbName(), dest_tab.getTableName()); - ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp, isReplace, mmWriteId); + ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp, isReplace, txnId); ltd.setLbCtx(lbCtx); loadTableWork.add(ltd); } else { @@ -6814,16 +6814,16 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) 
dest_part.isStoredAsSubDirectories(), conf); AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID; if (destTableIsAcid) { - acidOp = getAcidType(dest_tab, table_desc.getOutputFileFormatClass(), dest); + acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); checkAcidConstraints(qb, table_desc, dest_tab, acidOp); } - try { - mmWriteId = getMmWriteId(dest_tab, isMmTable); - } catch (HiveException e) { - // How is this a semantic exception? Stupid Java and signatures. - throw new SemanticException(e); + if (MetaStoreUtils.isInsertOnlyTable(dest_part.getTable().getParameters())) { + acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); } - ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp, mmWriteId); + if (isMmTable) { + txnId = 0l; //todo to be replaced by actual txnId + } + ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp, txnId); ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())); ltd.setLbCtx(lbCtx); @@ -6857,10 +6857,8 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) destTableIsMaterialization = tblDesc.isMaterialization(); if (!destTableIsTemporary && MetaStoreUtils.isInsertOnlyTable(tblDesc.getTblProps(), true)) { isMmTable = isMmCtas = true; - // TODO# this should really get current ACID txn; assuming ACID works correctly the txn - // should have been opened to create the ACID table. For now use the first ID. - mmWriteId = 0l; - tblDesc.setInitialMmWriteId(mmWriteId); + txnId = 0l; //todo to be replaced by txnId in Driver + tblDesc.setInitialMmWriteId(txnId); } } else if (viewDesc != null) { field_schemas = new ArrayList(); @@ -6987,11 +6985,11 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) genPartnCols(dest, input, qb, table_desc, dest_tab, rsCtx); } - assert isMmTable == (mmWriteId != null); + assert isMmTable == (txnId != null); FileSinkDesc fileSinkDesc = createFileSinkDesc(dest, table_desc, dest_part, dest_path, currentTableId, destTableIsAcid, destTableIsTemporary, destTableIsMaterialization, queryTmpdir, rsCtx, dpCtx, lbCtx, fsRS, - canBeMerged, mmWriteId, isMmCtas); + canBeMerged, txnId, isMmCtas); if (isMmCtas) { // Add FSD so that the LoadTask compilation could fix up its path to avoid the move. tableDesc.setWriter(fileSinkDesc); @@ -7094,12 +7092,6 @@ private ColsAndTypes deriveFileSinkColTypes( return result; } - private static Long getMmWriteId(Table tbl, boolean isMmTable) throws HiveException { - if (!isMmTable) return null; - // Get the next write ID for this table. We will prefix files with this write ID. - return Hive.get().getNextTableWriteId(tbl.getDbName(), tbl.getTableName()); - } - private FileSinkDesc createFileSinkDesc(String dest, TableDesc table_desc, Partition dest_part, Path dest_path, int currentTableId, boolean destTableIsAcid, boolean destTableIsTemporary, @@ -7119,7 +7111,12 @@ private FileSinkDesc createFileSinkDesc(String dest, TableDesc table_desc, MetaStoreUtils.isInsertOnlyTable(dest_part.getTable().getParameters())) || (table_desc != null && MetaStoreUtils.isInsertOnlyTable(table_desc.getProperties())); - if (destTableIsAcid && !isDestInsertOnly) { + if (isDestInsertOnly) { + fileSinkDesc.setWriteType(Operation.INSERT); + acidFileSinks.add(fileSinkDesc); + } + + if (destTableIsAcid) { AcidUtils.Operation wt = updating(dest) ? AcidUtils.Operation.UPDATE : (deleting(dest) ? 
AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT); fileSinkDesc.setWriteType(wt); @@ -7331,7 +7328,7 @@ String fixCtasColumnName(String colName) { private void checkAcidConstraints(QB qb, TableDesc tableDesc, Table table, AcidUtils.Operation acidOp) throws SemanticException { String tableName = tableDesc.getTableName(); - if (!qb.getParseInfo().isInsertIntoTable(tableName) && !Operation.INSERT_ONLY.equals(acidOp)) { + if (!qb.getParseInfo().isInsertIntoTable(tableName)) { LOG.debug("Couldn't find table " + tableName + " in insertIntoTable"); throw new SemanticException(ErrorMsg.NO_INSERT_OVERWRITE_WITH_ACID.getMsg()); } @@ -7346,7 +7343,7 @@ These props are now enabled elsewhere (see commit diffs). It would be better in */ conf.set(AcidUtils.CONF_ACID_KEY, "true"); - if (!Operation.NOT_ACID.equals(acidOp) && !Operation.INSERT_ONLY.equals(acidOp)) { + if (!Operation.NOT_ACID.equals(acidOp)) { if (table.getNumBuckets() < 1) { throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, table.getTableName()); } @@ -11676,7 +11673,7 @@ public void validate() throws SemanticException { if (p != null) { tbl = p.getTable(); } - if (tbl != null && AcidUtils.isFullAcidTable(tbl)) { + if (tbl != null && (AcidUtils.isFullAcidTable(tbl) || MetaStoreUtils.isInsertOnlyTable(tbl.getParameters()))) { acidInQuery = true; checkAcidTxnManager(tbl); } @@ -11739,7 +11736,7 @@ public void validate() throws SemanticException { tbl = writeEntity.getTable(); } - if (tbl != null && AcidUtils.isFullAcidTable(tbl)) { + if (tbl != null && (AcidUtils.isFullAcidTable(tbl) || MetaStoreUtils.isInsertOnlyTable(tbl.getParameters()))) { acidInQuery = true; checkAcidTxnManager(tbl); } @@ -13404,12 +13401,9 @@ private boolean isAcidOutputFormat(Class of) { AcidUtils.Operation.INSERT); } - private AcidUtils.Operation getAcidType( - Table table, Class of, String dest) { + private AcidUtils.Operation getAcidType(Class of, String dest) { if (SessionState.get() == null || !SessionState.get().getTxnMgr().supportsAcid()) { return AcidUtils.Operation.NOT_ACID; - } else if (MetaStoreUtils.isInsertOnlyTable(table.getParameters())) { - return AcidUtils.Operation.INSERT_ONLY; } else if (isAcidOutputFormat(of)) { return getAcidType(dest); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 91c343c..2e76c86 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -20,23 +20,18 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashSet; -import java.util.LinkedList; import java.util.List; -import java.util.Queue; import java.util.Set; -import java.util.Stack; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.HiveStatsUtils; -import org.apache.hadoop.hive.common.ValidWriteIds; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.Context; @@ -44,7 +39,6 @@ import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; import org.apache.hadoop.hive.ql.exec.FetchTask; -import 
org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -56,7 +50,6 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; -import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; @@ -319,21 +312,22 @@ public void compile(final ParseContext pCtx, final List sourceDirs, final List targetDi return srcDirs; } + public void setSourceDirs(List srcs) { + this.srcDirs = srcs; + } + + public void setTargetDirs(final List targetDir) { + this.targetDirs = targetDir; + } + @Explain(displayName = "hdfs directory") public boolean getIsDfsDir() { return isDfsDir; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java index 762d946..5bb52b4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java @@ -42,7 +42,8 @@ // Need to remember whether this is an acid compliant operation, and if so whether it is an // insert, update, or delete. private AcidUtils.Operation writeType; - private Long mmWriteId; + private Long txnId; + private int stmtId; // TODO: the below seems like they should just be combined into partitionDesc private org.apache.hadoop.hive.ql.plan.TableDesc table; @@ -65,11 +66,11 @@ public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, final Map partitionSpec, final boolean replace, - final AcidUtils.Operation writeType, Long mmWriteId) { + final AcidUtils.Operation writeType, Long txnId) { super(sourcePath); Utilities.LOG14535.info("creating part LTD from " + sourcePath + " to " + ((table.getProperties() == null) ? 
"null" : table.getTableName())); - init(table, partitionSpec, replace, writeType, mmWriteId); + init(table, partitionSpec, replace, writeType, txnId); } /** @@ -83,15 +84,15 @@ public LoadTableDesc(final Path sourcePath, final TableDesc table, final Map partitionSpec, final boolean replace, - final Long mmWriteId) { - this(sourcePath, table, partitionSpec, replace, AcidUtils.Operation.NOT_ACID, mmWriteId); + final Long txnId) { + this(sourcePath, table, partitionSpec, replace, AcidUtils.Operation.NOT_ACID, txnId); } public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, final Map partitionSpec, - final AcidUtils.Operation writeType, Long mmWriteId) { - this(sourcePath, table, partitionSpec, true, writeType, mmWriteId); + final AcidUtils.Operation writeType, Long txnId) { + this(sourcePath, table, partitionSpec, true, writeType, txnId); } /** @@ -102,22 +103,22 @@ public LoadTableDesc(final Path sourcePath, */ public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, - final Map partitionSpec, Long mmWriteId) { - this(sourcePath, table, partitionSpec, true, AcidUtils.Operation.NOT_ACID, mmWriteId); + final Map partitionSpec, Long txnId) { + this(sourcePath, table, partitionSpec, true, AcidUtils.Operation.NOT_ACID, txnId); } public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, final DynamicPartitionCtx dpCtx, final AcidUtils.Operation writeType, - boolean isReplace, Long mmWriteId) { + boolean isReplace, Long txnId) { super(sourcePath); Utilities.LOG14535.info("creating LTD from " + sourcePath + " to " + table.getTableName()/*, new Exception()*/); this.dpCtx = dpCtx; if (dpCtx != null && dpCtx.getPartSpec() != null && partitionSpec == null) { - init(table, dpCtx.getPartSpec(), isReplace, writeType, mmWriteId); + init(table, dpCtx.getPartSpec(), isReplace, writeType, txnId); } else { - init(table, new LinkedHashMap(), isReplace, writeType, mmWriteId); + init(table, new LinkedHashMap(), isReplace, writeType, txnId); } } @@ -125,12 +126,12 @@ private void init( final org.apache.hadoop.hive.ql.plan.TableDesc table, final Map partitionSpec, final boolean replace, - AcidUtils.Operation writeType, Long mmWriteId) { + AcidUtils.Operation writeType, Long txnId) { this.table = table; this.partitionSpec = partitionSpec; this.replace = replace; this.writeType = writeType; - this.mmWriteId = mmWriteId; + this.txnId = txnId; } @Explain(displayName = "table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) @@ -158,11 +159,11 @@ public boolean getReplace() { @Explain(displayName = "micromanaged table") public Boolean isMmTableExplain() { - return mmWriteId != null? true : null; + return txnId != null? 
true : null; } public boolean isMmTable() { - return mmWriteId != null; + return txnId != null; } public void setReplace(boolean replace) { @@ -203,8 +204,20 @@ public void setLbCtx(ListBucketingCtx lbCtx) { return writeType; } - public Long getMmWriteId() { - return mmWriteId; + public Long getTxnId() { + return txnId; + } + + public void setTxnId(Long txnId) { + this.txnId = txnId; + } + + public int getStmtId() { + return stmtId; + } + + public void setStmtId(int stmtId) { + this.stmtId = stmtId; } public void setIntermediateInMmWrite(boolean b) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java index 4a13e1f..55b9da9 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java @@ -141,7 +141,7 @@ db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, src, true, true); db.createTable(src, cols, null, TextInputFormat.class, HiveIgnoreKeyTextOutputFormat.class); - db.loadTable(hadoopDataFile[i], src, false, true, false, false, false, null); + db.loadTable(hadoopDataFile[i], src, false, true, false, false, false, null, 0); i++; } diff --git ql/src/test/queries/clientpositive/mm_all.q ql/src/test/queries/clientpositive/mm_all.q index a6a7c8f..5710f52 100644 --- ql/src/test/queries/clientpositive/mm_all.q +++ ql/src/test/queries/clientpositive/mm_all.q @@ -33,7 +33,6 @@ drop table part_mm; drop table simple_mm; create table simple_mm(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only"); insert into table simple_mm select key from intermediate; -insert overwrite table simple_mm select key from intermediate; select * from simple_mm order by key; insert into table simple_mm select key from intermediate; select * from simple_mm order by key; @@ -193,40 +192,40 @@ set hive.merge.mapredfiles=false; -- TODO: need to include merge+union+DP, but it's broken for now -drop table ctas0_mm; -create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate; -select * from ctas0_mm; -drop table ctas0_mm; +--drop table ctas0_mm; +--create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate; +--select * from ctas0_mm; +--drop table ctas0_mm; -drop table ctas1_mm; -create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as - select * from intermediate union all select * from intermediate; -select * from ctas1_mm; -drop table ctas1_mm; +--drop table ctas1_mm; +--create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as +-- select * from intermediate union all select * from intermediate; +--select * from ctas1_mm; +--drop table ctas1_mm; drop table iow0_mm; create table iow0_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); -insert overwrite table iow0_mm select key from intermediate; +--insert overwrite table iow0_mm select key from intermediate; insert into table iow0_mm select key + 1 from intermediate; select * from iow0_mm order by key; -insert overwrite table iow0_mm select key + 2 from intermediate; +--insert overwrite table iow0_mm select key + 2 from intermediate; select * from iow0_mm order by key; drop table iow0_mm; drop table iow1_mm; create table iow1_mm(key int) partitioned by (key2 int) 
tblproperties("transactional"="true", "transactional_properties"="insert_only"); -insert overwrite table iow1_mm partition (key2) +--insert overwrite table iow1_mm partition (key2) select key as k1, key from intermediate union all select key as k1, key from intermediate; insert into table iow1_mm partition (key2) select key + 1 as k1, key from intermediate union all select key as k1, key from intermediate; select * from iow1_mm order by key, key2; -insert overwrite table iow1_mm partition (key2) +--insert overwrite table iow1_mm partition (key2) select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate; select * from iow1_mm order by key, key2; -insert overwrite table iow1_mm partition (key2) +--insert overwrite table iow1_mm partition (key2) select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate; select * from iow1_mm order by key, key2; drop table iow1_mm; @@ -369,82 +368,82 @@ drop table multi0_2_mm; create table multi0_1_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); create table multi0_2_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); -from intermediate -insert overwrite table multi0_1_mm select key, p -insert overwrite table multi0_2_mm select p, key; - -select * from multi0_1_mm order by key, key2; -select * from multi0_2_mm order by key, key2; - -set hive.merge.mapredfiles=true; -set hive.merge.sparkfiles=true; -set hive.merge.tezfiles=true; - -from intermediate -insert into table multi0_1_mm select p, key -insert overwrite table multi0_2_mm select key, p; -select * from multi0_1_mm order by key, key2; -select * from multi0_2_mm order by key, key2; - -set hive.merge.mapredfiles=false; -set hive.merge.sparkfiles=false; -set hive.merge.tezfiles=false; - -drop table multi0_1_mm; -drop table multi0_2_mm; - - -drop table multi1_mm; -create table multi1_mm (key int, key2 int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); -from intermediate -insert into table multi1_mm partition(p=1) select p, key -insert into table multi1_mm partition(p=2) select key, p; -select * from multi1_mm order by key, key2, p; -from intermediate -insert into table multi1_mm partition(p=2) select p, key -insert overwrite table multi1_mm partition(p=1) select key, p; -select * from multi1_mm order by key, key2, p; - -from intermediate -insert into table multi1_mm partition(p) select p, key, p -insert into table multi1_mm partition(p=1) select key, p; -select key, key2, p from multi1_mm order by key, key2, p; - -from intermediate -insert into table multi1_mm partition(p) select p, key, 1 -insert into table multi1_mm partition(p=1) select key, p; -select key, key2, p from multi1_mm order by key, key2, p; -drop table multi1_mm; - - - - -set datanucleus.cache.collections=false; -set hive.stats.autogather=true; - -drop table stats_mm; -create table stats_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); -insert overwrite table stats_mm select key from intermediate; -desc formatted stats_mm; - -insert into table stats_mm select key from intermediate; -desc formatted stats_mm; -drop table stats_mm; - -drop table stats2_mm; -create table stats2_mm tblproperties("transactional"="true", "transactional_properties"="insert_only") as select array(key, value) from src; -desc formatted stats2_mm; -drop table stats2_mm; - - -set 
hive.optimize.skewjoin=true; -set hive.skewjoin.key=2; -set hive.optimize.metadataonly=false; - -CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only"); -FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value; -select count(distinct key) from skewjoin_mm; -drop table skewjoin_mm; +--from intermediate +--insert overwrite table multi0_1_mm select key, p +--insert overwrite table multi0_2_mm select p, key; +-- +--select * from multi0_1_mm order by key, key2; +--select * from multi0_2_mm order by key, key2; +-- +--set hive.merge.mapredfiles=true; +--set hive.merge.sparkfiles=true; +--set hive.merge.tezfiles=true; +-- +--from intermediate +--insert into table multi0_1_mm select p, key +--insert overwrite table multi0_2_mm select key, p; +--select * from multi0_1_mm order by key, key2; +--select * from multi0_2_mm order by key, key2; +-- +--set hive.merge.mapredfiles=false; +--set hive.merge.sparkfiles=false; +--set hive.merge.tezfiles=false; +-- +--drop table multi0_1_mm; +--drop table multi0_2_mm; +-- +-- +--drop table multi1_mm; +--create table multi1_mm (key int, key2 int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); +--from intermediate +--insert into table multi1_mm partition(p=1) select p, key +--insert into table multi1_mm partition(p=2) select key, p; +--select * from multi1_mm order by key, key2, p; +--from intermediate +--insert into table multi1_mm partition(p=2) select p, key +--insert overwrite table multi1_mm partition(p=1) select key, p; +--select * from multi1_mm order by key, key2, p; +-- +--from intermediate +--insert into table multi1_mm partition(p) select p, key, p +--insert into table multi1_mm partition(p=1) select key, p; +--select key, key2, p from multi1_mm order by key, key2, p; +-- +--from intermediate +--insert into table multi1_mm partition(p) select p, key, 1 +--insert into table multi1_mm partition(p=1) select key, p; +--select key, key2, p from multi1_mm order by key, key2, p; +--drop table multi1_mm; +-- +-- +-- +-- +--set datanucleus.cache.collections=false; +--set hive.stats.autogather=true; +-- +--drop table stats_mm; +--create table stats_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); +--insert overwrite table stats_mm select key from intermediate; +--desc formatted stats_mm; +-- +--insert into table stats_mm select key from intermediate; +--desc formatted stats_mm; +--drop table stats_mm; +-- +--drop table stats2_mm; +--create table stats2_mm tblproperties("transactional"="true", "transactional_properties"="insert_only") as select array(key, value) from src; +--desc formatted stats2_mm; +--drop table stats2_mm; +-- +-- +--set hive.optimize.skewjoin=true; +--set hive.skewjoin.key=2; +--set hive.optimize.metadataonly=false; +-- +--CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +--FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value; +--select count(distinct key) from skewjoin_mm; +--drop table skewjoin_mm; set hive.optimize.skewjoin=false; diff --git ql/src/test/queries/clientpositive/mm_conversions.q ql/src/test/queries/clientpositive/mm_conversions.q index 2dc7a74..3dc5a1f 100644 --- ql/src/test/queries/clientpositive/mm_conversions.q +++ 
ql/src/test/queries/clientpositive/mm_conversions.q @@ -4,7 +4,8 @@ set hive.fetch.task.conversion=none; set tez.grouping.min-size=1; set tez.grouping.max-size=2; set hive.exec.dynamic.partition.mode=nonstrict; - +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; -- Force multiple writers when reading drop table intermediate; diff --git ql/src/test/queries/clientpositive/mm_insertonly_acid.q ql/src/test/queries/clientpositive/mm_insertonly_acid.q deleted file mode 100644 index 7da99c5..0000000 --- ql/src/test/queries/clientpositive/mm_insertonly_acid.q +++ /dev/null @@ -1,16 +0,0 @@ -set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.fetch.task.conversion=none; -set hive.exec.dynamic.partition.mode=nonstrict; -set hive.support.concurrency=true; -set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; - - -drop table qtr_acid; -create table qtr_acid (key int) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only"); -insert into table qtr_acid partition(p='123') select distinct key from src where key > 0 order by key asc limit 10; -insert into table qtr_acid partition(p='456') select distinct key from src where key > 0 order by key desc limit 10; -explain -select * from qtr_acid order by key; -select * from qtr_acid order by key; -drop table qtr_acid; \ No newline at end of file diff --git ql/src/test/results/clientpositive/mm_all.q.out ql/src/test/results/clientpositive/mm_all.q.out index 116f2b1..85f9995 100644 --- ql/src/test/results/clientpositive/mm_all.q.out +++ ql/src/test/results/clientpositive/mm_all.q.out @@ -82,6 +82,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.part_mm + Write Type: INSERT Stage: Stage-7 Conditional Operator @@ -288,21 +289,6 @@ POSTHOOK: Input: default@intermediate@p=456 POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@simple_mm POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: insert overwrite table simple_mm select key from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@simple_mm -POSTHOOK: query: insert overwrite table simple_mm select key from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@simple_mm -POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select * from simple_mm order by key PREHOOK: type: QUERY PREHOOK: Input: default@simple_mm @@ -1236,102 +1222,6 @@ POSTHOOK: query: drop table merge1_mm POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@merge1_mm POSTHOOK: Output: default@merge1_mm -PREHOOK: query: drop table ctas0_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ctas0_mm -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@intermediate -PREHOOK: Input: 
default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: database:default -PREHOOK: Output: default@ctas0_mm -POSTHOOK: query: create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: database:default -POSTHOOK: Output: default@ctas0_mm -POSTHOOK: Lineage: ctas0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: ctas0_mm.p SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -PREHOOK: query: select * from ctas0_mm -PREHOOK: type: QUERY -PREHOOK: Input: default@ctas0_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from ctas0_mm -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ctas0_mm -#### A masked pattern was here #### -98 455 -97 455 -0 456 -10 456 -100 457 -103 457 -PREHOOK: query: drop table ctas0_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@ctas0_mm -PREHOOK: Output: default@ctas0_mm -POSTHOOK: query: drop table ctas0_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@ctas0_mm -POSTHOOK: Output: default@ctas0_mm -PREHOOK: query: drop table ctas1_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ctas1_mm -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as - select * from intermediate union all select * from intermediate -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: database:default -PREHOOK: Output: default@ctas1_mm -POSTHOOK: query: create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as - select * from intermediate union all select * from intermediate -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: database:default -POSTHOOK: Output: default@ctas1_mm -POSTHOOK: Lineage: ctas1_mm.key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: ctas1_mm.p EXPRESSION [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -PREHOOK: query: select * from ctas1_mm -PREHOOK: type: QUERY -PREHOOK: Input: default@ctas1_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from ctas1_mm -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ctas1_mm -#### A masked pattern was here #### -98 455 -98 455 -97 455 -97 455 -0 456 -0 456 -10 456 -10 456 -100 457 -100 457 -103 457 -103 457 -PREHOOK: query: drop table ctas1_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@ctas1_mm -PREHOOK: Output: default@ctas1_mm -POSTHOOK: query: drop table ctas1_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@ctas1_mm -POSTHOOK: Output: default@ctas1_mm PREHOOK: query: drop table iow0_mm PREHOOK: type: DROPTABLE POSTHOOK: query: drop table iow0_mm @@ -1344,21 +1234,6 @@ POSTHOOK: query: create table iow0_mm(key int) 
tblproperties("transactional"="tr POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@iow0_mm -PREHOOK: query: insert overwrite table iow0_mm select key from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@iow0_mm -POSTHOOK: query: insert overwrite table iow0_mm select key from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@iow0_mm -POSTHOOK: Lineage: iow0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: insert into table iow0_mm select key + 1 from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate @@ -1382,33 +1257,12 @@ POSTHOOK: query: select * from iow0_mm order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@iow0_mm #### A masked pattern was here #### -0 1 -10 11 -97 -98 98 99 -100 101 -103 104 -PREHOOK: query: insert overwrite table iow0_mm select key + 2 from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@iow0_mm -POSTHOOK: query: insert overwrite table iow0_mm select key + 2 from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@iow0_mm -POSTHOOK: Lineage: iow0_mm.key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select * from iow0_mm order by key PREHOOK: type: QUERY PREHOOK: Input: default@iow0_mm @@ -1417,12 +1271,12 @@ POSTHOOK: query: select * from iow0_mm order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@iow0_mm #### A masked pattern was here #### -2 -12 +1 +11 +98 99 -100 -102 -105 +101 +104 PREHOOK: query: drop table iow0_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@iow0_mm @@ -1443,33 +1297,32 @@ POSTHOOK: query: create table iow1_mm(key int) partitioned by (key2 int) tblpro POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@iow1_mm -PREHOOK: query: insert overwrite table iow1_mm partition (key2) -select key as k1, key from intermediate union all select key as k1, key from intermediate +PREHOOK: query: select key as k1, key from intermediate union all select key as k1, key from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@iow1_mm -POSTHOOK: query: insert overwrite table iow1_mm partition (key2) -select key as k1, key from intermediate union all select key as k1, key from intermediate +#### A masked pattern was here #### +POSTHOOK: query: select key as k1, key from intermediate union all select key as k1, key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: 
default@iow1_mm@key2=0 -POSTHOOK: Output: default@iow1_mm@key2=10 -POSTHOOK: Output: default@iow1_mm@key2=100 -POSTHOOK: Output: default@iow1_mm@key2=103 -POSTHOOK: Output: default@iow1_mm@key2=97 -POSTHOOK: Output: default@iow1_mm@key2=98 -POSTHOOK: Lineage: iow1_mm PARTITION(key2=0).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=100).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=103).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=10).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=97).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=98).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +#### A masked pattern was here #### +98 98 +98 98 +97 97 +97 97 +0 0 +0 0 +10 10 +10 10 +100 100 +100 100 +103 103 +103 103 PREHOOK: query: insert into table iow1_mm partition (key2) select key + 1 as k1, key from intermediate union all select key as k1, key from intermediate PREHOOK: type: QUERY @@ -1518,56 +1371,43 @@ POSTHOOK: Input: default@iow1_mm@key2=97 POSTHOOK: Input: default@iow1_mm@key2=98 #### A masked pattern was here #### 0 0 -0 0 -0 0 1 0 10 10 -10 10 -10 10 11 10 97 97 -97 97 -97 97 98 97 98 98 -98 98 -98 98 99 98 100 100 -100 100 -100 100 101 100 103 103 -103 103 -103 103 104 103 -PREHOOK: query: insert overwrite table iow1_mm partition (key2) -select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate +PREHOOK: query: select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@iow1_mm -POSTHOOK: query: insert overwrite table iow1_mm partition (key2) -select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate +#### A masked pattern was here #### +POSTHOOK: query: select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@iow1_mm@key2=0 -POSTHOOK: Output: default@iow1_mm@key2=10 -POSTHOOK: Output: default@iow1_mm@key2=100 -POSTHOOK: Output: default@iow1_mm@key2=103 -POSTHOOK: Output: default@iow1_mm@key2=97 -POSTHOOK: Output: default@iow1_mm@key2=98 -POSTHOOK: Lineage: iow1_mm PARTITION(key2=0).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=100).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=103).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=10).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm 
PARTITION(key2=97).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=98).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +#### A masked pattern was here #### +101 98 +102 98 +100 97 +101 97 +3 0 +4 0 +13 10 +14 10 +103 100 +104 100 +106 103 +107 103 PREHOOK: query: select * from iow1_mm order by key, key2 PREHOOK: type: QUERY PREHOOK: Input: default@iow1_mm @@ -1588,73 +1428,53 @@ POSTHOOK: Input: default@iow1_mm@key2=103 POSTHOOK: Input: default@iow1_mm@key2=97 POSTHOOK: Input: default@iow1_mm@key2=98 #### A masked pattern was here #### -3 0 -4 0 -13 10 -14 10 -100 97 -101 97 -101 98 -102 98 -103 100 -104 100 -106 103 -107 103 -PREHOOK: query: insert overwrite table iow1_mm partition (key2) -select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate +0 0 +1 0 +10 10 +11 10 +97 97 +98 97 +98 98 +99 98 +100 100 +101 100 +103 103 +104 103 +PREHOOK: query: select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@iow1_mm -POSTHOOK: query: insert overwrite table iow1_mm partition (key2) -select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate +#### A masked pattern was here #### +POSTHOOK: query: select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@iow1_mm@key2=100 -POSTHOOK: Output: default@iow1_mm@key2=101 -POSTHOOK: Output: default@iow1_mm@key2=102 -POSTHOOK: Output: default@iow1_mm@key2=103 -POSTHOOK: Output: default@iow1_mm@key2=105 -POSTHOOK: Output: default@iow1_mm@key2=106 -POSTHOOK: Output: default@iow1_mm@key2=12 -POSTHOOK: Output: default@iow1_mm@key2=13 -POSTHOOK: Output: default@iow1_mm@key2=2 -POSTHOOK: Output: default@iow1_mm@key2=3 -POSTHOOK: Output: default@iow1_mm@key2=99 -POSTHOOK: Lineage: iow1_mm PARTITION(key2=100).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=101).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=102).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=103).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=105).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=106).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=12).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=13).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=2).key EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=3).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: iow1_mm PARTITION(key2=99).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +#### A masked pattern was here #### +101 101 +100 100 +100 100 +99 99 +3 3 +2 2 +13 13 +12 12 +103 103 +102 102 +106 106 +105 105 PREHOOK: query: select * from iow1_mm order by key, key2 PREHOOK: type: QUERY PREHOOK: Input: default@iow1_mm PREHOOK: Input: default@iow1_mm@key2=0 PREHOOK: Input: default@iow1_mm@key2=10 PREHOOK: Input: default@iow1_mm@key2=100 -PREHOOK: Input: default@iow1_mm@key2=101 -PREHOOK: Input: default@iow1_mm@key2=102 PREHOOK: Input: default@iow1_mm@key2=103 -PREHOOK: Input: default@iow1_mm@key2=105 -PREHOOK: Input: default@iow1_mm@key2=106 -PREHOOK: Input: default@iow1_mm@key2=12 -PREHOOK: Input: default@iow1_mm@key2=13 -PREHOOK: Input: default@iow1_mm@key2=2 -PREHOOK: Input: default@iow1_mm@key2=3 PREHOOK: Input: default@iow1_mm@key2=97 PREHOOK: Input: default@iow1_mm@key2=98 -PREHOOK: Input: default@iow1_mm@key2=99 #### A masked pattern was here #### POSTHOOK: query: select * from iow1_mm order by key, key2 POSTHOOK: type: QUERY @@ -1662,39 +1482,22 @@ POSTHOOK: Input: default@iow1_mm POSTHOOK: Input: default@iow1_mm@key2=0 POSTHOOK: Input: default@iow1_mm@key2=10 POSTHOOK: Input: default@iow1_mm@key2=100 -POSTHOOK: Input: default@iow1_mm@key2=101 -POSTHOOK: Input: default@iow1_mm@key2=102 POSTHOOK: Input: default@iow1_mm@key2=103 -POSTHOOK: Input: default@iow1_mm@key2=105 -POSTHOOK: Input: default@iow1_mm@key2=106 -POSTHOOK: Input: default@iow1_mm@key2=12 -POSTHOOK: Input: default@iow1_mm@key2=13 -POSTHOOK: Input: default@iow1_mm@key2=2 -POSTHOOK: Input: default@iow1_mm@key2=3 POSTHOOK: Input: default@iow1_mm@key2=97 POSTHOOK: Input: default@iow1_mm@key2=98 -POSTHOOK: Input: default@iow1_mm@key2=99 #### A masked pattern was here #### -2 2 -3 0 -3 3 -4 0 -12 12 -13 10 -13 13 -14 10 -99 99 -100 97 -100 100 +0 0 +1 0 +10 10 +11 10 +97 97 +98 97 +98 98 +99 98 100 100 -101 97 -101 98 -101 101 -102 98 -102 102 +101 100 103 103 -105 105 -106 106 +104 103 PREHOOK: query: drop table iow1_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@iow1_mm @@ -1765,7 +1568,7 @@ POSTHOOK: query: select count(1) from load0_mm POSTHOOK: type: QUERY POSTHOOK: Input: default@load0_mm #### A masked pattern was here #### -500 +1000 PREHOOK: query: drop table load0_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@load0_mm @@ -1916,7 +1719,7 @@ POSTHOOK: query: select count(1) from load1_mm POSTHOOK: type: QUERY POSTHOOK: Input: default@load1_mm #### A masked pattern was here #### -500 +1050 PREHOOK: query: drop table load1_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@load1_mm @@ -2541,649 +2344,6 @@ POSTHOOK: query: create table multi0_2_mm (key int, key2 int) tblproperties("tr POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@multi0_2_mm -PREHOOK: query: from intermediate -insert overwrite table multi0_1_mm select key, p -insert overwrite table multi0_2_mm select p, key -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi0_1_mm -PREHOOK: Output: default@multi0_2_mm -POSTHOOK: query: from intermediate -insert overwrite table 
multi0_1_mm select key, p -insert overwrite table multi0_2_mm select p, key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi0_1_mm -POSTHOOK: Output: default@multi0_2_mm -POSTHOOK: Lineage: multi0_1_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_1_mm.key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_2_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_2_mm.key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: select * from multi0_1_mm order by key, key2 -PREHOOK: type: QUERY -PREHOOK: Input: default@multi0_1_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from multi0_1_mm order by key, key2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi0_1_mm -#### A masked pattern was here #### -0 456 -10 456 -97 455 -98 455 -100 457 -103 457 -PREHOOK: query: select * from multi0_2_mm order by key, key2 -PREHOOK: type: QUERY -PREHOOK: Input: default@multi0_2_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from multi0_2_mm order by key, key2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi0_2_mm -#### A masked pattern was here #### -455 97 -455 98 -456 0 -456 10 -457 100 -457 103 -PREHOOK: query: from intermediate -insert into table multi0_1_mm select p, key -insert overwrite table multi0_2_mm select key, p -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi0_1_mm -PREHOOK: Output: default@multi0_2_mm -POSTHOOK: query: from intermediate -insert into table multi0_1_mm select p, key -insert overwrite table multi0_2_mm select key, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi0_1_mm -POSTHOOK: Output: default@multi0_2_mm -POSTHOOK: Lineage: multi0_1_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_1_mm.key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_2_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi0_2_mm.key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -PREHOOK: query: select * from multi0_1_mm order by key, key2 -PREHOOK: type: QUERY -PREHOOK: Input: default@multi0_1_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from multi0_1_mm order by key, key2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi0_1_mm -#### A masked pattern was here #### -0 456 -10 456 -97 455 -98 455 -100 457 -103 457 -455 97 -455 98 -456 0 -456 10 -457 100 -457 103 -PREHOOK: query: select * from multi0_2_mm order by key, key2 -PREHOOK: type: QUERY -PREHOOK: Input: default@multi0_2_mm -#### A masked pattern was here #### -POSTHOOK: query: select * from multi0_2_mm order by key, key2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi0_2_mm -#### A 
masked pattern was here #### -0 456 -10 456 -97 455 -98 455 -100 457 -103 457 -PREHOOK: query: drop table multi0_1_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@multi0_1_mm -PREHOOK: Output: default@multi0_1_mm -POSTHOOK: query: drop table multi0_1_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@multi0_1_mm -POSTHOOK: Output: default@multi0_1_mm -PREHOOK: query: drop table multi0_2_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@multi0_2_mm -PREHOOK: Output: default@multi0_2_mm -POSTHOOK: query: drop table multi0_2_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@multi0_2_mm -POSTHOOK: Output: default@multi0_2_mm -PREHOOK: query: drop table multi1_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table multi1_mm -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table multi1_mm (key int, key2 int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@multi1_mm -POSTHOOK: query: create table multi1_mm (key int, key2 int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@multi1_mm -PREHOOK: query: from intermediate -insert into table multi1_mm partition(p=1) select p, key -insert into table multi1_mm partition(p=2) select key, p -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi1_mm@p=1 -PREHOOK: Output: default@multi1_mm@p=2 -POSTHOOK: query: from intermediate -insert into table multi1_mm partition(p=1) select p, key -insert into table multi1_mm partition(p=2) select key, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: Output: default@multi1_mm@p=2 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -PREHOOK: query: select * from multi1_mm order by key, key2, p -PREHOOK: type: QUERY -PREHOOK: Input: default@multi1_mm -PREHOOK: Input: default@multi1_mm@p=1 -PREHOOK: Input: default@multi1_mm@p=2 -#### A masked pattern was here #### -POSTHOOK: query: select * from multi1_mm order by key, key2, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi1_mm -POSTHOOK: Input: default@multi1_mm@p=1 -POSTHOOK: Input: default@multi1_mm@p=2 -#### A masked pattern was here #### -0 456 2 -10 456 2 -97 455 2 -98 455 2 -100 457 2 -103 457 2 -455 97 1 -455 98 1 -456 0 1 -456 10 1 -457 100 1 -457 103 1 -PREHOOK: query: from intermediate -insert into table multi1_mm partition(p=2) select p, key -insert overwrite table multi1_mm partition(p=1) select key, p -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 
-PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi1_mm@p=1 -PREHOOK: Output: default@multi1_mm@p=2 -POSTHOOK: query: from intermediate -insert into table multi1_mm partition(p=2) select p, key -insert overwrite table multi1_mm partition(p=1) select key, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: Output: default@multi1_mm@p=2 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: select * from multi1_mm order by key, key2, p -PREHOOK: type: QUERY -PREHOOK: Input: default@multi1_mm -PREHOOK: Input: default@multi1_mm@p=1 -PREHOOK: Input: default@multi1_mm@p=2 -#### A masked pattern was here #### -POSTHOOK: query: select * from multi1_mm order by key, key2, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi1_mm -POSTHOOK: Input: default@multi1_mm@p=1 -POSTHOOK: Input: default@multi1_mm@p=2 -#### A masked pattern was here #### -0 456 1 -0 456 2 -10 456 1 -10 456 2 -97 455 1 -97 455 2 -98 455 1 -98 455 2 -100 457 1 -100 457 2 -103 457 1 -103 457 2 -455 97 1 -455 97 2 -455 98 1 -455 98 2 -456 0 1 -456 0 2 -456 10 1 -456 10 2 -457 100 1 -457 100 2 -457 103 1 -457 103 2 -PREHOOK: query: from intermediate -insert into table multi1_mm partition(p) select p, key, p -insert into table multi1_mm partition(p=1) select key, p -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi1_mm -PREHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: query: from intermediate -insert into table multi1_mm partition(p) select p, key, p -insert into table multi1_mm partition(p=1) select key, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: Output: default@multi1_mm@p=455 -POSTHOOK: Output: default@multi1_mm@p=456 -POSTHOOK: Output: default@multi1_mm@p=457 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=455).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=456).key2 SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=457).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=457).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: select key, key2, p from multi1_mm order by key, key2, p -PREHOOK: type: QUERY -PREHOOK: Input: default@multi1_mm -PREHOOK: Input: default@multi1_mm@p=1 -PREHOOK: Input: default@multi1_mm@p=2 -PREHOOK: Input: default@multi1_mm@p=455 -PREHOOK: Input: default@multi1_mm@p=456 -PREHOOK: Input: default@multi1_mm@p=457 -#### A masked pattern was here #### -POSTHOOK: query: select key, key2, p from multi1_mm order by key, key2, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi1_mm -POSTHOOK: Input: default@multi1_mm@p=1 -POSTHOOK: Input: default@multi1_mm@p=2 -POSTHOOK: Input: default@multi1_mm@p=455 -POSTHOOK: Input: default@multi1_mm@p=456 -POSTHOOK: Input: default@multi1_mm@p=457 -#### A masked pattern was here #### -0 456 1 -0 456 1 -0 456 2 -10 456 1 -10 456 1 -10 456 2 -97 455 1 -97 455 1 -97 455 2 -98 455 1 -98 455 1 -98 455 2 -100 457 1 -100 457 1 -100 457 2 -103 457 1 -103 457 1 -103 457 2 -455 97 1 -455 97 2 -455 97 455 -455 98 1 -455 98 2 -455 98 455 -456 0 1 -456 0 2 -456 0 456 -456 10 1 -456 10 2 -456 10 456 -457 100 1 -457 100 2 -457 100 457 -457 103 1 -457 103 2 -457 103 457 -PREHOOK: query: from intermediate -insert into table multi1_mm partition(p) select p, key, 1 -insert into table multi1_mm partition(p=1) select key, p -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@multi1_mm -PREHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: query: from intermediate -insert into table multi1_mm partition(p) select p, key, 1 -insert into table multi1_mm partition(p=1) select key, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: select key, key2, p from multi1_mm order by key, key2, p -PREHOOK: type: QUERY -PREHOOK: Input: default@multi1_mm -PREHOOK: Input: default@multi1_mm@p=1 -PREHOOK: Input: default@multi1_mm@p=2 -PREHOOK: Input: default@multi1_mm@p=455 -PREHOOK: Input: default@multi1_mm@p=456 -PREHOOK: Input: default@multi1_mm@p=457 -#### A masked pattern was here #### -POSTHOOK: query: select key, key2, p from multi1_mm order by key, key2, p -POSTHOOK: type: QUERY -POSTHOOK: Input: default@multi1_mm -POSTHOOK: Input: default@multi1_mm@p=1 -POSTHOOK: Input: default@multi1_mm@p=2 -POSTHOOK: Input: default@multi1_mm@p=455 -POSTHOOK: Input: default@multi1_mm@p=456 -POSTHOOK: Input: default@multi1_mm@p=457 -#### A masked pattern was here 
#### -0 456 1 -0 456 1 -0 456 1 -0 456 2 -10 456 1 -10 456 1 -10 456 1 -10 456 2 -97 455 1 -97 455 1 -97 455 1 -97 455 2 -98 455 1 -98 455 1 -98 455 1 -98 455 2 -100 457 1 -100 457 1 -100 457 1 -100 457 2 -103 457 1 -103 457 1 -103 457 1 -103 457 2 -455 97 1 -455 97 1 -455 97 2 -455 97 455 -455 98 1 -455 98 1 -455 98 2 -455 98 455 -456 0 1 -456 0 1 -456 0 2 -456 0 456 -456 10 1 -456 10 1 -456 10 2 -456 10 456 -457 100 1 -457 100 1 -457 100 2 -457 100 457 -457 103 1 -457 103 1 -457 103 2 -457 103 457 -PREHOOK: query: drop table multi1_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@multi1_mm -PREHOOK: Output: default@multi1_mm -POSTHOOK: query: drop table multi1_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@multi1_mm -POSTHOOK: Output: default@multi1_mm -PREHOOK: query: drop table stats_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table stats_mm -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table stats_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only") -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@stats_mm -POSTHOOK: query: create table stats_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only") -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats_mm -PREHOOK: query: insert overwrite table stats_mm select key from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@stats_mm -POSTHOOK: query: insert overwrite table stats_mm select key from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@stats_mm -POSTHOOK: Lineage: stats_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: desc formatted stats_mm -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_mm -POSTHOOK: query: desc formatted stats_mm -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_mm -# col_name data_type comment - -key int - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 6 - rawDataSize 13 - totalSize 19 - transactional true - transactional_properties insert_only -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: insert into table stats_mm select key from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Input: default@intermediate@p=457 -PREHOOK: Output: default@stats_mm -POSTHOOK: query: insert into table stats_mm select key from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 
-POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Input: default@intermediate@p=457 -POSTHOOK: Output: default@stats_mm -POSTHOOK: Lineage: stats_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: desc formatted stats_mm -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_mm -POSTHOOK: query: desc formatted stats_mm -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_mm -# col_name data_type comment - -key int - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 2 - numRows 12 - rawDataSize 26 - totalSize 38 - transactional true - transactional_properties insert_only -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table stats_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats_mm -PREHOOK: Output: default@stats_mm -POSTHOOK: query: drop table stats_mm -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@stats_mm -POSTHOOK: Output: default@stats_mm -PREHOOK: query: drop table stats2_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table stats2_mm -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table stats2_mm tblproperties("transactional"="true", "transactional_properties"="insert_only") as select array(key, value) from src -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@stats2_mm -POSTHOOK: query: create table stats2_mm tblproperties("transactional"="true", "transactional_properties"="insert_only") as select array(key, value) from src -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats2_mm -POSTHOOK: Lineage: stats2_mm._c0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: desc formatted stats2_mm -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats2_mm -POSTHOOK: query: desc formatted stats2_mm -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats2_mm -# col_name data_type comment - -_c0 array - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 500 - rawDataSize 5312 - totalSize 5812 - transactional true - transactional_properties insert_only -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table stats2_mm -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats2_mm -PREHOOK: Output: default@stats2_mm -POSTHOOK: query: drop table stats2_mm 
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@stats2_mm
-POSTHOOK: Output: default@stats2_mm
-PREHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only")
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@skewjoin_mm
-POSTHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only")
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@skewjoin_mm
-PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@skewjoin_mm
-POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@skewjoin_mm
-POSTHOOK: Lineage: skewjoin_mm.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: skewjoin_mm.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: select count(distinct key) from skewjoin_mm
-PREHOOK: type: QUERY
-PREHOOK: Input: default@skewjoin_mm
-#### A masked pattern was here ####
-POSTHOOK: query: select count(distinct key) from skewjoin_mm
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@skewjoin_mm
-#### A masked pattern was here ####
-309
-PREHOOK: query: drop table skewjoin_mm
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@skewjoin_mm
-PREHOOK: Output: default@skewjoin_mm
-POSTHOOK: query: drop table skewjoin_mm
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@skewjoin_mm
-POSTHOOK: Output: default@skewjoin_mm
 PREHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only")
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
diff --git ql/src/test/results/clientpositive/mm_insertonly_acid.q.out ql/src/test/results/clientpositive/mm_insertonly_acid.q.out
deleted file mode 100644
index 22bdc93..0000000
--- ql/src/test/results/clientpositive/mm_insertonly_acid.q.out
+++ /dev/null
@@ -1,115 +0,0 @@
-PREHOOK: query: drop table qtr_acid
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table qtr_acid
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table qtr_acid (key int) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@qtr_acid
-POSTHOOK: query: create table qtr_acid (key int) partitioned by (p int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@qtr_acid
-PREHOOK: query: insert into table qtr_acid partition(p='123') select distinct key from src where key > 0 order by key asc limit 10
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@qtr_acid@p=123
-POSTHOOK: query: insert into table qtr_acid partition(p='123') select distinct key from src where key > 0 order by key asc limit 10
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@qtr_acid@p=123
-POSTHOOK: Lineage: qtr_acid PARTITION(p=123).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-PREHOOK: query: insert into table qtr_acid partition(p='456') select distinct key from src where key > 0 order by key desc limit 10
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@qtr_acid@p=456
-POSTHOOK: query: insert into table qtr_acid partition(p='456') select distinct key from src where key > 0 order by key desc limit 10
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@qtr_acid@p=456
-POSTHOOK: Lineage: qtr_acid PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-PREHOOK: query: explain
-select * from qtr_acid order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-select * from qtr_acid order by key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: qtr_acid
-            Statistics: Num rows: 20 Data size: 47 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: int), p (type: int)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 20 Data size: 47 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Statistics: Num rows: 20 Data size: 47 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: int)
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 20 Data size: 47 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 20 Data size: 47 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select * from qtr_acid order by key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@qtr_acid
-PREHOOK: Input: default@qtr_acid@p=123
-PREHOOK: Input: default@qtr_acid@p=456
-#### A masked pattern was here ####
-POSTHOOK: query: select * from qtr_acid order by key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@qtr_acid
-POSTHOOK: Input: default@qtr_acid@p=123
-POSTHOOK: Input: default@qtr_acid@p=456
-#### A masked pattern was here ####
-9 456
-10 123
-11 123
-85 456
-86 456
-87 456
-90 456
-92 456
-95 456
-96 456
-97 456
-98 456
-100 123
-103 123
-104 123
-105 123
-111 123
-113 123
-114 123
-116 123
-PREHOOK: query: drop table qtr_acid
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@qtr_acid
-PREHOOK: Output: default@qtr_acid
-POSTHOOK: query: drop table qtr_acid
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@qtr_acid
-POSTHOOK: Output: default@qtr_acid
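The expectation changes above all exercise Hive insert-only ("insert_only", a.k.a. MM) transactional tables. As an illustration only, not part of the patch, the HiveQL sketch below restates the table setup and the INSERT INTO / INSERT OVERWRITE sequence that the iow0_mm expectations were generated from; the statements are copied from the test queries shown in the hunks above, while the DDL for the `intermediate` source table is an assumption inferred from the outputs (an int `key` column and an int partition column `p`).

-- Illustrative HiveQL sketch (not part of this patch).
-- Assumed shape of the source table, inferred from the test outputs above.
CREATE TABLE intermediate(key INT) PARTITIONED BY (p INT);

-- Insert-only transactional ("MM") table, declared as in the tests.
CREATE TABLE iow0_mm(key INT)
  TBLPROPERTIES("transactional"="true", "transactional_properties"="insert_only");

-- Appends rows to the table.
INSERT INTO TABLE iow0_mm SELECT key + 1 FROM intermediate;

-- Replaces the table's previously visible rows.
INSERT OVERWRITE TABLE iow0_mm SELECT key FROM intermediate;

SELECT * FROM iow0_mm ORDER BY key;
DROP TABLE iow0_mm;

Run end to end, the sequence should leave iow0_mm containing only the keys produced by the final INSERT OVERWRITE.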