diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java index 0feff59..ff09dd8 100644 --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -52,6 +52,9 @@ import org.apache.hadoop.fs.Trash; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.conf.HiveConfUtil; +import org.apache.hadoop.hive.io.HdfsUtils; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.shims.Utils; @@ -348,9 +351,13 @@ public static String unescapePathName(String path) { */ public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus, List results) throws IOException { + listStatusRecursively(fs, fileStatus, HIDDEN_FILES_PATH_FILTER, results); + } + public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus, + PathFilter filter, List results) throws IOException { if (fileStatus.isDir()) { - for (FileStatus stat : fs.listStatus(fileStatus.getPath(), HIDDEN_FILES_PATH_FILTER)) { + for (FileStatus stat : fs.listStatus(fileStatus.getPath(), filter)) { listStatusRecursively(fs, stat, results); } } else { @@ -905,11 +912,11 @@ public static boolean deleteTmpFile(File tempFile) { } return false; } - - + + /** * Return whenever all paths in the collection are schemaless - * + * * @param paths * @return */ @@ -924,16 +931,16 @@ public static boolean pathsContainNoScheme(Collection paths) { /** * Returns the deepest candidate path for the given path. - * + * * prioritizes on paths including schema / then includes matches without schema - * + * * @param path * @param candidates the candidate paths * @return */ public static Path getParentRegardlessOfScheme(Path path, Collection candidates) { Path schemalessPath = Path.getPathWithoutSchemeAndAuthority(path); - + for(;path!=null && schemalessPath!=null; path=path.getParent(),schemalessPath=schemalessPath.getParent()){ if(candidates.contains(path)) return path; @@ -946,13 +953,13 @@ public static Path getParentRegardlessOfScheme(Path path, Collection candi /** * Checks whenever path is inside the given subtree - * + * * return true iff * * path = subtree * * subtreeContains(path,d) for any descendant of the subtree node * @param path the path in question * @param subtree - * + * * @return */ public static boolean isPathWithinSubtree(Path path, Path subtree) { diff --git a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java index 7c9d72f..745a868 100644 --- a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java @@ -19,16 +19,20 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.conf.HiveConf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.Lists; + /** * HiveStatsUtils. * A collection of utilities used for hive statistics. 
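One thing worth flagging in the FileUtils hunk above: the new overload that takes a PathFilter appears to recurse through the original two-argument listStatusRecursively, so the caller's filter is only applied at the first directory level and deeper levels fall back to HIDDEN_FILES_PATH_FILTER. A minimal filter-preserving sketch of the same walk (a standalone illustration under that assumption, not the patched method itself):

    import java.io.IOException;
    import java.util.List;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.PathFilter;

    public final class RecursiveListingSketch {
      private RecursiveListingSketch() {
      }

      /** Collects plain files under fileStatus, applying the same filter at every level. */
      public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus,
          PathFilter filter, List<FileStatus> results) throws IOException {
        if (fileStatus.isDirectory()) {
          for (FileStatus stat : fs.listStatus(fileStatus.getPath(), filter)) {
            // Propagate the caller's filter instead of delegating to the two-argument overload.
            listStatusRecursively(fs, stat, filter, results);
          }
        } else {
          results.add(fileStatus);
        }
      }
    }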
@@ -50,15 +54,26 @@ * @return array of FileStatus * @throws IOException */ - public static FileStatus[] getFileStatusRecurse(Path path, int level, FileSystem fs) + public static FileStatus[] getFileStatusRecurse(Path path, int level, FileSystem fs) throws IOException { + return getFileStatusRecurse(path, level, fs, FileUtils.HIDDEN_FILES_PATH_FILTER, false); + } + + public static FileStatus[] getFileStatusRecurse( + Path path, int level, FileSystem fs, PathFilter filter) throws IOException { + return getFileStatusRecurse(path, level, fs, filter, false); + } + + public static FileStatus[] getFileStatusRecurse( + Path path, int level, FileSystem fs, PathFilter filter, boolean allLevelsBelow) + throws IOException { // if level is <0, the return all files/directories under the specified path - if ( level < 0) { + if (level < 0) { List result = new ArrayList(); try { FileStatus fileStatus = fs.getFileStatus(path); - FileUtils.listStatusRecursively(fs, fileStatus, result); + FileUtils.listStatusRecursively(fs, fileStatus, filter, result); } catch (IOException e) { // globStatus() API returns empty FileStatus[] when the specified path // does not exist. But getFileStatus() throw IOException. To mimic the @@ -75,7 +90,31 @@ sb.append(Path.SEPARATOR).append("*"); } Path pathPattern = new Path(path, sb.toString()); - return fs.globStatus(pathPattern, FileUtils.HIDDEN_FILES_PATH_FILTER); + if (!allLevelsBelow) { + return fs.globStatus(pathPattern, filter); + } + LinkedList queue = new LinkedList<>(); + List results = new ArrayList(); + for (FileStatus status : fs.globStatus(pathPattern)) { + if (filter.accept(status.getPath())) { + results.add(status); + } + if (status.isDirectory()) { + queue.add(status); + } + } + while (!queue.isEmpty()) { + FileStatus status = queue.poll(); + for (FileStatus child : fs.listStatus(status.getPath())) { + if (filter.accept(child.getPath())) { + results.add(child); + } + if (child.isDirectory()) { + queue.add(child); + } + } + } + return results.toArray(new FileStatus[results.size()]); } public static int getNumBitVectorsForNDVEstimation(Configuration conf) throws Exception { diff --git a/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java b/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java index b224d26..8949788 100644 --- a/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java @@ -28,6 +28,8 @@ import java.util.Arrays; import java.util.List; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,6 +39,10 @@ */ public final class JavaUtils { + public static final String DELTA_PREFIX = "delta"; + public static final String DELTA_DIGITS = "%07d"; + public static final int DELTA_DIGITS_LEN = 7; + public static final String STATEMENT_DIGITS = "%04d"; private static final Logger LOG = LoggerFactory.getLogger(JavaUtils.class); private static final Method SUN_MISC_UTIL_RELEASE; @@ -158,4 +164,65 @@ public static String txnIdsToString(List txnIds) { private JavaUtils() { // prevent instantiation } + + public static Long extractTxnId(Path file) { + String fileName = file.getName(); + String[] parts = fileName.split("_", 4); // e.g. 
delta_0000001_0000001_0000 + if (parts.length < 4 || !DELTA_PREFIX.equals(parts[0])) { + LOG.debug("Cannot extract transaction ID for a MM table: " + file + + " (" + Arrays.toString(parts) + ")"); + return null; + } + long writeId = -1; + try { + writeId = Long.parseLong(parts[1]); + } catch (NumberFormatException ex) { + LOG.debug("Cannot extract transaction ID for a MM table: " + file + + "; parsing " + parts[1] + " got " + ex.getMessage()); + return null; + } + return writeId; + } + + public static class IdPathFilter implements PathFilter { + private final String mmDirName; + private final boolean isMatch, isIgnoreTemp; + public IdPathFilter(long writeId, int stmtId, boolean isMatch) { + this(writeId, stmtId, isMatch, false); + } + public IdPathFilter(long writeId, int stmtId, boolean isMatch, boolean isIgnoreTemp) { + this.mmDirName = DELTA_PREFIX + "_" + String.format(DELTA_DIGITS, writeId) + "_" + + String.format(DELTA_DIGITS, writeId) + "_" + String.format(STATEMENT_DIGITS, stmtId); + this.isMatch = isMatch; + this.isIgnoreTemp = isIgnoreTemp; + } + + @Override + public boolean accept(Path path) { + String name = path.getName(); + if (name.equals(mmDirName)) { + return isMatch; + } + if (isIgnoreTemp && name.length() > 0) { + char c = name.charAt(0); + if (c == '.' || c == '_') return false; // Regardless of isMatch, ignore this. + } + return !isMatch; + } + } + + public static class AnyIdDirFilter implements PathFilter { + @Override + public boolean accept(Path path) { + String name = path.getName(); + if (!name.startsWith(DELTA_PREFIX + "_")) return false; + String idStr = name.substring(DELTA_PREFIX.length() + 1, DELTA_PREFIX.length() + 1 + DELTA_DIGITS_LEN); + try { + Long.parseLong(idStr); + } catch (NumberFormatException ex) { + return false; + } + return true; + } + } } diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index cf3f50b..5e90757 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1274,6 +1274,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVETESTMODE("hive.test.mode", false, "Whether Hive is running in test mode. If yes, it turns on sampling and prefixes the output tablename.", false), + HIVEEXIMTESTMODE("hive.exim.test.mode", false, + "The subset of test mode that only enables custom path handling for ExIm.", false), HIVETESTMODEPREFIX("hive.test.mode.prefix", "test_", "In test mode, specfies prefixes for the output table", false), HIVETESTMODESAMPLEFREQ("hive.test.mode.samplefreq", 32, @@ -1862,8 +1864,15 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "hive.lock.numretries and hive.lock.sleep.between.retries."), HIVE_TXN_OPERATIONAL_PROPERTIES("hive.txn.operational.properties", 1, - "This is intended to be used as an internal property for future versions of ACID. (See\n" + - "HIVE-14035 for details.)"), + "Sets the operational properties that control the appropriate behavior for various\n" + + "versions of the Hive ACID subsystem. Mostly it is intended to be used as an internal property\n" + + "for future versions of ACID. 
(See HIVE-14035 for details.)\n" + + "0: Turn on the legacy mode for ACID\n" + + "1: Enable split-update feature found in the newer version of Hive ACID subsystem\n" + + "2: Hash-based merge, which combines delta files using GRACE hash join based approach (not implemented)\n" + + "3: Make the table 'quarter-acid' as it only supports insert. But it doesn't require ORC or bucketing.\n" + + "This is intended to be used as an internal property for future versions of ACID. (See\n" + + "HIVE-14035 for details.)"), HIVE_MAX_OPEN_TXNS("hive.max.open.txns", 100000, "Maximum number of open transactions. If \n" + "current open transactions reach this limit, future open transaction requests will be \n" + @@ -3458,6 +3467,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Log tracing id that can be used by upstream clients for tracking respective logs. " + "Truncated to " + LOG_PREFIX_LENGTH + " characters. Defaults to use auto-generated session id."), + HIVE_MM_AVOID_GLOBSTATUS_ON_S3("hive.mm.avoid.s3.globstatus", true, + "Whether to use listFiles (optimized on S3) instead of globStatus when on S3."), HIVE_CONF_RESTRICTED_LIST("hive.conf.restricted.list", "hive.security.authenticator.manager,hive.security.authorization.manager," + diff --git a/common/src/test/org/apache/hive/common/util/MockFileSystem.java b/common/src/test/org/apache/hive/common/util/MockFileSystem.java new file mode 100644 index 0000000..e65fd33 --- /dev/null +++ b/common/src/test/org/apache/hive/common/util/MockFileSystem.java @@ -0,0 +1,622 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.common.util; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FSInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Progressable; + +public class MockFileSystem extends FileSystem { + final List files = new ArrayList(); + final Map fileStatusMap = new HashMap<>(); + Path workingDir = new Path("/"); + // statics for when the mock fs is created via FileSystem.get + private static String blockedUgi = null; + private final static List globalFiles = new ArrayList(); + protected Statistics statistics; + public boolean allowDelete = false; + + public MockFileSystem() { + // empty + } + + @Override + public void initialize(URI uri, Configuration conf) { + setConf(conf); + statistics = getStatistics("mock", getClass()); + } + + public MockFileSystem(Configuration conf, MockFile... files) { + setConf(conf); + this.files.addAll(Arrays.asList(files)); + statistics = getStatistics("mock", getClass()); + } + + public static void setBlockedUgi(String s) { + blockedUgi = s; + } + + public void clear() { + files.clear(); + } + + @Override + public URI getUri() { + try { + return new URI("mock:///"); + } catch (URISyntaxException err) { + throw new IllegalArgumentException("huh?", err); + } + } + + // increments file modification time + public void touch(MockFile file) { + if (fileStatusMap.containsKey(file)) { + FileStatus fileStatus = fileStatusMap.get(file); + FileStatus fileStatusNew = new FileStatus(fileStatus.getLen(), fileStatus.isDirectory(), + fileStatus.getReplication(), fileStatus.getBlockSize(), + fileStatus.getModificationTime() + 1, fileStatus.getAccessTime(), + fileStatus.getPermission(), fileStatus.getOwner(), fileStatus.getGroup(), + fileStatus.getPath()); + fileStatusMap.put(file, fileStatusNew); + } + } + + @SuppressWarnings("serial") + public static class MockAccessDenied extends IOException { + } + + @Override + public FSDataInputStream open(Path path, int i) throws IOException { + statistics.incrementReadOps(1); + checkAccess(); + MockFile file = findFile(path); + if (file != null) return new FSDataInputStream(new MockInputStream(file)); + throw new IOException("File not found: " + path); + } + + public MockFile findFile(Path path) { + for (MockFile file: files) { + if (file.path.equals(path)) { + return file; + } + } + for (MockFile file: globalFiles) { + if (file.path.equals(path)) { + return file; + } + } + return null; + } + + private void checkAccess() throws IOException { + if (blockedUgi == null) return; + if (!blockedUgi.equals(UserGroupInformation.getCurrentUser().getShortUserName())) return; + throw new MockAccessDenied(); + } + + @Override + public FSDataOutputStream create(Path path, 
FsPermission fsPermission, + boolean overwrite, int bufferSize, + short replication, long blockSize, + Progressable progressable + ) throws IOException { + statistics.incrementWriteOps(1); + checkAccess(); + MockFile file = findFile(path); + if (file == null) { + file = new MockFile(path.toString(), (int) blockSize, new byte[0]); + files.add(file); + } + return new MockOutputStream(file); + } + + @Override + public FSDataOutputStream append(Path path, int bufferSize, + Progressable progressable + ) throws IOException { + statistics.incrementWriteOps(1); + checkAccess(); + return create(path, FsPermission.getDefault(), true, bufferSize, + (short) 3, 256 * 1024, progressable); + } + + @Override + public boolean rename(Path path, Path path2) throws IOException { + statistics.incrementWriteOps(1); + checkAccess(); + return false; + } + + @Override + public boolean delete(Path path) throws IOException { + statistics.incrementWriteOps(1); + checkAccess(); + return false; + } + + @Override + public boolean delete(Path path, boolean isRecursive) throws IOException { + statistics.incrementWriteOps(1); + checkAccess(); + return allowDelete && isRecursive && deleteMatchingFiles(files, path.toString()); + } + + @Override + public RemoteIterator listLocatedStatus(final Path f) + throws IOException { + return new RemoteIterator() { + private Iterator iterator = listLocatedFileStatuses(f).iterator(); + + @Override + public boolean hasNext() throws IOException { + return iterator.hasNext(); + } + + @Override + public LocatedFileStatus next() throws IOException { + return iterator.next(); + } + }; + } + + private List listLocatedFileStatuses(Path path) throws IOException { + statistics.incrementReadOps(1); + checkAccess(); + path = path.makeQualified(this); + List result = new ArrayList<>(); + String pathname = path.toString(); + String pathnameAsDir = pathname + "/"; + Set dirs = new TreeSet(); + MockFile file = findFile(path); + if (file != null) { + result.add(createLocatedStatus(file)); + return result; + } + findMatchingLocatedFiles(files, pathnameAsDir, dirs, result); + findMatchingLocatedFiles(globalFiles, pathnameAsDir, dirs, result); + // for each directory add it once + for(String dir: dirs) { + result.add(createLocatedDirectory(new MockPath(this, pathnameAsDir + dir))); + } + return result; + } + + @Override + public FileStatus[] listStatus(Path path) throws IOException { + statistics.incrementReadOps(1); + checkAccess(); + path = path.makeQualified(this); + List result = new ArrayList(); + String pathname = path.toString(); + String pathnameAsDir = pathname + "/"; + Set dirs = new TreeSet(); + MockFile file = findFile(path); + if (file != null) { + return new FileStatus[]{createStatus(file)}; + } + findMatchingFiles(files, pathnameAsDir, dirs, result); + findMatchingFiles(globalFiles, pathnameAsDir, dirs, result); + // for each directory add it once + for(String dir: dirs) { + result.add(createDirectory(new MockPath(this, pathnameAsDir + dir))); + } + return result.toArray(new FileStatus[result.size()]); + } + + private void findMatchingFiles( + List files, String pathnameAsDir, Set dirs, List result) { + for (MockFile file: files) { + String filename = file.path.toString(); + if (filename.startsWith(pathnameAsDir)) { + String tail = filename.substring(pathnameAsDir.length()); + int nextSlash = tail.indexOf('/'); + if (nextSlash > 0) { + dirs.add(tail.substring(0, nextSlash)); + } else { + result.add(createStatus(file)); + } + } + } + } + + private boolean deleteMatchingFiles(List files, String 
path) { + Iterator fileIter = files.iterator(); + boolean result = true; + while (fileIter.hasNext()) { + MockFile file = fileIter.next(); + String filename = file.path.toString(); + if (!filename.startsWith(path)) continue; + if (filename.length() <= path.length() || filename.charAt(path.length()) != '/') continue; + if (file.cannotDelete) { + result = false; + continue; + } + assert !file.isDeleted; + file.isDeleted = true; + fileIter.remove(); + } + return result; + } + + private void findMatchingLocatedFiles( + List files, String pathnameAsDir, Set dirs, List result) + throws IOException { + for (MockFile file: files) { + String filename = file.path.toString(); + if (filename.startsWith(pathnameAsDir)) { + String tail = filename.substring(pathnameAsDir.length()); + int nextSlash = tail.indexOf('/'); + if (nextSlash > 0) { + dirs.add(tail.substring(0, nextSlash)); + } else { + result.add(createLocatedStatus(file)); + } + } + } + } + + @Override + public void setWorkingDirectory(Path path) { + workingDir = path; + } + + @Override + public Path getWorkingDirectory() { + return workingDir; + } + + @Override + public boolean mkdirs(Path path, FsPermission fsPermission) { + statistics.incrementWriteOps(1); + return false; + } + + private FileStatus createStatus(MockFile file) { + if (fileStatusMap.containsKey(file)) { + return fileStatusMap.get(file); + } + FileStatus fileStatus = new FileStatus(file.length, false, 1, file.blockSize, 0, 0, + FsPermission.createImmutable((short) 644), "owen", "group", + file.path); + fileStatusMap.put(file, fileStatus); + return fileStatus; + } + + private FileStatus createDirectory(Path dir) { + return new FileStatus(0, true, 0, 0, 0, 0, + FsPermission.createImmutable((short) 755), "owen", "group", dir); + } + + private LocatedFileStatus createLocatedStatus(MockFile file) throws IOException { + FileStatus fileStatus = createStatus(file); + return new LocatedFileStatus(fileStatus, + getFileBlockLocationsImpl(fileStatus, 0, fileStatus.getLen(), false)); + } + + private LocatedFileStatus createLocatedDirectory(Path dir) throws IOException { + FileStatus fileStatus = createDirectory(dir); + return new LocatedFileStatus(fileStatus, + getFileBlockLocationsImpl(fileStatus, 0, fileStatus.getLen(), false)); + } + + @Override + public FileStatus getFileStatus(Path path) throws IOException { + statistics.incrementReadOps(1); + checkAccess(); + path = path.makeQualified(this); + String pathnameAsDir = path.toString() + "/"; + MockFile file = findFile(path); + if (file != null) return createStatus(file); + for (MockFile dir : files) { + if (dir.path.toString().startsWith(pathnameAsDir)) { + return createDirectory(path); + } + } + for (MockFile dir : globalFiles) { + if (dir.path.toString().startsWith(pathnameAsDir)) { + return createDirectory(path); + } + } + throw new FileNotFoundException("File " + path + " does not exist"); + } + + @Override + public BlockLocation[] getFileBlockLocations(FileStatus stat, + long start, long len) throws IOException { + return getFileBlockLocationsImpl(stat, start, len, true); + } + + private BlockLocation[] getFileBlockLocationsImpl(final FileStatus stat, final long start, + final long len, + final boolean updateStats) throws IOException { + if (updateStats) { + statistics.incrementReadOps(1); + } + checkAccess(); + List result = new ArrayList(); + MockFile file = findFile(stat.getPath()); + if (file != null) { + for(MockBlock block: file.blocks) { + if (getOverlap(block.offset, block.length, start, len) > 0) { + String[] topology = new 
String[block.hosts.length]; + for(int i=0; i < topology.length; ++i) { + topology[i] = "/rack/ " + block.hosts[i]; + } + result.add(new BlockLocation(block.hosts, block.hosts, + topology, block.offset, block.length)); + } + } + return result.toArray(new BlockLocation[result.size()]); + } + return new BlockLocation[0]; + } + + + /** + * Compute the number of bytes that overlap between the two ranges. + * @param offset1 start of range1 + * @param length1 length of range1 + * @param offset2 start of range2 + * @param length2 length of range2 + * @return the number of bytes in the overlap range + */ + private static long getOverlap(long offset1, long length1, long offset2, long length2) { + // c/p from OrcInputFormat + long end1 = offset1 + length1; + long end2 = offset2 + length2; + if (end2 <= offset1 || end1 <= offset2) { + return 0; + } else { + return Math.min(end1, end2) - Math.max(offset1, offset2); + } + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + buffer.append("mockFs{files:["); + for(int i=0; i < files.size(); ++i) { + if (i != 0) { + buffer.append(", "); + } + buffer.append(files.get(i)); + } + buffer.append("]}"); + return buffer.toString(); + } + + public static void addGlobalFile(MockFile mockFile) { + globalFiles.add(mockFile); + } + + public static void clearGlobalFiles() { + globalFiles.clear(); + } + + + public static class MockBlock { + int offset; + int length; + final String[] hosts; + + public MockBlock(String... hosts) { + this.hosts = hosts; + } + + public void setOffset(int offset) { + this.offset = offset; + } + + public void setLength(int length) { + this.length = length; + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + buffer.append("block{offset: "); + buffer.append(offset); + buffer.append(", length: "); + buffer.append(length); + buffer.append(", hosts: ["); + for(int i=0; i < hosts.length; i++) { + if (i != 0) { + buffer.append(", "); + } + buffer.append(hosts[i]); + } + buffer.append("]}"); + return buffer.toString(); + } + } + + public static class MockFile { + public final Path path; + public int blockSize; + public int length; + public MockBlock[] blocks; + public byte[] content; + public boolean cannotDelete = false; + // This is purely for testing convenience; has no bearing on FS operations such as list. + public boolean isDeleted = false; + + public MockFile(String path, int blockSize, byte[] content, + MockBlock... 
blocks) { + this.path = new Path(path); + this.blockSize = blockSize; + this.blocks = blocks; + this.content = content; + this.length = content.length; + int offset = 0; + for(MockBlock block: blocks) { + block.offset = offset; + block.length = Math.min(length - offset, blockSize); + offset += block.length; + } + } + + @Override + public int hashCode() { + return path.hashCode() + 31 * length; + } + + @Override + public boolean equals(final Object obj) { + if (!(obj instanceof MockFile)) { return false; } + return ((MockFile) obj).path.equals(this.path) && ((MockFile) obj).length == this.length; + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + buffer.append("mockFile{path: "); + buffer.append(path.toString()); + buffer.append(", blkSize: "); + buffer.append(blockSize); + buffer.append(", len: "); + buffer.append(length); + buffer.append(", blocks: ["); + for(int i=0; i < blocks.length; i++) { + if (i != 0) { + buffer.append(", "); + } + buffer.append(blocks[i]); + } + buffer.append("]}"); + return buffer.toString(); + } + } + + static class MockInputStream extends FSInputStream { + final MockFile file; + int offset = 0; + + public MockInputStream(MockFile file) throws IOException { + this.file = file; + } + + @Override + public void seek(long offset) throws IOException { + this.offset = (int) offset; + } + + @Override + public long getPos() throws IOException { + return offset; + } + + @Override + public boolean seekToNewSource(long l) throws IOException { + return false; + } + + @Override + public int read() throws IOException { + if (offset < file.length) { + return file.content[offset++] & 0xff; + } + return -1; + } + } + + public static class MockPath extends Path { + private final FileSystem fs; + public MockPath(FileSystem fs, String path) { + super(path); + this.fs = fs; + } + @Override + public FileSystem getFileSystem(Configuration conf) { + return fs; + } + } + + public static class MockOutputStream extends FSDataOutputStream { + public final MockFile file; + + public MockOutputStream(MockFile file) throws IOException { + super(new DataOutputBuffer(), null); + this.file = file; + } + + /** + * Set the blocks and their location for the file. + * Must be called after the stream is closed or the block length will be + * wrong. + * @param blocks the list of blocks + */ + public void setBlocks(MockBlock... 
blocks) { + file.blocks = blocks; + int offset = 0; + int i = 0; + while (offset < file.length && i < blocks.length) { + blocks[i].offset = offset; + blocks[i].length = Math.min(file.length - offset, file.blockSize); + offset += blocks[i].length; + i += 1; + } + } + + @Override + public void close() throws IOException { + super.close(); + DataOutputBuffer buf = (DataOutputBuffer) getWrappedStream(); + file.length = buf.getLength(); + file.content = new byte[file.length]; + MockBlock block = new MockBlock("host1"); + block.setLength(file.length); + setBlocks(block); + System.arraycopy(buf.getData(), 0, file.content, 0, file.length); + } + + @Override + public String toString() { + return "Out stream to " + file.toString(); + } + } + + public void addFile(MockFile file) { + files.add(file); + } +} \ No newline at end of file diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java index 847c3bc..94b620a 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java @@ -732,7 +732,7 @@ private void discoverPartitions(JobContext context) throws IOException { for (FileStatus st : status) { LinkedHashMap fullPartSpec = new LinkedHashMap(); if (!customDynamicLocationUsed) { - Warehouse.makeSpecFromName(fullPartSpec, st.getPath()); + Warehouse.makeSpecFromName(fullPartSpec, st.getPath(), null); } else { HCatFileUtil.getPartKeyValuesForCustomLocation(fullPartSpec, jobInfo, st.getPath().toString()); diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java index f9e71f0..cf0bb59 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.common.classification.InterfaceStability; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Index; @@ -113,6 +114,10 @@ public static void setOutput(Configuration conf, Credentials credentials, throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported"); } + if (MetaStoreUtils.isInsertOnlyTable(table.getParameters())) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into an insert-only ACID table from Pig/Mapreduce is not supported"); + } + // Set up a common id hash for this job, so that when we create any temporary directory // later on, it is guaranteed to be unique. 
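Stepping back to the MockFileSystem test helper added earlier in this patch: it is self-contained enough to drive from a plain test. A hedged usage sketch (the driver class, paths and sizes are made up for illustration) showing how listStatus returns file statuses and synthesizes directory entries for nested paths:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hive.common.util.MockFileSystem;
    import org.apache.hive.common.util.MockFileSystem.MockFile;

    public class MockFileSystemExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Two mock files: one directly under the "table" dir, one inside a delta subdirectory.
        MockFileSystem fs = new MockFileSystem(conf,
            new MockFile("mock:/tbl/part-0000", 1024, new byte[]{1, 2, 3}),
            new MockFile("mock:/tbl/delta_0000001_0000001_0000/bucket_0", 1024, new byte[]{4, 5}));
        for (FileStatus status : fs.listStatus(new Path("mock:/tbl"))) {
          // Expect part-0000 as a file and delta_0000001_0000001_0000 as a synthesized directory.
          System.out.println(status.getPath() + (status.isDirectory() ? " (dir)" : ""));
        }
      }
    }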
String idHash; diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java index 8d861e4..0cafaa5 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java @@ -20,6 +20,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java index 8d5530f..39f98e4 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/history/TestHiveHistory.java @@ -104,7 +104,7 @@ protected void setUp() { db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, src, true, true); db.createTable(src, cols, null, TextInputFormat.class, IgnoreKeyTextOutputFormat.class); - db.loadTable(hadoopDataFile[i], src, false, false, false, false, false); + db.loadTable(hadoopDataFile[i], src, false, false, false, false, false, null, 0, false); i++; } diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index d9e760f..0e1f1eb 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -203,6 +203,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ metadata_only_queries.q,\ metadata_only_queries_with_filters.q,\ metadataonly1.q,\ + mm_conversions.q,\ mrr.q,\ nonmr_fetch_threshold.q,\ optimize_nullscan.q,\ diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java index 981e52f..de19b1d 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java @@ -92,8 +92,7 @@ public static EncodingWriter create(InputFormat sourceIf, Deserializer ser return new DeserializerOrcWriter(serDe, sourceOi, allocSize); } Path path = splitPath.getFileSystem(daemonConf).makeQualified(splitPath); - PartitionDesc partDesc = HiveFileFormatUtils.getPartitionDescFromPathRecursively( - parts, path, null); + PartitionDesc partDesc = HiveFileFormatUtils.getFromPathRecursively(parts, path, null); if (partDesc == null) { LlapIoImpl.LOG.info("Not using VertorDeserializeOrcWriter: no partition desc for " + path); return new DeserializerOrcWriter(serDe, sourceOi, allocSize); diff --git a/metastore/scripts/upgrade/derby/hive-schema-2.2.0.derby.sql b/metastore/scripts/upgrade/derby/hive-schema-2.2.0.derby.sql index 16e611c..6dd3dee 100644 --- a/metastore/scripts/upgrade/derby/hive-schema-2.2.0.derby.sql +++ b/metastore/scripts/upgrade/derby/hive-schema-2.2.0.derby.sql @@ -112,6 +112,7 @@ ALTER TABLE "APP"."KEY_CONSTRAINTS" ADD CONSTRAINT "CONSTRAINTS_PK" PRIMARY KEY CREATE INDEX "APP"."CONSTRAINTS_PARENT_TBL_ID_INDEX" ON "APP"."KEY_CONSTRAINTS"("PARENT_TBL_ID"); + -- ---------------------------------------------- -- DDL Statements for indexes -- 
---------------------------------------------- diff --git a/metastore/scripts/upgrade/mssql/hive-schema-2.2.0.mssql.sql b/metastore/scripts/upgrade/mssql/hive-schema-2.2.0.mssql.sql index 33730de..b6fdc7b 100644 --- a/metastore/scripts/upgrade/mssql/hive-schema-2.2.0.mssql.sql +++ b/metastore/scripts/upgrade/mssql/hive-schema-2.2.0.mssql.sql @@ -593,6 +593,7 @@ CREATE TABLE NOTIFICATION_SEQUENCE ALTER TABLE NOTIFICATION_SEQUENCE ADD CONSTRAINT NOTIFICATION_SEQUENCE_PK PRIMARY KEY (NNI_ID); + -- Constraints for table MASTER_KEYS for class(es) [org.apache.hadoop.hive.metastore.model.MMasterKey] -- Constraints for table IDXS for class(es) [org.apache.hadoop.hive.metastore.model.MIndex] diff --git a/metastore/scripts/upgrade/oracle/hive-schema-2.2.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-2.2.0.oracle.sql index ef22134..4aaa5e7 100644 --- a/metastore/scripts/upgrade/oracle/hive-schema-2.2.0.oracle.sql +++ b/metastore/scripts/upgrade/oracle/hive-schema-2.2.0.oracle.sql @@ -799,7 +799,6 @@ ALTER TABLE KEY_CONSTRAINTS ADD CONSTRAINT CONSTRAINTS_PK PRIMARY KEY (CONSTRAIN CREATE INDEX CONSTRAINTS_PT_INDEX ON KEY_CONSTRAINTS(PARENT_TBL_ID); - ------------------------------ -- Transaction and lock tables ------------------------------ diff --git a/metastore/scripts/upgrade/postgres/hive-schema-2.2.0.postgres.sql b/metastore/scripts/upgrade/postgres/hive-schema-2.2.0.postgres.sql index 87e1ab9..5feab4e 100644 --- a/metastore/scripts/upgrade/postgres/hive-schema-2.2.0.postgres.sql +++ b/metastore/scripts/upgrade/postgres/hive-schema-2.2.0.postgres.sql @@ -606,6 +606,7 @@ CREATE TABLE "KEY_CONSTRAINTS" CREATE INDEX "CONSTRAINTS_PARENT_TBLID_INDEX" ON "KEY_CONSTRAINTS" USING BTREE ("PARENT_TBL_ID"); + -- -- Name: BUCKETING_COLS_pkey; Type: CONSTRAINT; Schema: public; Owner: hiveuser; Tablespace: -- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 5812a1b..ab5f7b7 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -157,6 +157,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import static org.apache.commons.lang.StringUtils.join; + import com.facebook.fb303.FacebookBase; import com.facebook.fb303.fb_status; import com.google.common.annotations.VisibleForTesting; @@ -194,11 +197,10 @@ protected DateFormat initialValue() { }; }; - /** - * default port on which to start the Hive server - */ public static final String ADMIN = "admin"; public static final String PUBLIC = "public"; + /** MM write states. 
*/ + public static final char MM_WRITE_OPEN = 'o', MM_WRITE_COMMITTED = 'c', MM_WRITE_ABORTED = 'a'; private static HadoopThriftAuthBridge.Server saslServer; private static MetastoreDelegationTokenManager delegationTokenManager; @@ -503,6 +505,7 @@ public void init() throws MetaException { tableCount = Metrics.getOrCreateGauge(MetricsConstants.TOTAL_TABLES); partCount = Metrics.getOrCreateGauge(MetricsConstants.TOTAL_PARTITIONS); updateMetrics(); + } preListeners = MetaStoreUtils.getMetaStoreListeners(MetaStorePreEventListener.class, @@ -1362,13 +1365,7 @@ public Type get_type(final String name) throws MetaException, NoSuchObjectExcept } } catch (Exception e) { ex = e; - if (e instanceof MetaException) { - throw (MetaException) e; - } else if (e instanceof NoSuchObjectException) { - throw (NoSuchObjectException) e; - } else { - throw newMetaException(e); - } + throwMetaException(e); } finally { endFunction("get_type", ret != null, ex); } @@ -1380,26 +1377,6 @@ private boolean is_type_exists(RawStore ms, String typeName) return (ms.getType(typeName) != null); } - private void drop_type_core(final RawStore ms, String typeName) - throws NoSuchObjectException, MetaException { - boolean success = false; - try { - ms.openTransaction(); - // drop any partitions - if (!is_type_exists(ms, typeName)) { - throw new NoSuchObjectException(typeName + " doesn't exist"); - } - if (!ms.dropType(typeName)) { - throw new MetaException("Unable to drop type " + typeName); - } - success = ms.commitTransaction(); - } finally { - if (!success) { - ms.rollbackTransaction(); - } - } - } - @Override public boolean drop_type(final String name) throws MetaException, NoSuchObjectException { startFunction("drop_type", ": " + name); @@ -1411,13 +1388,7 @@ public boolean drop_type(final String name) throws MetaException, NoSuchObjectEx success = getMS().dropType(name); } catch (Exception e) { ex = e; - if (e instanceof MetaException) { - throw (MetaException) e; - } else if (e instanceof NoSuchObjectException) { - throw (NoSuchObjectException) e; - } else { - throw newMetaException(e); - } + throwMetaException(e); } finally { endFunction("drop_type", success, ex); } @@ -2169,7 +2140,7 @@ private void deletePartitionData(List partPaths, boolean ifPurge) { //No drop part listener events fired for public listeners historically, for drop table case. //Limiting to internal listeners for now, to avoid unexpected calls for public listeners. 
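The MM write-state constants above pair with the delta-directory helpers added to JavaUtils earlier in this diff (DELTA_PREFIX, DELTA_DIGITS, STATEMENT_DIGITS, extractTxnId, IdPathFilter, AnyIdDirFilter). A small usage sketch of those helpers, where the warehouse path and driver class are illustrative only:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.common.JavaUtils;

    public class MmDeltaNamingExample {
      public static void main(String[] args) {
        long writeId = 1;
        int stmtId = 0;
        // Mirrors the directory name built by JavaUtils.IdPathFilter in this patch.
        String dirName = JavaUtils.DELTA_PREFIX
            + "_" + String.format(JavaUtils.DELTA_DIGITS, writeId)
            + "_" + String.format(JavaUtils.DELTA_DIGITS, writeId)
            + "_" + String.format(JavaUtils.STATEMENT_DIGITS, stmtId);
        Path dir = new Path("/warehouse/tbl", dirName); // /warehouse/tbl/delta_0000001_0000001_0000

        System.out.println(JavaUtils.extractTxnId(dir));                                   // 1
        System.out.println(new JavaUtils.AnyIdDirFilter().accept(dir));                    // true
        System.out.println(new JavaUtils.IdPathFilter(writeId, stmtId, true).accept(dir)); // true
      }
    }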
if (listener instanceof HMSMetricsListener) { - for (Partition part : partsToDelete) { + for (@SuppressWarnings("unused") Partition part : partsToDelete) { listener.onDropPartition(null); } } @@ -2201,13 +2172,7 @@ public void drop_table_with_environment_context(final String dbname, final Strin throw new MetaException(e.getMessage()); } catch (Exception e) { ex = e; - if (e instanceof MetaException) { - throw (MetaException) e; - } else if (e instanceof NoSuchObjectException) { - throw (NoSuchObjectException) e; - } else { - throw newMetaException(e); - } + throwMetaException(e); } finally { endFunction("drop_table", success, ex, name); } @@ -2405,6 +2370,10 @@ private Table getTableInternal(String dbname, String name, Exception ex = null; try { t = get_table_core(dbname, name); + if (MetaStoreUtils.isInsertOnlyTable(t.getParameters())) { + assertClientHasCapability(capabilities, ClientCapability.INSERT_ONLY_TABLES, + "insert-only tables", "get_table_req"); + } firePreEvent(new PreReadTableEvent(t, this)); } catch (MetaException e) { ex = e; @@ -2448,7 +2417,7 @@ private Table getTableInternal(String dbname, String name, */ public Table get_table_core(final String dbname, final String name) throws MetaException, NoSuchObjectException { - Table t; + Table t = null; try { t = getMS().getTable(dbname, name); if (t == null) { @@ -2456,13 +2425,7 @@ public Table get_table_core(final String dbname, final String name) throws MetaE + " table not found"); } } catch (Exception e) { - if (e instanceof MetaException) { - throw (MetaException) e; - } else if (e instanceof NoSuchObjectException) { - throw (NoSuchObjectException) e; - } else { - throw newMetaException(e); - } + throwMetaException(e); } return t; } @@ -2513,8 +2476,7 @@ public GetTablesResult get_table_objects_by_name_req(GetTablesRequest req) throw if (dbName == null || dbName.isEmpty()) { throw new UnknownDBException("DB name is null or empty"); } - if (tableNames == null) - { + if (tableNames == null) { throw new InvalidOperationException(dbName + " cannot find null tables"); } @@ -2539,6 +2501,12 @@ public GetTablesResult get_table_objects_by_name_req(GetTablesRequest req) throw tables.addAll(ms.getTableObjectsByName(dbName, distinctTableNames.subList(startIndex, endIndex))); startIndex = endIndex; } + for (Table t : tables) { + if (MetaStoreUtils.isInsertOnlyTable(t.getParameters())) { + assertClientHasCapability(capabilities, ClientCapability.INSERT_ONLY_TABLES, + "insert-only tables", "get_table_req"); + } + } } catch (Exception e) { ex = e; if (e instanceof MetaException) { @@ -2876,7 +2844,7 @@ public boolean equals(Object obj) { throw new RuntimeException(e); } - partFutures.add(threadPool.submit(new Callable() { + partFutures.add(threadPool.submit(new Callable() { @Override public Partition call() throws Exception { ugi.doAs(new PrivilegedExceptionAction() { @@ -3113,7 +3081,7 @@ public Object run() throws Exception { try { for (Future partFuture : partFutures) { - Partition part = partFuture.get(); + partFuture.get(); } } catch (InterruptedException | ExecutionException e) { // cancel other tasks @@ -3814,13 +3782,7 @@ public Partition get_partition(final String db_name, final String tbl_name, ret = getMS().getPartition(db_name, tbl_name, part_vals); } catch (Exception e) { ex = e; - if (e instanceof MetaException) { - throw (MetaException) e; - } else if (e instanceof NoSuchObjectException) { - throw (NoSuchObjectException) e; - } else { - throw newMetaException(e); - } + throwMetaException(e); } finally { 
endFunction("get_partition", ret != null, ex, tbl_name); } @@ -3886,13 +3848,7 @@ public Partition get_partition_with_auth(final String db_name, ret = getMS().getPartitions(db_name, tbl_name, max_parts); } catch (Exception e) { ex = e; - if (e instanceof MetaException) { - throw (MetaException) e; - } else if (e instanceof NoSuchObjectException) { - throw (NoSuchObjectException) e; - } else { - throw newMetaException(e); - } + throwMetaException(e); } finally { endFunction("get_partitions", ret != null, ex, tbl_name); } @@ -4546,6 +4502,7 @@ private void alter_table_core(final String dbname, final String name, final Tabl } } + @SuppressWarnings("deprecation") Deserializer s = MetaStoreUtils.getDeserializer(curConf, tbl, false); ret = MetaStoreUtils.getFieldsFromDeserializer(tableName, s); } catch (SerDeException e) { @@ -6549,7 +6506,7 @@ public boolean partition_name_has_valid_characters(List part_vals, throw newMetaException(e); } } - endFunction("partition_name_has_valid_characters", true, null); + endFunction("partition_name_has_valid_characters", true, ex); return ret; } @@ -6893,21 +6850,6 @@ public GetRoleGrantsForPrincipalResponse get_role_grants_for_principal( return new GetRoleGrantsForPrincipalResponse(roleMaps); } - /** - * Convert each MRoleMap object into a thrift RolePrincipalGrant object - * @param roles - * @return - */ - private List getRolePrincipalGrants(List roles) throws MetaException { - List rolePrinGrantList = new ArrayList(); - if (roles != null) { - for (Role role : roles) { - rolePrinGrantList.addAll(getMS().listRoleMembers(role.getRoleName())); - } - } - return rolePrinGrantList; - } - @Override public AggrStats get_aggr_stats_for(PartitionsStatsRequest request) throws NoSuchObjectException, MetaException, TException { @@ -7270,13 +7212,7 @@ public PrimaryKeysResponse get_primary_keys(PrimaryKeysRequest request) ret = getMS().getPrimaryKeys(db_name, tbl_name); } catch (Exception e) { ex = e; - if (e instanceof MetaException) { - throw (MetaException) e; - } else if (e instanceof NoSuchObjectException) { - throw (NoSuchObjectException) e; - } else { - throw newMetaException(e); - } + throwMetaException(e); } finally { endFunction("get_primary_keys", ret != null, ex, tbl_name); } @@ -7300,19 +7236,24 @@ public ForeignKeysResponse get_foreign_keys(ForeignKeysRequest request) throws M foreign_db_name, foreign_tbl_name); } catch (Exception e) { ex = e; - if (e instanceof MetaException) { - throw (MetaException) e; - } else if (e instanceof NoSuchObjectException) { - throw (NoSuchObjectException) e; - } else { - throw newMetaException(e); - } + throwMetaException(e); } finally { endFunction("get_foreign_keys", ret != null, ex, foreign_tbl_name); } return new ForeignKeysResponse(ret); } + private void throwMetaException(Exception e) throws MetaException, + NoSuchObjectException { + if (e instanceof MetaException) { + throw (MetaException) e; + } else if (e instanceof NoSuchObjectException) { + throw (NoSuchObjectException) e; + } else { + throw newMetaException(e); + } + } + @Override public UniqueConstraintsResponse get_unique_constraints(UniqueConstraintsRequest request) throws MetaException, NoSuchObjectException, TException { @@ -7884,7 +7825,7 @@ private static void startCompactorCleaner(HiveConf conf) throws Exception { } private static MetaStoreThread instantiateThread(String classname) throws Exception { - Class c = Class.forName(classname); + Class c = Class.forName(classname); Object o = c.newInstance(); if 
(MetaStoreThread.class.isAssignableFrom(o.getClass())) { return (MetaStoreThread)o; @@ -7905,6 +7846,7 @@ private static void initializeAndStartThread(MetaStoreThread thread, HiveConf co thread.init(new AtomicBoolean(), new AtomicBoolean()); thread.start(); } + private static void startHouseKeeperService(HiveConf conf) throws Exception { if(!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_INITIATOR_ON)) { return; @@ -7913,7 +7855,7 @@ private static void startHouseKeeperService(HiveConf conf) throws Exception { startHouseKeeperService(conf, Class.forName("org.apache.hadoop.hive.ql.txn.AcidCompactionHistoryService")); startHouseKeeperService(conf, Class.forName("org.apache.hadoop.hive.ql.txn.AcidWriteSetService")); } - private static void startHouseKeeperService(HiveConf conf, Class c) throws Exception { + private static void startHouseKeeperService(HiveConf conf, Class c) throws Exception { //todo: when metastore adds orderly-shutdown logic, houseKeeper.stop() //should be called form it HouseKeeperService houseKeeper = (HouseKeeperService)c.newInstance(); @@ -7940,4 +7882,4 @@ private static void startHouseKeeperService(HiveConf conf, Class c) throws Excep } return fmHandlers; } -} +} \ No newline at end of file diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 70451c4..37da2f8 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -60,6 +60,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.conf.HiveConfUtil; import org.apache.hadoop.hive.metastore.api.*; +import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; @@ -98,9 +99,11 @@ * doesn't have (e.g. a getting a table of a new type), it will get back failures when the * capability checking is enabled (the default). */ - public final static ClientCapabilities VERSION = null; // No capabilities. + public final static ClientCapabilities VERSION = new ClientCapabilities( + Lists.newArrayList(ClientCapability.INSERT_ONLY_TABLES)); + // Test capability for tests. public final static ClientCapabilities TEST_VERSION = new ClientCapabilities( - Lists.newArrayList(ClientCapability.TEST_CAPABILITY)); // Test capability for tests. + Lists.newArrayList(ClientCapability.INSERT_ONLY_TABLES, ClientCapability.TEST_CAPABILITY)); ThriftHiveMetastore.Iface client = null; private TTransport transport = null; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreThread.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreThread.java index b62c45f..8cd3b31 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreThread.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreThread.java @@ -51,6 +51,7 @@ * thread should then assure that the loop has been gone completely through at * least once. */ + // TODO: move these test parameters to more specific places... 
there's no need to have them here void init(AtomicBoolean stop, AtomicBoolean looped) throws MetaException; /** diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index bbe13fd..49005b9 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -282,13 +282,16 @@ public static boolean updateTableStatsFast(Table tbl, FileStatus[] fileStatus, b public static void populateQuickStats(FileStatus[] fileStatus, Map params) { int numFiles = 0; long tableSize = 0L; + String s = "LOG14535 Populating quick stats for: "; for (FileStatus status : fileStatus) { + s += status.getPath() + ", "; // don't take directories into account for quick stats if (!status.isDir()) { tableSize += status.getLen(); numFiles += 1; } } + LOG.info(s/*, new Exception()*/); params.put(StatsSetupConst.NUM_FILES, Integer.toString(numFiles)); params.put(StatsSetupConst.TOTAL_SIZE, Long.toString(tableSize)); } @@ -1967,6 +1970,62 @@ public static MetaException newMetaException(String errorMessage, Exception e) { return cols; } + // TODO The following two utility methods can be moved to AcidUtils once no class in metastore is relying on them, + // right now ObjectStore.getAllMmTablesForCleanup is calling these method + /** + * Checks if a table is an ACID table that only supports INSERT, but not UPDATE/DELETE + * @param params table properties + * @return true if table is an INSERT_ONLY table, false otherwise + */ + // TODO# also check that transactional is true + public static boolean isInsertOnlyTable(Map params) { + return isInsertOnlyTable(params, false); + } + + public static boolean isInsertOnlyTable(Map params, boolean isCtas) { + String transactionalProp = params.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES); + return (transactionalProp != null && "insert_only".equalsIgnoreCase(transactionalProp)); + } + + public static boolean isInsertOnlyTable(Properties params) { + // TODO# redirect for now - fix before merge + HashMap testMap = new HashMap(); + for (String n : params.stringPropertyNames()) { + testMap.put(n, params.getProperty(n)); + } + return isInsertOnlyTable(testMap); + } + + /** The method for altering table props; may set the table to MM, non-MM, or not affect MM. */ + public static Boolean isToInsertOnlyTable(Map props) { + // TODO# Setting these separately is a very hairy issue in certain combinations, since we + // cannot decide what type of table this becomes without taking both into account, and + // in many cases the conversion might be illegal. + // The only thing we allow is tx = true w/o tx-props, for backward compat. + String transactional = props.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL); + String transactionalProp = props.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES); + if (transactional == null && transactionalProp == null) return null; // Not affected. + boolean isSetToTxn = "true".equalsIgnoreCase(transactional); + if (transactionalProp == null) { + if (isSetToTxn) return false; // Assume the full ACID table. + throw new RuntimeException("Cannot change '" + hive_metastoreConstants.TABLE_IS_TRANSACTIONAL + + "' without '" + hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES + "'"); + } + if (!"insert_only".equalsIgnoreCase(transactionalProp)) return false; // Not MM. 
+ if (!isSetToTxn) { + throw new RuntimeException("Cannot set '" + + hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES + "' to 'insert_only' without " + + "setting '" + hive_metastoreConstants.TABLE_IS_TRANSACTIONAL + "' to 'true'"); + } + return true; + } + + public static boolean isRemovedInsertOnlyTable(Set removedSet) { + boolean hasTxn = removedSet.contains(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL), + hasProps = removedSet.contains(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES); + return hasTxn || hasProps; + } + // given a list of partStats, this function will give you an aggr stats public static List aggrPartitionStats(List partStats, String dbName, String tableName, List partNames, List colNames, diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index b01e59e..d2038c5 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -25,6 +25,7 @@ import java.net.InetAddress; import java.net.URI; import java.nio.ByteBuffer; +import java.sql.Connection; import java.sql.SQLException; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; @@ -58,6 +59,7 @@ import javax.jdo.Query; import javax.jdo.Transaction; import javax.jdo.datastore.DataStoreCache; +import javax.jdo.datastore.JDOConnection; import javax.jdo.identity.IntIdentity; import javax.sql.DataSource; @@ -240,6 +242,7 @@ private boolean isInitialized = false; private PersistenceManager pm = null; private MetaStoreDirectSql directSql = null; + private DatabaseProduct dbType = null; private PartitionExpressionProxy expressionProxy = null; private Configuration hiveConf; private volatile int openTrasactionCalls = 0; @@ -429,6 +432,7 @@ private void initializeHelper(Properties dsProps) { pm = getPersistenceManager(); isInitialized = pm != null; if (isInitialized) { + dbType = determineDatabaseProduct(); expressionProxy = createExpressionProxy(hiveConf); if (HiveConf.getBoolVar(getConf(), ConfVars.METASTORE_TRY_DIRECT_SQL)) { String schema = prop.getProperty("javax.jdo.mapping.Schema"); @@ -442,6 +446,27 @@ private void initializeHelper(Properties dsProps) { " created in the thread with id: " + Thread.currentThread().getId()); } + private DatabaseProduct determineDatabaseProduct() { + try { + return DatabaseProduct.determineDatabaseProduct(getProductName(pm)); + } catch (SQLException e) { + LOG.warn("Cannot determine database product; assuming OTHER", e); + return DatabaseProduct.OTHER; + } + } + + private static String getProductName(PersistenceManager pm) { + JDOConnection jdoConn = pm.getDataStoreConnection(); + try { + return ((Connection)jdoConn.getNativeConnection()).getMetaData().getDatabaseProductName(); + } catch (Throwable t) { + LOG.warn("Error retrieving product name", t); + return null; + } finally { + jdoConn.close(); // We must release the connection before we call other pm methods. + } + } + /** * Creates the proxy used to evaluate expressions. This is here to prevent circular * dependency - ql -> metastore client <-> metastore server -> ql. 
If server and @@ -668,7 +693,6 @@ public boolean commitTransaction() { transactionStatus = TXN_STATUS.COMMITED; currentTransaction.commit(); } - return true; } @@ -774,7 +798,7 @@ public Database getDatabase(String name) throws NoSuchObjectException { } public Database getDatabaseInternal(String name) throws MetaException, NoSuchObjectException { - return new GetDbHelper(name, null, true, true) { + return new GetDbHelper(name, true, true) { @Override protected Database getSqlResult(GetHelper ctx) throws MetaException { return directSql.getDatabase(dbName); @@ -1495,13 +1519,13 @@ private Table convertToTable(MTable mtbl) throws MetaException { tableType = TableType.MANAGED_TABLE.toString(); } } - final Table table = new Table(mtbl.getTableName(), mtbl.getDatabase().getName(), mtbl - .getOwner(), mtbl.getCreateTime(), mtbl.getLastAccessTime(), mtbl - .getRetention(), convertToStorageDescriptor(mtbl.getSd()), - convertToFieldSchemas(mtbl.getPartitionKeys()), convertMap(mtbl.getParameters()), - mtbl.getViewOriginalText(), mtbl.getViewExpandedText(), tableType); - table.setRewriteEnabled(mtbl.isRewriteEnabled()); - return table; + final Table t = new Table(mtbl.getTableName(), mtbl.getDatabase().getName(), mtbl + .getOwner(), mtbl.getCreateTime(), mtbl.getLastAccessTime(), mtbl + .getRetention(), convertToStorageDescriptor(mtbl.getSd()), + convertToFieldSchemas(mtbl.getPartitionKeys()), convertMap(mtbl.getParameters()), + mtbl.getViewOriginalText(), mtbl.getViewExpandedText(), tableType); + t.setRewriteEnabled(mtbl.isRewriteEnabled()); + return t; } private MTable convertToMTable(Table tbl) throws InvalidObjectException, @@ -2750,7 +2774,8 @@ public GetHelper(String dbName, String tblName, boolean allowSql, boolean allowJ boolean isConfigEnabled = HiveConf.getBoolVar(getConf(), ConfVars.METASTORE_TRY_DIRECT_SQL) && (HiveConf.getBoolVar(getConf(), ConfVars.METASTORE_TRY_DIRECT_SQL_DDL) || !isInTxn); if (isConfigEnabled && directSql == null) { - directSql = new MetaStoreDirectSql(pm, getConf()); + dbType = determineDatabaseProduct(); + directSql = new MetaStoreDirectSql(pm, getConf(), ""); } if (!allowJdo && isConfigEnabled && !directSql.isCompatibleDatastore()) { @@ -2791,7 +2816,7 @@ public T run(boolean initTable) throws MetaException, NoSuchObjectException { throw ex; } catch (Exception ex) { LOG.error("", ex); - throw MetaStoreUtils.newMetaException(ex); + throw new MetaException(ex.getMessage()); } finally { close(); } @@ -2821,7 +2846,7 @@ private void handleDirectSqlError(Exception ex) throws MetaException, NoSuchObje if (ex instanceof MetaException) { throw (MetaException)ex; } - throw MetaStoreUtils.newMetaException(ex); + throw new MetaException(ex.getMessage()); } if (!isInTxn) { JDOException rollbackEx = null; @@ -2922,15 +2947,13 @@ protected String describeResult() { public abstract class GetDbHelper extends GetHelper { /** * GetHelper for returning db info using directSql/JDO. - * Since this is a db-level call, tblName is ignored, and null is passed irrespective of what is passed in. * @param dbName The Database Name - * @param tblName Placeholder param to match signature, always ignored. * @param allowSql Whether or not we allow DirectSQL to perform this query. * @param allowJdo Whether or not we allow ORM to perform this query. 
* @throws MetaException */ public GetDbHelper( - String dbName, String tblName, boolean allowSql, boolean allowJdo) throws MetaException { + String dbName,boolean allowSql, boolean allowJdo) throws MetaException { super(dbName,null,allowSql,allowJdo); } @@ -3351,8 +3374,12 @@ public void alterPartition(String dbname, String name, List part_vals, P } finally { if (!success) { rollbackTransaction(); - throw MetaStoreUtils.newMetaException( - "The transaction for alter partition did not commit successfully.", e); + MetaException metaException = new MetaException( + "The transaction for alter partition did not commit successfully."); + if (e != null) { + metaException.initCause(e); + } + throw metaException; } } } @@ -3376,8 +3403,12 @@ public void alterPartitions(String dbname, String name, List> part_ } finally { if (!success) { rollbackTransaction(); - throw MetaStoreUtils.newMetaException( - "The transaction for alter partition did not commit successfully.", e); + MetaException metaException = new MetaException( + "The transaction for alter partition did not commit successfully."); + if (e != null) { + metaException.initCause(e); + } + throw metaException; } } } @@ -7033,10 +7064,8 @@ private void writeMPartitionColumnStatistics(Table table, Partition partition, try { List stats = getMTableColumnStatistics(table, colNames, queryWrapper); - if (stats != null) { - for(MTableColumnStatistics cStat : stats) { - statsMap.put(cStat.getColName(), cStat); - } + for(MTableColumnStatistics cStat : stats) { + statsMap.put(cStat.getColName(), cStat); } } finally { queryWrapper.close(); @@ -7199,7 +7228,7 @@ public boolean updatePartitionColumnStatistics(ColumnStatistics colStats, List mStats = getMTableColumnStatistics(getTable(), colNames, queryWrapper); - if (mStats == null || mStats.isEmpty()) return null; + if (mStats.isEmpty()) return null; // LastAnalyzed is stored per column, but thrift object has it per multiple columns. // Luckily, nobody actually uses it, so we will set to lowest value of all columns for now. ColumnStatisticsDesc desc = StatObjectConverter.getTableColumnStatisticsDesc(mStats.get(0)); @@ -7440,7 +7469,7 @@ public void flushCache() { if (ex instanceof MetaException) { throw (MetaException) ex; } - throw MetaStoreUtils.newMetaException(ex); + throw new MetaException(ex.getMessage()); } finally { if (!committed) { rollbackTransaction(); @@ -7919,7 +7948,7 @@ private MVersionTable getMSchemaVersion() throws NoSuchObjectException, MetaExce throw new MetaException("Version table not found. 
" + "The metastore is not upgraded to " + MetaStoreSchemaInfoFactory.get(getConf()).getHiveSchemaVersion()); } else { - throw MetaStoreUtils.newMetaException(e); + throw e; } } committed = commitTransaction(); @@ -8901,7 +8930,7 @@ public void dropConstraint(String dbName, String tableName, @VisibleForTesting void rollbackAndCleanup(boolean success, Query query) { try { - if(!success) { + if (!success) { rollbackTransaction(); } } finally { @@ -8921,7 +8950,7 @@ void rollbackAndCleanup(boolean success, Query query) { @VisibleForTesting void rollbackAndCleanup(boolean success, QueryWrapper queryWrapper) { try { - if(!success) { + if (!success) { rollbackTransaction(); } } finally { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java index 71982a0..135bde7 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -28,7 +28,6 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.hive.common.classification.InterfaceStability; -import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java b/metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java index 3a3d184..dabede4 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java @@ -39,6 +39,7 @@ // These constants are also imported by org.apache.hadoop.hive.ql.io.AcidUtils. public static final String DEFAULT_TRANSACTIONAL_PROPERTY = "default"; + public static final String INSERTONLY_TRANSACTIONAL_PROPERTY = "insert_only"; TransactionalValidationListener(Configuration conf) { super(conf); @@ -117,7 +118,10 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw if ("true".equalsIgnoreCase(transactionalValue) && !"true".equalsIgnoreCase(oldTransactionalValue)) { //only need to check conformance if alter table enabled aicd if (!conformToAcid(newTable)) { - throw new MetaException("The table must be stored using an ACID compliant format (such as ORC)"); + // INSERT_ONLY tables don't have to conform to ACID requirement like ORC or bucketing + if (transactionalPropertiesValue == null || !"insert_only".equalsIgnoreCase(transactionalPropertiesValue)) { + throw new MetaException("The table must be stored using an ACID compliant format (such as ORC)"); + } } if (newTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) { @@ -135,7 +139,7 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw hasValidTransactionalValue = true; } - if (!hasValidTransactionalValue) { + if (!hasValidTransactionalValue && !MetaStoreUtils.isInsertOnlyTable(oldTable.getParameters())) { // if here, there is attempt to set transactional to something other than 'true' // and NOT the same value it was before throw new MetaException("TBLPROPERTIES with 'transactional'='true' cannot be unset"); @@ -152,8 +156,9 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw // 'transactional_properties' must match the old value. 
Any attempt to alter the previous // value will throw an error. An exception will still be thrown if the previous value was // null and an attempt is made to set it. This behaviour can be changed in the future. - if (oldTransactionalPropertiesValue == null - || !oldTransactionalPropertiesValue.equalsIgnoreCase(transactionalPropertiesValue) ) { + if ((oldTransactionalPropertiesValue == null + || !oldTransactionalPropertiesValue.equalsIgnoreCase(transactionalPropertiesValue)) + && !MetaStoreUtils.isInsertOnlyTable(oldTable.getParameters())) { throw new MetaException("TBLPROPERTIES with 'transactional_properties' cannot be " + "altered after the table is created"); } @@ -172,31 +177,39 @@ private void handleCreateTableTransactionalProp(PreCreateTableEvent context) thr if (parameters == null || parameters.isEmpty()) { return; } - String transactionalValue = null; - boolean transactionalPropFound = false; + String transactional = null; + String transactionalProperties = null; Set keys = new HashSet<>(parameters.keySet()); for(String key : keys) { - if(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.equalsIgnoreCase(key)) { - transactionalPropFound = true; - transactionalValue = parameters.get(key); + // Get the "transactional" tblproperties value + if (hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.equalsIgnoreCase(key)) { + transactional = parameters.get(key); parameters.remove(key); } + + // Get the "transactional_properties" tblproperties value + if (hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES.equalsIgnoreCase(key)) { + transactionalProperties = parameters.get(key); + } } - if (!transactionalPropFound) { + if (transactional == null) { return; } - if ("false".equalsIgnoreCase(transactionalValue)) { + if ("false".equalsIgnoreCase(transactional)) { // just drop transactional=false. 
For backward compatibility in case someone has scripts // with transactional=false LOG.info("'transactional'='false' is no longer a valid property and will be ignored"); return; } - if ("true".equalsIgnoreCase(transactionalValue)) { + if ("true".equalsIgnoreCase(transactional)) { if (!conformToAcid(newTable)) { - throw new MetaException("The table must be stored using an ACID compliant format (such as ORC)"); + // INSERT_ONLY tables don't have to conform to ACID requirement like ORC or bucketing + if (transactionalProperties == null || !"insert_only".equalsIgnoreCase(transactionalProperties)) { + throw new MetaException("The table must be stored using an ACID compliant format (such as ORC)"); + } } if (newTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) { @@ -210,7 +223,7 @@ private void handleCreateTableTransactionalProp(PreCreateTableEvent context) thr return; } - // transactional prop is found, but the value is not in expected range + // transactional is found, but the value is not in expected range throw new MetaException("'transactional' property of TBLPROPERTIES may only have value 'true'"); } @@ -273,6 +286,7 @@ private String validateTransactionalProperties(String transactionalProperties) { boolean isValid = false; switch (transactionalProperties) { case DEFAULT_TRANSACTIONAL_PROPERTY: + case INSERTONLY_TRANSACTIONAL_PROPERTY: isValid = true; break; default: diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java b/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java index 1dd50de..c105c6c 100755 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java @@ -29,6 +29,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -403,11 +404,12 @@ public static String makeDynamicPartName(Map spec) { throw new MetaException("Partition name is invalid. 
" + name); } LinkedHashMap partSpec = new LinkedHashMap(); - makeSpecFromName(partSpec, new Path(name)); + makeSpecFromName(partSpec, new Path(name), null); return partSpec; } - public static void makeSpecFromName(Map partSpec, Path currPath) { + public static boolean makeSpecFromName(Map partSpec, Path currPath, + Set requiredKeys) { List kvs = new ArrayList(); do { String component = currPath.getName(); @@ -425,8 +427,15 @@ public static void makeSpecFromName(Map partSpec, Path currPath) // reverse the list since we checked the part from leaf dir to table's base dir for (int i = kvs.size(); i > 0; i--) { - partSpec.put(kvs.get(i - 1)[0], kvs.get(i - 1)[1]); + String key = kvs.get(i - 1)[0]; + if (requiredKeys != null) { + requiredKeys.remove(key); + } + partSpec.put(key, kvs.get(i - 1)[1]); } + if (requiredKeys == null || requiredKeys.isEmpty()) return true; + LOG.warn("Cannot create partition spec from " + currPath + "; missing keys " + requiredKeys); + return false; } public static Map makeEscSpecFromName(String name) throws MetaException { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 3ba81ce..407f0f4 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -19,9 +19,14 @@ import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.ArrayList; +import java.util.HashMap; import java.util.HashMap; import java.util.LinkedList; +import java.util.LinkedList; import java.util.List; +import java.util.List; +import java.util.Map; import java.util.Map; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java index 4db203d..8249e7d 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java @@ -26,7 +26,6 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java index fb16cfc..0f7827b 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java @@ -27,7 +27,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -109,7 +108,6 @@ public boolean openTransaction() { @Override public boolean commitTransaction() { - return false; } @@ -120,8 +118,6 @@ public boolean isActiveTransaction() { @Override public void rollbackTransaction() { - - } @Override 
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java index 5448d0d..75eba79 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java @@ -59,8 +59,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; - import javax.jdo.Query; public class TestObjectStore { @@ -77,6 +77,15 @@ private static final String ROLE2 = "testobjectstorerole2"; private static final Logger LOG = LoggerFactory.getLogger(TestObjectStore.class.getName()); + private static final class LongSupplier implements Supplier { + public long value = 0; + + @Override + public Long get() { + return value; + } + } + public static class MockPartitionExpressionProxy implements PartitionExpressionProxy { @Override public String convertExprToFilter(byte[] expr) throws MetaException { @@ -265,6 +274,12 @@ public void testTableOps() throws MetaException, InvalidObjectException, NoSuchO objectStore.dropDatabase(DB1); } + private StorageDescriptor createFakeSd(String location) { + return new StorageDescriptor(null, location, null, null, false, 0, + new SerDeInfo("SerDeName", "serializationLib", null), null, null, null); + } + + /** * Tests partition operations */ @@ -272,7 +287,7 @@ public void testTableOps() throws MetaException, InvalidObjectException, NoSuchO public void testPartitionOps() throws MetaException, InvalidObjectException, NoSuchObjectException, InvalidInputException { Database db1 = new Database(DB1, "description", "locationurl", null); objectStore.createDatabase(db1); - StorageDescriptor sd = new StorageDescriptor(null, "location", null, null, false, 0, new SerDeInfo("SerDeName", "serializationLib", null), null, null, null); + StorageDescriptor sd = createFakeSd("location"); HashMap tableParams = new HashMap(); tableParams.put("EXTERNAL", "false"); FieldSchema partitionKey1 = new FieldSchema("Country", ColumnType.STRING_TYPE_NAME, ""); @@ -366,7 +381,7 @@ public void testDirectSqlErrorMetrics() throws Exception { Counter directSqlErrors = Metrics.getRegistry().getCounters().get(MetricsConstants.DIRECTSQL_ERRORS); - objectStore.new GetDbHelper("foo", null, true, true) { + objectStore.new GetDbHelper("foo", true, true) { @Override protected Database getSqlResult(ObjectStore.GetHelper ctx) throws MetaException { return null; @@ -381,7 +396,7 @@ protected Database getJdoResult(ObjectStore.GetHelper ctx) throws Meta Assert.assertEquals(0, directSqlErrors.getCount()); - objectStore.new GetDbHelper("foo", null, true, true) { + objectStore.new GetDbHelper("foo", true, true) { @Override protected Database getSqlResult(ObjectStore.GetHelper ctx) throws MetaException { throw new RuntimeException(); @@ -459,4 +474,4 @@ public void testQueryCloseOnError() throws Exception { Mockito.verify(spy, Mockito.times(3)) .rollbackAndCleanup(Mockito.anyBoolean(), Mockito.anyObject()); } -} +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index 9183edf..845e391 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -349,7 +349,8 @@ private Path getStagingDir(Path inputPath, boolean mkdir) { // Append task specific info to stagingPathName, instead of creating a sub-directory. 
// This way we don't have to worry about deleting the stagingPathName separately at // end of query execution. - dir = fs.makeQualified(new Path(stagingPathName + "_" + this.executionId + "-" + TaskRunner.getTaskRunnerID())); + dir = fs.makeQualified(new Path( + stagingPathName + "_" + this.executionId + "-" + TaskRunner.getTaskRunnerID())); LOG.debug("Created staging dir = " + dir + " for path = " + inputPath); @@ -960,7 +961,7 @@ public void setExplainConfig(ExplainConfiguration explainConfig) { this.explainConfig = explainConfig; } - public void resetOpContext(){ + public void resetOpContext() { opContext = new CompilationOpContext(); sequencer = new AtomicInteger(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index 4e7c80f..1157e00 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -39,7 +39,6 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang.StringUtils; - import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.ValidReadTxnList; @@ -817,7 +816,7 @@ public static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, S } // The following union operation returns a union, which traverses over the - // first set once and then then over each element of second set, in order, + // first set once and then then over each element of second set, in order, // that is not contained in first. This means it doesn't replace anything // in first set, and would preserve the WriteType in WriteEntity in first // set in case of outputs list. @@ -1206,7 +1205,7 @@ private int acquireLocks() { desc.setStatementId(txnMgr.getWriteIdAndIncrement()); } } - /*It's imperative that {@code acquireLocks()} is called for all commands so that + /*It's imperative that {@code acquireLocks()} is called for all commands so that HiveTxnManager can transition its state machine correctly*/ txnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState); if(txnMgr.recordSnapshot(plan)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java index dfad6c1..70e764e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java @@ -34,6 +34,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.Lists; + /** * Fast file merge operator for ORC and RCfile. This is an abstract class which * does not process any rows. Refer {@link org.apache.hadoop.hive.ql.exec.OrcFileMergeOperator} @@ -47,20 +49,21 @@ protected JobConf jc; protected FileSystem fs; - protected boolean autoDelete; - protected boolean exception; - protected Path outPath; - protected Path finalPath; - protected Path dpPath; - protected Path tmpPath; - protected Path taskTmpPath; - protected int listBucketingDepth; - protected boolean hasDynamicPartitions; - protected boolean isListBucketingAlterTableConcatenate; - protected boolean tmpPathFixedConcatenate; - protected boolean tmpPathFixed; - protected Set incompatFileSet; - protected transient DynamicPartitionCtx dpCtx; + private boolean autoDelete; + private Path outPath; // The output path used by the subclasses. + private Path finalPath; // Used as a final destination; same as outPath for MM tables. 
+ private Path dpPath; + private Path tmpPath; // Only stored to update based on the original in fixTmpPath. + private Path taskTmpPath; // Only stored to update based on the original in fixTmpPath. + private int listBucketingDepth; + private boolean hasDynamicPartitions; + private boolean isListBucketingAlterTableConcatenate; + private boolean tmpPathFixedConcatenate; + private boolean tmpPathFixed; + private Set incompatFileSet; + private transient DynamicPartitionCtx dpCtx; + private boolean isMmTable; + private String taskId; /** Kryo ctor. */ protected AbstractFileMergeOperator() { @@ -77,39 +80,50 @@ public void initializeOp(Configuration hconf) throws HiveException { this.jc = new JobConf(hconf); incompatFileSet = new HashSet(); autoDelete = false; - exception = false; tmpPathFixed = false; tmpPathFixedConcatenate = false; - outPath = null; - finalPath = null; dpPath = null; - tmpPath = null; - taskTmpPath = null; dpCtx = conf.getDpCtx(); hasDynamicPartitions = conf.hasDynamicPartitions(); isListBucketingAlterTableConcatenate = conf .isListBucketingAlterTableConcatenate(); listBucketingDepth = conf.getListBucketingDepth(); Path specPath = conf.getOutputPath(); - updatePaths(Utilities.toTempPath(specPath), - Utilities.toTaskTempPath(specPath)); + isMmTable = conf.getIsMmTable(); + if (isMmTable) { + updatePaths(specPath, null); + } else { + updatePaths(Utilities.toTempPath(specPath), Utilities.toTaskTempPath(specPath)); + } try { fs = specPath.getFileSystem(hconf); - autoDelete = fs.deleteOnExit(outPath); + if (!isMmTable) { + // Do not delete for MM tables. We either want the file if we succeed, or we must + // delete is explicitly before proceeding if the merge fails. + autoDelete = fs.deleteOnExit(outPath); + } } catch (IOException e) { - this.exception = true; - throw new HiveException("Failed to initialize AbstractFileMergeOperator", - e); + throw new HiveException("Failed to initialize AbstractFileMergeOperator", e); } } // sets up temp and task temp path private void updatePaths(Path tp, Path ttp) { - String taskId = Utilities.getTaskId(jc); + if (taskId == null) { + taskId = Utilities.getTaskId(jc); + } tmpPath = tp; - taskTmpPath = ttp; - finalPath = new Path(tp, taskId); - outPath = new Path(ttp, Utilities.toTempPath(taskId)); + if (isMmTable) { + taskTmpPath = null; + // Make sure we don't collide with the source. + outPath = finalPath = new Path(tmpPath, taskId + ".merged"); + } else { + taskTmpPath = ttp; + finalPath = new Path(tp, taskId); + outPath = new Path(ttp, Utilities.toTempPath(taskId)); + } + Utilities.LOG14535.info("Paths for merge " + taskId + ": tmp " + tmpPath + ", task " + + taskTmpPath + ", final " + finalPath + ", out " + outPath); } /** @@ -142,7 +156,7 @@ private void updatePaths(Path tp, Path ttp) { protected void fixTmpPath(Path inputPath, int depthDiff) throws IOException { // don't need to update tmp paths when there is no depth difference in paths - if (depthDiff <=0) { + if (depthDiff <= 0) { return; } @@ -157,10 +171,12 @@ protected void fixTmpPath(Path inputPath, int depthDiff) throws IOException { } Path newTmpPath = new Path(tmpPath, newPath); - Path newTaskTmpPath = new Path(taskTmpPath, newPath); if (!fs.exists(newTmpPath)) { + Utilities.LOG14535.info("Creating " + newTmpPath); fs.mkdirs(newTmpPath); } + + Path newTaskTmpPath = (taskTmpPath != null) ? 
new Path(taskTmpPath, newPath) : null; updatePaths(newTmpPath, newTaskTmpPath); } @@ -182,7 +198,7 @@ protected void checkPartitionsMatch(Path inputPath) throws IOException { } protected void fixTmpPath(Path path) throws IOException { - + Utilities.LOG14535.info("Calling fixTmpPath with " + path); // Fix temp path for alter table ... concatenate if (isListBucketingAlterTableConcatenate) { if (this.tmpPathFixedConcatenate) { @@ -208,38 +224,49 @@ protected void fixTmpPath(Path path) throws IOException { @Override public void closeOp(boolean abort) throws HiveException { try { - if (!abort) { - // if outPath does not exist, then it means all paths within combine split are skipped as - // they are incompatible for merge (for example: files without stripe stats). - // Those files will be added to incompatFileSet - if (fs.exists(outPath)) { - FileStatus fss = fs.getFileStatus(outPath); + if (abort) { + if (!autoDelete || isMmTable) { + fs.delete(outPath, true); + } + return; + } + // if outPath does not exist, then it means all paths within combine split are skipped as + // they are incompatible for merge (for example: files without stripe stats). + // Those files will be added to incompatFileSet + if (fs.exists(outPath)) { + FileStatus fss = fs.getFileStatus(outPath); + if (!isMmTable) { if (!fs.rename(outPath, finalPath)) { - throw new IOException( - "Unable to rename " + outPath + " to " + finalPath); + throw new IOException("Unable to rename " + outPath + " to " + finalPath); } - LOG.info("renamed path " + outPath + " to " + finalPath + " . File" + - " size is " - + fss.getLen()); + LOG.info("Renamed path " + outPath + " to " + finalPath + + "(" + fss.getLen() + " bytes)."); + } else { + assert finalPath.equals(outPath); + // There's always just one file that we have merged. + // The union/DP/etc. should already be account for in the path. + Utilities.writeMmCommitManifest(Lists.newArrayList(outPath), + tmpPath.getParent(), fs, taskId, conf.getTxnId(), conf.getStmtId(), null); + LOG.info("Merged into " + finalPath + "(" + fss.getLen() + " bytes)."); } + } - // move any incompatible files to final path - if (incompatFileSet != null && !incompatFileSet.isEmpty()) { - for (Path incompatFile : incompatFileSet) { - Path destDir = finalPath.getParent(); - try { - Utilities.renameOrMoveFiles(fs, incompatFile, destDir); - LOG.info("Moved incompatible file " + incompatFile + " to " + - destDir); - } catch (HiveException e) { - LOG.error("Unable to move " + incompatFile + " to " + destDir); - throw new IOException(e); - } - } + // move any incompatible files to final path + if (incompatFileSet != null && !incompatFileSet.isEmpty()) { + if (isMmTable) { + // We only support query-time merge for MM tables, so don't handle this. 
+ throw new HiveException("Incompatible files should not happen in MM tables."); } - } else { - if (!autoDelete) { - fs.delete(outPath, true); + for (Path incompatFile : incompatFileSet) { + Path destDir = finalPath.getParent(); + try { + Utilities.renameOrMoveFiles(fs, incompatFile, destDir); + LOG.info("Moved incompatible file " + incompatFile + " to " + + destDir); + } catch (HiveException e) { + LOG.error("Unable to move " + incompatFile + " to " + destDir); + throw new IOException(e); + } } } } catch (IOException e) { @@ -253,16 +280,27 @@ public void jobCloseOp(Configuration hconf, boolean success) try { Path outputDir = conf.getOutputPath(); FileSystem fs = outputDir.getFileSystem(hconf); - Path backupPath = backupOutputPath(fs, outputDir); - Utilities - .mvFileToFinalPath(outputDir, hconf, success, LOG, conf.getDpCtx(), - null, reporter); - if (success) { - LOG.info("jobCloseOp moved merged files to output dir: " + outputDir); - } - if (backupPath != null) { - fs.delete(backupPath, true); + Long mmWriteId = conf.getTxnId(); + int stmtId = conf.getStmtId(); + if (!isMmTable) { + Path backupPath = backupOutputPath(fs, outputDir); + Utilities.mvFileToFinalPath( + outputDir, hconf, success, LOG, conf.getDpCtx(), null, reporter); + if (success) { + LOG.info("jobCloseOp moved merged files to output dir: " + outputDir); + } + if (backupPath != null) { + fs.delete(backupPath, true); + } + } else { + int dpLevels = dpCtx == null ? 0 : dpCtx.getNumDPCols(), + lbLevels = conf.getListBucketingDepth(); + // We don't expect missing buckets from mere (actually there should be no buckets), + // so just pass null as bucketing context. Union suffix should also be accounted for. + Utilities.handleMmTableFinalPath(outputDir.getParent(), null, hconf, success, + dpLevels, lbLevels, null, mmWriteId, stmtId, reporter, isMmTable, false); } + } catch (IOException e) { throw new HiveException("Failed jobCloseOp for AbstractFileMergeOperator", e); @@ -290,4 +328,12 @@ public String getName() { public static String getOperatorName() { return "MERGE"; } + + protected final Path getOutPath() { + return outPath; + } + + protected final void addIncompatibleFile(Path path) { + incompatFileSet.add(path); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java index 2683f29..1f223f5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java @@ -18,17 +18,20 @@ package org.apache.hadoop.hive.ql.exec; +import java.io.IOException; import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hive.common.JavaUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.DriverContext; -import org.apache.hadoop.hive.ql.parse.LoadSemanticAnalyzer; import org.apache.hadoop.hive.ql.plan.CopyWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.util.StringUtils; @@ -37,7 +40,6 @@ * CopyTask implementation. 
**/ public class CopyTask extends Task implements Serializable { - private static final long serialVersionUID = 1L; private static transient final Logger LOG = LoggerFactory.getLogger(CopyTask.class); @@ -48,19 +50,24 @@ public CopyTask() { @Override public int execute(DriverContext driverContext) { + Path[] from = work.getFromPaths(), to = work.getToPaths(); + for (int i = 0; i < from.length; ++i) { + int result = copyOnePath(from[i], to[i]); + if (result != 0) return result; + } + return 0; + } + + protected int copyOnePath(Path fromPath, Path toPath) { FileSystem dstFs = null; - Path toPath = null; try { - Path fromPath = work.getFromPath(); - toPath = work.getToPath(); - console.printInfo("Copying data from " + fromPath.toString(), " to " + toPath.toString()); FileSystem srcFs = fromPath.getFileSystem(conf); dstFs = toPath.getFileSystem(conf); - FileStatus[] srcs = LoadSemanticAnalyzer.matchFilesOrDir(srcFs, fromPath); + FileStatus[] srcs = matchFilesOrDir(srcFs, fromPath, work.doSkipSourceMmDirs()); if (srcs == null || srcs.length == 0) { if (work.isErrorOnSrcEmpty()) { console.printError("No files matching path: " + fromPath.toString()); @@ -96,6 +103,46 @@ public int execute(DriverContext driverContext) { } } + // Note: initially copied from LoadSemanticAnalyzer. + private static FileStatus[] matchFilesOrDir( + FileSystem fs, Path path, boolean isSourceMm) throws IOException { + if (!fs.exists(path)) return null; + if (!isSourceMm) return matchFilesOneDir(fs, path, null); + // TODO: this doesn't handle list bucketing properly. Does the original exim do that? + FileStatus[] mmDirs = fs.listStatus(path, new JavaUtils.AnyIdDirFilter()); + if (mmDirs == null || mmDirs.length == 0) return null; + List allFiles = new ArrayList(); + for (FileStatus mmDir : mmDirs) { + Utilities.LOG14535.info("Found source MM directory " + mmDir.getPath()); + matchFilesOneDir(fs, mmDir.getPath(), allFiles); + } + return allFiles.toArray(new FileStatus[allFiles.size()]); + } + + private static FileStatus[] matchFilesOneDir( + FileSystem fs, Path path, List result) throws IOException { + FileStatus[] srcs = fs.globStatus(path, new EximPathFilter()); + if (srcs != null && srcs.length == 1) { + if (srcs[0].isDirectory()) { + srcs = fs.listStatus(srcs[0].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); + } + } + if (result != null && srcs != null) { + for (int i = 0; i < srcs.length; ++i) { + result.add(srcs[i]); + } + } + return srcs; + } + + private static final class EximPathFilter implements PathFilter { + @Override + public boolean accept(Path p) { + String name = p.getName(); + return name.equals("_metadata") ? 
true : !name.startsWith("_") && !name.startsWith("."); + } + } + @Override public StageType getType() { return StageType.COPY; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 646bb23..865d65c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -57,7 +57,10 @@ import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; @@ -179,8 +182,10 @@ import org.apache.hadoop.hive.ql.plan.GrantRevokeRoleDDL; import org.apache.hadoop.hive.ql.plan.InsertTableDesc; import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; +import org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc; import org.apache.hadoop.hive.ql.plan.LockDatabaseDesc; import org.apache.hadoop.hive.ql.plan.LockTableDesc; +import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.MsckDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc; @@ -256,6 +261,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; /** * DDLTask implementation. @@ -714,7 +720,8 @@ private int mergeFiles(Hive db, AlterTablePartMergeFilesDesc mergeFilesDesc, // merge work only needs input and output. MergeFileWork mergeWork = new MergeFileWork(mergeFilesDesc.getInputDir(), - mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass().getName()); + mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass().getName(), + mergeFilesDesc.getTableDesc()); LinkedHashMap> pathToAliases = new LinkedHashMap<>(); ArrayList inputDirstr = new ArrayList(1); inputDirstr.add(mergeFilesDesc.getInputDir().toString()); @@ -1899,7 +1906,7 @@ private void checkArchiveProperty(int partSpecLevel, private int compact(Hive db, AlterTableSimpleDesc desc) throws HiveException { Table tbl = db.getTable(desc.getTableName()); - if (!AcidUtils.isAcidTable(tbl)) { + if (!AcidUtils.isFullAcidTable(tbl) && !MetaStoreUtils.isInsertOnlyTable(tbl.getParameters())) { throw new HiveException(ErrorMsg.NONACID_COMPACTION_NOT_SUPPORTED, tbl.getDbName(), tbl.getTableName()); } @@ -3638,14 +3645,16 @@ private int alterTable(Hive db, AlterTableDesc alterTbl) throws HiveException { // Don't change the table object returned by the metastore, as we'll mess with it's caches. Table oldTbl = tbl; tbl = oldTbl.copy(); + // Handle child tasks here. We could add them directly whereever we need, + // but let's make it a little bit more explicit. 
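The CopyTask change above adds one extra directory level when the source is an MM (insert-only) table: instead of globbing data files directly under the table or partition directory, it first lists the write-id ("delta_...") directories via JavaUtils.AnyIdDirFilter and then collects the visible files inside each one. A rough standalone equivalent, using only the Hadoop FileSystem API and assuming the conventional delta_ prefix, might look like the sketch below; the filter and listMmSourceFiles names are hypothetical, not the code shipped in this patch, and (as the TODO above notes) list bucketing is not handled.

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.PathFilter;

    public final class MmCopySketch {

      /** Accepts directories whose name starts with the delta prefix, e.g. delta_0000001_0000001. */
      private static final PathFilter DELTA_DIR_FILTER = new PathFilter() {
        @Override
        public boolean accept(Path p) {
          return p.getName().startsWith("delta_");
        }
      };

      /** Skips dot/underscore-prefixed names, mirroring FileUtils.HIDDEN_FILES_PATH_FILTER. */
      private static final PathFilter VISIBLE_FILES_FILTER = new PathFilter() {
        @Override
        public boolean accept(Path p) {
          String name = p.getName();
          return !name.startsWith("_") && !name.startsWith(".");
        }
      };

      /** Lists data files of an MM source: one level of delta_* directories, then the files inside. */
      public static List<FileStatus> listMmSourceFiles(FileSystem fs, Path tableOrPartDir)
          throws IOException {
        List<FileStatus> result = new ArrayList<>();
        for (FileStatus deltaDir : fs.listStatus(tableOrPartDir, DELTA_DIR_FILTER)) {
          if (!deltaDir.isDirectory()) {
            continue; // a stray file with a delta-like name; ignored in this sketch
          }
          for (FileStatus file : fs.listStatus(deltaDir.getPath(), VISIBLE_FILES_FILTER)) {
            result.add(file);
          }
        }
        return result;
      }
    }

Keeping the scan to a single delta level appears to match the MM layout the rest of this patch writes, where committed data sits one delta directory below the table or partition root.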
if (allPartitions != null) { // Alter all partitions for (Partition part : allPartitions) { - alterTableOrSinglePartition(alterTbl, tbl, part); + addChildTasks(alterTableOrSinglePartition(alterTbl, tbl, part)); } } else { // Just alter the table - alterTableOrSinglePartition(alterTbl, tbl, null); + addChildTasks(alterTableOrSinglePartition(alterTbl, tbl, null)); } if (allPartitions == null) { @@ -3720,6 +3729,13 @@ private boolean addIfAbsentByName(WriteEntity newWriteEntity) { return addIfAbsentByName(newWriteEntity, work.getOutputs()); } + private void addChildTasks(List> extraTasks) { + if (extraTasks == null) return; + for (Task newTask : extraTasks) { + addDependentTask(newTask); + } + } + private boolean isSchemaEvolutionEnabled(Table tbl) { boolean isAcid = AcidUtils.isTablePropertyTransactional(tbl.getMetadata()); if (isAcid || HiveConf.getBoolVar(conf, ConfVars.HIVE_SCHEMA_EVOLUTION)) { @@ -3728,12 +3744,13 @@ private boolean isSchemaEvolutionEnabled(Table tbl) { return false; } + private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition part) { return (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); } - private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Partition part) - throws HiveException { + private List> alterTableOrSinglePartition( + AlterTableDesc alterTbl, Table tbl, Partition part) throws HiveException { EnvironmentContext environmentContext = alterTbl.getEnvironmentContext(); if (environmentContext == null) { environmentContext = new EnvironmentContext(); @@ -3895,29 +3912,9 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part } sd.setCols(alterTbl.getNewCols()); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS) { - if (StatsSetupConst.USER.equals(environmentContext.getProperties() - .get(StatsSetupConst.STATS_GENERATED))) { - environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); - } - if (part != null) { - part.getTPartition().getParameters().putAll(alterTbl.getProps()); - } else { - tbl.getTTable().getParameters().putAll(alterTbl.getProps()); - } + return alterTableAddProps(alterTbl, tbl, part, environmentContext); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.DROPPROPS) { - Iterator keyItr = alterTbl.getProps().keySet().iterator(); - if (StatsSetupConst.USER.equals(environmentContext.getProperties() - .get(StatsSetupConst.STATS_GENERATED))) { - // drop a stats parameter, which triggers recompute stats update automatically - environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); - } - while (keyItr.hasNext()) { - if (part != null) { - part.getTPartition().getParameters().remove(keyItr.next()); - } else { - tbl.getTTable().getParameters().remove(keyItr.next()); - } - } + return alterTableDropProps(alterTbl, tbl, part, environmentContext); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDEPROPS) { StorageDescriptor sd = retrieveStorageDescriptor(tbl, part); sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps()); @@ -4056,12 +4053,12 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part } else if (alterTbl.getOp() == AlterTableTypes.ALTERBUCKETNUM) { if (part != null) { if (part.getBucketCount() == alterTbl.getNumberBuckets()) { - return 0; + return null; } part.setBucketCount(alterTbl.getNumberBuckets()); } else { if (tbl.getNumBuckets() == alterTbl.getNumberBuckets()) { - return 0; + return null; } 
tbl.setNumBuckets(alterTbl.getNumberBuckets()); } @@ -4069,7 +4066,215 @@ private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Part throw new HiveException(ErrorMsg.UNSUPPORTED_ALTER_TBL_OP, alterTbl.getOp().toString()); } - return 0; + return null; + } + + private List> alterTableDropProps(AlterTableDesc alterTbl, Table tbl, + Partition part, EnvironmentContext environmentContext) throws HiveException { + if (StatsSetupConst.USER.equals(environmentContext.getProperties() + .get(StatsSetupConst.STATS_GENERATED))) { + // drop a stats parameter, which triggers recompute stats update automatically + environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); + } + + List> result = null; + if (part == null) { + Set removedSet = alterTbl.getProps().keySet(); + boolean isFromMmTable = MetaStoreUtils.isInsertOnlyTable(tbl.getParameters()), + isRemoved = MetaStoreUtils.isRemovedInsertOnlyTable(removedSet); + if (isFromMmTable && isRemoved) { + result = generateRemoveMmTasks(tbl); + } + } + Iterator keyItr = alterTbl.getProps().keySet().iterator(); + while (keyItr.hasNext()) { + if (part != null) { + part.getTPartition().getParameters().remove(keyItr.next()); + } else { + tbl.getTTable().getParameters().remove(keyItr.next()); + } + } + return result; + } + + private List> generateRemoveMmTasks(Table tbl) throws HiveException { + // To avoid confusion from nested MM directories when table is converted back and forth, we + // want to rename mm_ dirs to remove the prefix; however, given the unpredictable nested + // directory handling in Hive/MR, we will instead move all the files into the root directory. + // We will also delete any directories that are not committed. + // Note that this relies on locks. Note also that we only do the renames AFTER the metastore + // operation commits. Deleting uncommitted things is safe, but moving stuff before we convert + // could cause data loss. + List allMmDirs = new ArrayList<>(); + if (tbl.isStoredAsSubDirectories()) { + // TODO: support this? we only bail because it's a PITA and hardly anyone seems to care. + throw new HiveException("Converting list bucketed tables stored as subdirectories " + + " to and from MM is not supported"); + } + List bucketCols = tbl.getBucketCols(); + if (bucketCols != null && !bucketCols.isEmpty() + && HiveConf.getBoolVar(conf, ConfVars.HIVE_STRICT_CHECKS_BUCKETING)) { + throw new HiveException("Converting bucketed tables from MM is not supported by default; " + + "copying files from multiple MM directories may potentially break the buckets. You " + + "can set " + ConfVars.HIVE_STRICT_CHECKS_BUCKETING.varname + + " to false for this query if you want to force the conversion."); + } + Hive db = getHive(); + String value = conf.get(ValidTxnList.VALID_TXNS_KEY); + ValidTxnList validTxnList = value == null ? 
new ValidReadTxnList() : new ValidReadTxnList(value); + if (tbl.getPartitionKeys().size() > 0) { + PartitionIterable parts = new PartitionIterable(db, tbl, null, + HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); + Iterator partIter = parts.iterator(); + while (partIter.hasNext()) { + Partition part = partIter.next(); + checkMmLb(part); + handleRemoveMm(part.getDataLocation(), validTxnList, allMmDirs); + } + } else { + checkMmLb(tbl); + handleRemoveMm(tbl.getDataLocation(), validTxnList, allMmDirs); + } + List targetPaths = new ArrayList<>(allMmDirs.size()); + List targetPrefix = new ArrayList<>(allMmDirs.size()); + int prefixLen = JavaUtils.DELTA_PREFIX.length(); + for (int i = 0; i < allMmDirs.size(); ++i) { + Path src = allMmDirs.get(i); + Path tgt = src.getParent(); + String prefix = src.getName().substring(prefixLen + 1) + "_"; + Utilities.LOG14535.info("Will move " + src + " to " + tgt + " (prefix " + prefix + ")"); + targetPaths.add(tgt); + targetPrefix.add(prefix); + } + // Don't set inputs and outputs - the locks have already been taken so it's pointless. + MoveWork mw = new MoveWork(null, null, null, null, false); + mw.setMultiFilesDesc(new LoadMultiFilesDesc( + allMmDirs, targetPaths, targetPrefix, true, null, null)); + return Lists.>newArrayList(TaskFactory.get(mw, conf)); + } + + private void checkMmLb(Table tbl) throws HiveException { + if (!tbl.isStoredAsSubDirectories()) return; + // TODO: support this? + throw new HiveException("Converting list bucketed tables stored as subdirectories " + + " to and from MM is not supported"); + } + + private void checkMmLb(Partition part) throws HiveException { + if (!part.isStoredAsSubDirectories()) return; + // TODO: support this? + throw new HiveException("Converting list bucketed tables stored as subdirectories " + + " to and from MM is not supported. Please create a table in the desired format."); + } + + private void handleRemoveMm( + Path path, ValidTxnList validTxnList, List result) throws HiveException { + // Note: doesn't take LB into account; that is not presently supported here (throws above). + try { + FileSystem fs = path.getFileSystem(conf); + for (FileStatus file : fs.listStatus(path)) { + Path childPath = file.getPath(); + if (!file.isDirectory()) { + ensureDelete(fs, childPath, "a non-directory file"); + continue; + } + Long writeId = JavaUtils.extractTxnId(childPath); + if (writeId == null) { + ensureDelete(fs, childPath, "an unknown directory"); + } else if (!validTxnList.isTxnValid(writeId)) { + // Assume no concurrent active writes - we rely on locks here. We could check and fail. + ensureDelete(fs, childPath, "an uncommitted directory"); + } else { + result.add(childPath); + } + } + } catch (IOException ex) { + throw new HiveException(ex); + } + } + + private static void ensureDelete(FileSystem fs, Path path, String what) throws IOException { + Utilities.LOG14535.info("Deleting " + what + " " + path); + try { + if (!fs.delete(path, true)) throw new IOException("delete returned false"); + } catch (Exception ex) { + String error = "Couldn't delete " + path + "; cannot remove MM setting from the table"; + LOG.error(error, ex); + throw (ex instanceof IOException) ? (IOException)ex : new IOException(ex); + } + } + + private List> generateAddMmTasks(Table tbl) throws HiveException { + // We will move all the files in the table/partition directories into the first MM + // directory, then commit the first write ID. 
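generateAddMmTasks above converts an existing table to the MM layout by choosing a single write id, deriving the corresponding delta directory name (via AcidUtils.deltaSubdir in the body that follows), and scheduling a MoveWork that relocates each table or partition directory's current contents into that directory. The sketch below shows only the shape of that plan; buildDeltaDirName and planMoves are illustrative helpers, and the zero-padded delta_<writeId>_<writeId>_<stmtId> name is an assumption about what deltaSubdir produces, not a quote of it.

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.fs.Path;

    public final class MmConversionSketch {

      /** Assumed layout: delta_<writeId>_<writeId>_<stmtId>, ids zero-padded to 7 and 4 digits. */
      public static String buildDeltaDirName(long writeId, int stmtId) {
        return String.format("delta_%07d_%07d_%04d", writeId, writeId, stmtId);
      }

      /** For each existing data directory, plan a move of its contents into the first delta dir. */
      public static List<Path[]> planMoves(List<Path> dataDirs, long writeId, int stmtId) {
        String deltaDir = buildDeltaDirName(writeId, stmtId);
        List<Path[]> srcToTgt = new ArrayList<>();
        for (Path src : dataDirs) {
          srcToTgt.add(new Path[] { src, new Path(src, deltaDir) });
        }
        return srcToTgt;
      }

      public static void main(String[] args) {
        for (Path[] move : planMoves(
            java.util.Arrays.asList(new Path("/warehouse/t/p=1"), new Path("/warehouse/t/p=2")),
            1L, 0)) {
          // e.g. /warehouse/t/p=1 -> /warehouse/t/p=1/delta_0000001_0000001_0000
          System.out.println(move[0] + " -> " + move[1]);
        }
      }
    }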
+ List srcs = new ArrayList<>(), tgts = new ArrayList<>(); + long mmWriteId = 0; + try { + HiveTxnManager txnManager = SessionState.get().getTxnMgr(); + if (txnManager.isTxnOpen()) { + mmWriteId = txnManager.getCurrentTxnId(); + } else { + mmWriteId = txnManager.openTxn(new Context(conf), conf.getUser()); + txnManager.commitTxn(); + } + } catch (Exception e) { + String errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage(); + console.printError(errorMessage, "\n" + + org.apache.hadoop.util.StringUtils.stringifyException(e)); + } + int stmtId = 0; + String mmDir = AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId); + Hive db = getHive(); + if (tbl.getPartitionKeys().size() > 0) { + PartitionIterable parts = new PartitionIterable(db, tbl, null, + HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); + Iterator partIter = parts.iterator(); + while (partIter.hasNext()) { + Partition part = partIter.next(); + checkMmLb(part); + Path src = part.getDataLocation(), tgt = new Path(src, mmDir); + srcs.add(src); + tgts.add(tgt); + Utilities.LOG14535.info("Will move " + src + " to " + tgt); + } + } else { + checkMmLb(tbl); + Path src = tbl.getDataLocation(), tgt = new Path(src, mmDir); + srcs.add(src); + tgts.add(tgt); + Utilities.LOG14535.info("Will move " + src + " to " + tgt); + } + // Don't set inputs and outputs - the locks have already been taken so it's pointless. + MoveWork mw = new MoveWork(null, null, null, null, false); + mw.setMultiFilesDesc(new LoadMultiFilesDesc(srcs, tgts, true, null, null)); + ImportCommitWork icw = new ImportCommitWork(tbl.getDbName(), tbl.getTableName(), mmWriteId, stmtId); + Task mv = TaskFactory.get(mw, conf), ic = TaskFactory.get(icw, conf); + mv.addDependentTask(ic); + return Lists.>newArrayList(mv); + } + + private List> alterTableAddProps(AlterTableDesc alterTbl, Table tbl, + Partition part, EnvironmentContext environmentContext) throws HiveException { + if (StatsSetupConst.USER.equals(environmentContext.getProperties() + .get(StatsSetupConst.STATS_GENERATED))) { + environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); + } + List> result = null; + if (part != null) { + part.getTPartition().getParameters().putAll(alterTbl.getProps()); + } else { + boolean isFromMmTable = MetaStoreUtils.isInsertOnlyTable(tbl.getParameters()); + Boolean isToMmTable = MetaStoreUtils.isToInsertOnlyTable(alterTbl.getProps()); + if (isToMmTable != null) { + if (!isFromMmTable && isToMmTable) { + result = generateAddMmTasks(tbl); + } else if (isFromMmTable && !isToMmTable) { + result = generateRemoveMmTasks(tbl); + } + } + tbl.getTTable().getParameters().putAll(alterTbl.getProps()); + } + return result; } private int dropConstraint(Hive db, AlterTableDesc alterTbl) @@ -4442,7 +4647,7 @@ private int createTable(Hive db, CreateTableDesc crtTbl) throws HiveException { } // create the table - if (crtTbl.getReplaceMode()){ + if (crtTbl.getReplaceMode()) { // replace-mode creates are really alters using CreateTableDesc. 
try { db.alterTable(tbl.getDbName()+"."+tbl.getTableName(),tbl,null); @@ -4459,18 +4664,22 @@ private int createTable(Hive db, CreateTableDesc crtTbl) throws HiveException { } else { db.createTable(tbl, crtTbl.getIfNotExists()); } - if ( crtTbl.isCTAS()) { + Long mmWriteId = crtTbl.getInitialMmWriteId(); + if (crtTbl.isCTAS() || mmWriteId != null) { Table createdTable = db.getTable(tbl.getDbName(), tbl.getTableName()); - DataContainer dc = new DataContainer(createdTable.getTTable()); - SessionState.get().getLineageState().setLineage( - createdTable.getPath(), dc, createdTable.getCols() - ); + if (crtTbl.isCTAS()) { + DataContainer dc = new DataContainer(createdTable.getTTable()); + SessionState.get().getLineageState().setLineage( + createdTable.getPath(), dc, createdTable.getCols() + ); + } } } addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK)); return 0; } + /** * Create a new table like an existing table. * diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DependencyCollectionTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DependencyCollectionTask.java index 9189cfc..e639572 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DependencyCollectionTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DependencyCollectionTask.java @@ -20,7 +20,6 @@ import java.io.Serializable; -import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.DriverContext; import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork; import org.apache.hadoop.hive.ql.plan.api.StageType; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index 13750cd..d2d9946 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -31,12 +31,15 @@ import org.apache.commons.lang3.StringEscapeUtils; import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader; @@ -76,6 +79,7 @@ import org.slf4j.LoggerFactory; import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; /** * FetchTask implementation. 
@@ -265,11 +269,16 @@ private boolean getNextPath() throws Exception { while (iterPath.hasNext()) { currPath = iterPath.next(); currDesc = iterPartDesc.next(); + Utilities.LOG14535.debug("Considering " + currPath); if (isNonNativeTable) { return true; } FileSystem fs = currPath.getFileSystem(job); if (fs.exists(currPath)) { + if (extractValidTxnList() != null && + MetaStoreUtils.isInsertOnlyTable(currDesc.getTableDesc().getProperties())) { + return true; + } for (FileStatus fStat : listStatusUnderPath(fs, currPath)) { if (fStat.getLen() > 0) { return true; @@ -277,6 +286,7 @@ private boolean getNextPath() throws Exception { } } } + Utilities.LOG14535.debug("Done with all the paths"); return false; } @@ -306,6 +316,7 @@ static void setFetchOperatorContext(JobConf conf, List paths) { if (splits == null) { return null; } + if (!isPartitioned || convertedOI == null) { currSerDe = tableSerDe; ObjectConverter = null; @@ -369,6 +380,10 @@ public boolean doNext(WritableComparable key, Writable value) throws IOException Class formatter = currDesc.getInputFileFormatClass(); Utilities.copyTableJobPropertiesToConf(currDesc.getTableDesc(), job); InputFormat inputFormat = getInputFormatFromCache(formatter, job); + String inputs = processCurrPathForMmWriteIds(inputFormat); + Utilities.LOG14535.info("Setting fetch inputs to " + inputs); + if (inputs == null) return null; + job.set("mapred.input.dir", inputs); InputSplit[] splits = inputFormat.getSplits(job, 1); FetchInputFormatSplit[] inputSplits = new FetchInputFormatSplit[splits.length]; @@ -388,6 +403,39 @@ public boolean doNext(WritableComparable key, Writable value) throws IOException return null; } + private String processCurrPathForMmWriteIds(InputFormat inputFormat) throws IOException { + if (inputFormat instanceof HiveInputFormat) { + return StringUtils.escapeString(currPath.toString()); // No need to process here. + } + ValidTxnList validTxnList; + if (MetaStoreUtils.isInsertOnlyTable(currDesc.getTableDesc().getProperties())) { + validTxnList = extractValidTxnList(); + } else { + validTxnList = null; // non-MM case + } + if (validTxnList != null) { + Utilities.LOG14535.info("Observing " + currDesc.getTableName() + ": " + validTxnList); + } + + Path[] dirs = HiveInputFormat.processPathsForMmRead(Lists.newArrayList(currPath), job, validTxnList); + if (dirs == null || dirs.length == 0) { + return null; // No valid inputs. This condition is logged inside the call. + } + StringBuffer str = new StringBuffer(StringUtils.escapeString(dirs[0].toString())); + for(int i = 1; i < dirs.length;i++) { + str.append(",").append(StringUtils.escapeString(dirs[i].toString())); + } + return str.toString(); + } + + private ValidTxnList extractValidTxnList() { + if (currDesc.getTableName() == null || !org.apache.commons.lang.StringUtils.isBlank(currDesc.getTableName())) { + String txnString = job.get(ValidTxnList.VALID_TXNS_KEY); + return txnString == null ? 
new ValidReadTxnList() : new ValidReadTxnList(txnString); + } + return null; // not fetching from a table directly but from a temp location + } + private FetchInputFormatSplit[] splitSampling(SplitSample splitSample, FetchInputFormatSplit[] splits) { long totalSize = 0; @@ -699,7 +747,7 @@ public FetchInputFormatSplit(InputSplit split, InputFormat inputFormat) { return inputFormat.getRecordReader(getInputSplit(), job, Reporter.NULL); } } - + private static class FetchInputFormatSplitComparator implements Comparator { @Override public int compare(FetchInputFormatSplit a, FetchInputFormatSplit b) { @@ -711,4 +759,8 @@ public int compare(FetchInputFormatSplit a, FetchInputFormatSplit b) { return Long.signum(a.getLength() - b.getLength()); } } + + public Configuration getJobConf() { + return job; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java index e708d58..f6d27fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java @@ -24,6 +24,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.DriverContext; @@ -192,5 +193,4 @@ public void clearFetch() throws HiveException { fetch.clearFetchContext(); } } - } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index bc265eb..3544884 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -18,6 +18,19 @@ package org.apache.hadoop.hive.ql.exec; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_TEMPORARY_TABLE_STORAGE; + +import java.io.IOException; +import java.io.Serializable; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; import com.google.common.collect.Lists; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -31,7 +44,9 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.Utilities.MissingBucketsContext; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; import org.apache.hadoop.hive.ql.io.BucketCodec; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveKey; @@ -70,6 +85,7 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hive.common.util.HiveStringUtils; import org.slf4j.Logger; @@ -91,6 +107,7 @@ /** * File Sink operator implementation. 
**/ +@SuppressWarnings("deprecation") public class FileSinkOperator extends TerminalOperator implements Serializable { @@ -105,7 +122,8 @@ protected transient Path parent; protected transient HiveOutputFormat hiveOutputFormat; protected transient Path specPath; - protected transient String childSpecPathDynLinkedPartitions; + protected transient String unionPath; + protected transient boolean isUnionDp; protected transient int dpStartCol; // start column # for DP columns protected transient List dpVals; // array of values corresponding to DP columns protected transient List dpWritables; @@ -143,17 +161,34 @@ } public class FSPaths implements Cloneable { - Path tmpPath; - Path taskOutputTempPath; + private Path tmpPath; + private Path taskOutputTempPath; Path[] outPaths; Path[] finalPaths; RecordWriter[] outWriters; RecordUpdater[] updaters; Stat stat; + int acidLastBucket = -1; + int acidFileOffset = -1; + private boolean isMmTable; + private Long txnId; + private int stmtId; + + public FSPaths(Path specPath, boolean isMmTable) { + this.isMmTable = isMmTable; + if (!isMmTable) { + tmpPath = Utilities.toTempPath(specPath); + taskOutputTempPath = Utilities.toTaskTempPath(specPath); + } else { + tmpPath = specPath; + taskOutputTempPath = null; // Should not be used. + txnId = conf.getTransactionId(); + stmtId = conf.getStatementId(); + } + Utilities.LOG14535.info("new FSPaths for " + numFiles + " files, dynParts = " + bDynParts + + ": tmpPath " + tmpPath + ", task path " + taskOutputTempPath + + " (spec path " + specPath + ")"/*, new Exception()*/); - public FSPaths(Path specPath) { - tmpPath = Utilities.toTempPath(specPath); - taskOutputTempPath = Utilities.toTaskTempPath(specPath); outPaths = new Path[numFiles]; finalPaths = new Path[numFiles]; outWriters = new RecordWriter[numFiles]; @@ -174,7 +209,7 @@ public Path getTaskOutPath(String taskId) { /** * Update the final paths according to tmpPath. */ - public Path getFinalPath(String taskId, Path tmpPath, String extension) { + private Path getFinalPath(String taskId, Path tmpPath, String extension) { if (extension != null) { return new Path(tmpPath, taskId + extension); } else { @@ -204,30 +239,38 @@ public void closeWriters(boolean abort) throws HiveException { } } - private void commit(FileSystem fs) throws HiveException { + private void commit(FileSystem fs, List commitPaths) throws HiveException { for (int idx = 0; idx < outPaths.length; ++idx) { try { - if ((bDynParts || isSkewedStoredAsSubDirectories) - && !fs.exists(finalPaths[idx].getParent())) { - fs.mkdirs(finalPaths[idx].getParent()); - } - boolean needToRename = true; - if (conf.getWriteType() == AcidUtils.Operation.UPDATE || - conf.getWriteType() == AcidUtils.Operation.DELETE) { - // If we're updating or deleting there may be no file to close. This can happen - // because the where clause strained out all of the records for a given bucket. So - // before attempting the rename below, check if our file exists. If it doesn't, - // then skip the rename. If it does try it. We could just blindly try the rename - // and avoid the extra stat, but that would mask other errors. 
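The reasoning in the comment above survives in the new commitOneOutPath below, but it is folded into a single condition, and the meaning of "commit" changes for MM (insert-only) tables: the writer has already placed its file in the final delta directory, so instead of a rename the path is only recorded and later written into the commit manifest from closeOp. A condensed sketch of the two commit paths, with simplified names rather than the exact patch text:

    // Sketch only: simplified from FSPaths.commitOneOutPath in this patch.
    import java.io.IOException;
    import java.util.List;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    class CommitSketch {
      static void commitOne(FileSystem fs, Path outPath, Path finalPath, boolean isMmTable,
          boolean isUpdateOrDelete, List<Path> commitPaths) throws IOException {
        // UPDATE/DELETE may have produced no file for this bucket; check before renaming.
        boolean needToRename = outPath != null && (!isUpdateOrDelete || fs.exists(outPath));
        if (!needToRename) {
          return;
        }
        if (isMmTable) {
          // MM table: outPath is already the final path; defer to the commit manifest.
          commitPaths.add(outPath);
        } else if (!fs.rename(outPath, finalPath)) {
          throw new IOException("Unable to rename " + outPath + " to " + finalPath);
        }
      }
    }

The delete-and-retry handling for an existing target in the real code is omitted here for brevity.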
- try { - if (outPaths[idx] != null) { - FileStatus stat = fs.getFileStatus(outPaths[idx]); - } - } catch (FileNotFoundException fnfe) { - needToRename = false; - } - } - if (needToRename && outPaths[idx] != null && !fs.rename(outPaths[idx], finalPaths[idx])) { + commitOneOutPath(idx, fs, commitPaths); + } catch (IOException e) { + throw new HiveException("Unable to commit output from: " + + outPaths[idx] + " to: " + finalPaths[idx], e); + } + } + } + + private void commitOneOutPath(int idx, FileSystem fs, List commitPaths) + throws IOException, HiveException { + if ((bDynParts || isSkewedStoredAsSubDirectories) + && !fs.exists(finalPaths[idx].getParent())) { + Utilities.LOG14535.info("commit making path for dyn/skew: " + finalPaths[idx].getParent()); + FileUtils.mkdir(fs, finalPaths[idx].getParent(), hconf); + } + // If we're updating or deleting there may be no file to close. This can happen + // because the where clause strained out all of the records for a given bucket. So + // before attempting the rename below, check if our file exists. If it doesn't, + // then skip the rename. If it does try it. We could just blindly try the rename + // and avoid the extra stat, but that would mask other errors. + Operation acidOp = conf.getWriteType(); + boolean needToRename = outPaths[idx] != null && ((acidOp != Operation.UPDATE + && acidOp != Operation.DELETE) || fs.exists(outPaths[idx])); + if (needToRename && outPaths[idx] != null) { + Utilities.LOG14535.info("committing " + outPaths[idx] + " to " + finalPaths[idx] + " (" + isMmTable + ")"); + if (isMmTable) { + assert outPaths[idx].equals(finalPaths[idx]); + commitPaths.add(outPaths[idx]); + } else if (!fs.rename(outPaths[idx], finalPaths[idx])) { FileStatus fileStatus = FileUtils.getFileStatusOrNull(fs, finalPaths[idx]); if (fileStatus != null) { LOG.warn("Target path " + finalPaths[idx] + " with a size " + fileStatus.getLen() + " exists. Trying to delete it."); @@ -237,16 +280,13 @@ private void commit(FileSystem fs) throws HiveException { } if (!fs.rename(outPaths[idx], finalPaths[idx])) { - throw new HiveException("Unable to rename output from: " + - outPaths[idx] + " to: " + finalPaths[idx]); + throw new HiveException("Unable to rename output from: " + + outPaths[idx] + " to: " + finalPaths[idx]); } - } - updateProgress(); - } catch (IOException e) { - throw new HiveException("Unable to rename output from: " + - outPaths[idx] + " to: " + finalPaths[idx], e); } } + + updateProgress(); } public void abortWriters(FileSystem fs, boolean abort, boolean delete) throws HiveException { @@ -266,8 +306,90 @@ public void abortWriters(FileSystem fs, boolean abort, boolean delete) throws Hi } } - public Stat getStat() { - return stat; + public void configureDynPartPath(String dirName, String childSpecPathDynLinkedPartitions) { + dirName = (childSpecPathDynLinkedPartitions == null) ? 
dirName : + dirName + Path.SEPARATOR + childSpecPathDynLinkedPartitions; + tmpPath = new Path(tmpPath, dirName); + if (taskOutputTempPath != null) { + taskOutputTempPath = new Path(taskOutputTempPath, dirName); + } + } + + public void initializeBucketPaths(int filesIdx, String taskId, boolean isNativeTable, + boolean isSkewedStoredAsSubDirectories) { + if (isNativeTable) { + String extension = Utilities.getFileExtension(jc, isCompressed, hiveOutputFormat); + if (!isMmTable) { + if (!bDynParts && !isSkewedStoredAsSubDirectories) { + finalPaths[filesIdx] = getFinalPath(taskId, parent, extension); + } else { + finalPaths[filesIdx] = getFinalPath(taskId, tmpPath, extension); + } + outPaths[filesIdx] = getTaskOutPath(taskId); + } else { + String subdirPath = AcidUtils.deltaSubdir(txnId, txnId, stmtId); + if (unionPath != null) { + // Create the union directory inside the MM directory. + subdirPath += Path.SEPARATOR + unionPath; + } + subdirPath += Path.SEPARATOR + taskId; + if (conf.isMerge()) { + // Make sure we don't collide with the source files. + // MM tables don't support concat so we don't expect the merge of merged files. + subdirPath += ".merged"; + } + Path finalPath = null; + if (!bDynParts && !isSkewedStoredAsSubDirectories) { + finalPath = getFinalPath(subdirPath, specPath, extension); + } else { + // Note: tmpPath here has the correct partition key + finalPath = getFinalPath(subdirPath, tmpPath, extension); + } + // In the cases that have multi-stage insert, e.g. a "hive.skewjoin.key"-based skew join, + // it can happen that we want multiple commits into the same directory from different + // tasks (not just task instances). In non-MM case, Utilities.renameOrMoveFiles ensures + // unique names. We could do the same here, but this will still cause the old file to be + // deleted because it has not been committed in /this/ FSOP. We are going to fail to be + // safe. Potentially, we could implement some partial commit between stages, if this + // affects some less obscure scenario. + try { + FileSystem fpfs = finalPath.getFileSystem(hconf); + if (fpfs.exists(finalPath)) throw new RuntimeException(finalPath + " already exists"); + } catch (IOException e) { + throw new RuntimeException(e); + } + finalPaths[filesIdx] = finalPath; + outPaths[filesIdx] = finalPath; + } + if (LOG.isInfoEnabled()) { + LOG.info("Final Path: FS " + finalPaths[filesIdx]); + if (LOG.isInfoEnabled() && !isMmTable) { + LOG.info("Writing to temp file: FS " + outPaths[filesIdx]); + } + } + } else { + finalPaths[filesIdx] = outPaths[filesIdx] = specPath; + } + } + + public Path getTmpPath() { + return tmpPath; + } + + public Path getTaskOutputTempPath() { + return taskOutputTempPath; + } + + public void addToStat(String statType, long amount) { + if ("rowCount".equals(statType)) { + Utilities.LOG14535.info("Adding " + statType + " = " + amount + " to " + System.identityHashCode(this)); + } + stat.addToStat(statType, amount); + } + + public Collection getStoredStats() { + Utilities.LOG14535.info("Getting stats from " + System.identityHashCode(this)); + return stat.getStoredStats(); } } // class FSPaths @@ -320,14 +442,25 @@ private void initializeSpecPath() { // and Parent/DynamicPartition/Child_1 respectively. // The movetask that follows subQ1 and subQ2 tasks still moves the directory // 'Parent' - if ((!conf.isLinkedFileSink()) || (dpCtx == null)) { + boolean isLinked = conf.isLinkedFileSink(); + if (!isLinked) { + // Simple case - no union. 
specPath = conf.getDirName(); - childSpecPathDynLinkedPartitions = null; - return; + unionPath = null; + } else { + isUnionDp = (dpCtx != null); + if (conf.isMmTable() || isUnionDp) { + // MM tables need custom handling for union suffix; DP tables use parent too. + specPath = conf.getParentDir(); + unionPath = conf.getDirName().getName(); + } else { + // For now, keep the old logic for non-MM non-DP union case. Should probably be unified. + specPath = conf.getDirName(); + unionPath = null; + } } - - specPath = conf.getParentDir(); - childSpecPathDynLinkedPartitions = conf.getDirName().getName(); + Utilities.LOG14535.info("Setting up FSOP " + System.identityHashCode(this) + " (" + + conf.isLinkedFileSink() + ") with " + taskId + " and " + specPath + " + " + unionPath); } /** Kryo ctor. */ @@ -410,7 +543,10 @@ protected void initializeOp(Configuration hconf) throws HiveException { } if (!bDynParts) { - fsp = new FSPaths(specPath); + fsp = new FSPaths(specPath, conf.isMmTable()); + Utilities.LOG14535.info("creating new paths " + System.identityHashCode(fsp) + + " from ctor; childSpec " + unionPath + ": tmpPath " + fsp.getTmpPath() + + ", task path " + fsp.getTaskOutputTempPath()); // Create all the files - this is required because empty files need to be created for // empty buckets @@ -424,6 +560,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { .getVar(hconf, HIVE_TEMPORARY_TABLE_STORAGE)); if (isTemporary && fsp != null && tmpStorage != StoragePolicyValue.DEFAULT) { + assert !conf.isMmTable(); // Not supported for temp tables. final Path outputPath = fsp.taskOutputTempPath; StoragePolicyShim shim = ShimLoader.getHadoopShims() .getStoragePolicyShim(fs); @@ -567,7 +704,7 @@ protected void createBucketFiles(FSPaths fsp) throws HiveException { assert filesIdx == numFiles; // in recent hadoop versions, use deleteOnExit to clean tmp files. - if (isNativeTable && fs != null && fsp != null) { + if (isNativeTable && fs != null && fsp != null && !conf.isMmTable()) { autoDelete = fs.deleteOnExit(fsp.outPaths[0]); } } catch (Exception e) { @@ -581,34 +718,16 @@ protected void createBucketFiles(FSPaths fsp) throws HiveException { protected void createBucketForFileIdx(FSPaths fsp, int filesIdx) throws HiveException { try { - if (isNativeTable) { - fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, fsp.tmpPath, null); - if (LOG.isInfoEnabled()) { - LOG.info("Final Path: FS " + fsp.finalPaths[filesIdx]); - } - fsp.outPaths[filesIdx] = fsp.getTaskOutPath(taskId); - if (LOG.isInfoEnabled()) { - LOG.info("Writing to temp file: FS " + fsp.outPaths[filesIdx]); - } - } else { - fsp.finalPaths[filesIdx] = fsp.outPaths[filesIdx] = specPath; - } - // The reason to keep these instead of using - // OutputFormat.getRecordWriter() is that - // getRecordWriter does not give us enough control over the file name that - // we create. 
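createBucketForFileIdx in this hunk now delegates path selection to FSPaths.initializeBucketPaths, added earlier above; for an MM table that method points both outPath and finalPath straight at the final delta directory instead of a task-temp location. A rough sketch of how that final location is assembled; the delta directory format and the union subdirectory name follow Hive's usual conventions and are shown as assumptions, not text taken from the patch:

    // Sketch only: the real code builds the subdirectory via AcidUtils.deltaSubdir(txnId, txnId, stmtId).
    import org.apache.hadoop.fs.Path;

    class MmPathSketch {
      static Path mmFinalPath(Path specPath, long txnId, int stmtId, String unionSuffix,
          String taskId, boolean isMerge, String extension) {
        String subdir = String.format("delta_%07d_%07d_%04d", txnId, txnId, stmtId);
        if (unionSuffix != null) {
          subdir += Path.SEPARATOR + unionSuffix;   // union branch directory, when present
        }
        subdir += Path.SEPARATOR + taskId;          // e.g. 000000_0
        if (isMerge) {
          subdir += ".merged";                      // keep merge output apart from its inputs
        }
        return new Path(specPath, extension == null ? subdir : subdir + extension);
      }
    }

Because the file is written in place, initializeBucketPaths also checks that the final path does not already exist and fails fast if it does, rather than silently clobbering output committed by an earlier stage.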
- String extension = Utilities.getFileExtension(jc, isCompressed, hiveOutputFormat); - if (!bDynParts && !this.isSkewedStoredAsSubDirectories) { - fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, parent, extension); - } else { - fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, fsp.tmpPath, extension); - } + fsp.initializeBucketPaths(filesIdx, taskId, isNativeTable, isSkewedStoredAsSubDirectories); + Utilities.LOG14535.info("createBucketForFileIdx " + filesIdx + ": final path " + fsp.finalPaths[filesIdx] + + "; out path " + fsp.outPaths[filesIdx] +" (spec path " + specPath + ", tmp path " + + fsp.getTmpPath() + ", task " + taskId + ")"/*, new Exception()*/); if (LOG.isInfoEnabled()) { LOG.info("New Final Path: FS " + fsp.finalPaths[filesIdx]); } - if (isNativeTable) { + if (isNativeTable && !conf.isMmTable()) { // in recent hadoop versions, use deleteOnExit to clean tmp files. autoDelete = fs.deleteOnExit(fsp.outPaths[filesIdx]); } @@ -616,9 +735,17 @@ protected void createBucketForFileIdx(FSPaths fsp, int filesIdx) Utilities.copyTableJobPropertiesToConf(conf.getTableInfo(), jc); // only create bucket files only if no dynamic partitions, // buckets of dynamic partitions will be created for each newly created partition - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) { + //todo IOW integration. Full Acid uses the else if block to create Acid's RecordUpdater (HiveFileFormatUtils) + // and that will set writingBase(conf.getInsertOverwrite()) + // If MM wants to create a new base for IOW (instead of delta dir), it should specify it here + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || conf.isMmTable()) { + Path outPath = fsp.outPaths[filesIdx]; + if (conf.isMmTable() + && !FileUtils.mkdir(fs, outPath.getParent(), hconf)) { + LOG.warn("Unable to create directory with inheritPerms: " + outPath); + } fsp.outWriters[filesIdx] = HiveFileFormatUtils.getHiveRecordWriter(jc, conf.getTableInfo(), - outputClass, conf, fsp.outPaths[filesIdx], reporter); + outputClass, conf, outPath, reporter); // If the record writer provides stats, get it from there instead of the serde statsFromRecordWriter[filesIdx] = fsp.outWriters[filesIdx] instanceof StatsProvidingRecordWriter; @@ -739,9 +866,9 @@ public void process(Object row, int tag) throws HiveException { if (conf.isGatherStats() && !isCollectRWStats) { SerDeStats stats = serializer.getSerDeStats(); if (stats != null) { - fpaths.stat.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize()); + fpaths.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize()); } - fpaths.stat.addToStat(StatsSetupConst.ROW_COUNT, 1); + fpaths.addToStat(StatsSetupConst.ROW_COUNT, 1); } if ((++numRows == cntr) && LOG.isInfoEnabled()) { @@ -758,7 +885,7 @@ public void process(Object row, int tag) throws HiveException { // for a given operator branch prediction should work quite nicely on it. // RecordUpdateer expects to get the actual row, not a serialized version of it. Thus we // pass the row rather than recordValue. 
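With MM tables sharing the non-ACID branch, the dispatch just below becomes: plain RecordWriter for NOT_ACID and MM writes, RecordUpdater for full-ACID operations. A minimal sketch of that dispatch; the UPDATE/DELETE branches are not visible in this hunk and are filled in here as an assumption from the surrounding operator:

    // Sketch only: simplified row dispatch mirroring FileSinkOperator.process() after this patch.
    import java.io.IOException;
    import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
    import org.apache.hadoop.hive.ql.io.AcidUtils.Operation;
    import org.apache.hadoop.hive.ql.io.RecordUpdater;
    import org.apache.hadoop.io.Writable;

    class RowDispatchSketch {
      Operation writeType;
      boolean isMmTable;
      long txnId;
      RecordWriter[] rowOutWriters;
      RecordUpdater[] updaters;

      void writeRow(Object row, Writable recordValue, int offset) throws IOException {
        if (writeType == Operation.NOT_ACID || isMmTable) {
          rowOutWriters[offset].write(recordValue);   // serialized row, plain writer path (incl. MM)
        } else if (writeType == Operation.INSERT) {
          updaters[offset].insert(txnId, row);        // RecordUpdater takes the raw row
        } else if (writeType == Operation.UPDATE) {
          updaters[offset].update(txnId, row);
        } else {
          updaters[offset].delete(txnId, row);
        }
      }
    }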
- if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) { + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || conf.isMmTable()) { rowOutWriters[findWriterOffset(row)].write(recordValue); } else if (conf.getWriteType() == AcidUtils.Operation.INSERT) { fpaths.updaters[findWriterOffset(row)].insert(conf.getTransactionId(), row); @@ -835,7 +962,7 @@ public void process(Object row, int tag) throws HiveException { protected boolean areAllTrue(boolean[] statsFromRW) { // If we are doing an acid operation they will always all be true as RecordUpdaters always // collect stats - if (conf.getWriteType() != AcidUtils.Operation.NOT_ACID) { + if (conf.getWriteType() != AcidUtils.Operation.NOT_ACID && !conf.isMmTable()) { return true; } for(boolean b : statsFromRW) { @@ -875,6 +1002,7 @@ assert getConf().getWriteType() != AcidUtils.Operation.DELETE && protected FSPaths lookupListBucketingPaths(String lbDirName) throws HiveException { FSPaths fsp2 = valToPaths.get(lbDirName); if (fsp2 == null) { + Utilities.LOG14535.info("lookupListBucketingPaths for " + lbDirName); fsp2 = createNewPaths(lbDirName); } return fsp2; @@ -888,18 +1016,11 @@ protected FSPaths lookupListBucketingPaths(String lbDirName) throws HiveExceptio * @throws HiveException */ private FSPaths createNewPaths(String dirName) throws HiveException { - FSPaths fsp2 = new FSPaths(specPath); - if (childSpecPathDynLinkedPartitions != null) { - fsp2.tmpPath = new Path(fsp2.tmpPath, - dirName + Path.SEPARATOR + childSpecPathDynLinkedPartitions); - fsp2.taskOutputTempPath = - new Path(fsp2.taskOutputTempPath, - dirName + Path.SEPARATOR + childSpecPathDynLinkedPartitions); - } else { - fsp2.tmpPath = new Path(fsp2.tmpPath, dirName); - fsp2.taskOutputTempPath = - new Path(fsp2.taskOutputTempPath, dirName); - } + FSPaths fsp2 = new FSPaths(specPath, conf.isMmTable()); + fsp2.configureDynPartPath(dirName, !conf.isMmTable() && isUnionDp ? 
unionPath : null); + Utilities.LOG14535.info("creating new paths " + System.identityHashCode(fsp2) + " for " + + dirName + ", childSpec " + unionPath + ": tmpPath " + fsp2.getTmpPath() + + ", task path " + fsp2.getTaskOutputTempPath()); if(!conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) { createBucketFiles(fsp2); valToPaths.put(dirName, fsp2); @@ -989,7 +1110,7 @@ protected FSPaths getDynOutPaths(List row, String lbDirName) throws Hive // stats from the record writer and store in the previous fsp that is cached if (conf.isGatherStats() && isCollectRWStats) { SerDeStats stats = null; - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) { + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || conf.isMmTable()) { RecordWriter outWriter = prevFsp.outWriters[0]; if (outWriter != null) { stats = ((StatsProvidingRecordWriter) outWriter).getStats(); @@ -998,8 +1119,8 @@ protected FSPaths getDynOutPaths(List row, String lbDirName) throws Hive stats = prevFsp.updaters[0].getStats(); } if (stats != null) { - prevFsp.stat.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize()); - prevFsp.stat.addToStat(StatsSetupConst.ROW_COUNT, stats.getRowCount()); + prevFsp.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize()); + prevFsp.addToStat(StatsSetupConst.ROW_COUNT, stats.getRowCount()); } } @@ -1080,6 +1201,7 @@ public void closeOp(boolean abort) throws HiveException { throw new HiveException(e); } } + List commitPaths = new ArrayList<>(); for (FSPaths fsp : valToPaths.values()) { fsp.closeWriters(abort); // before closing the operator check if statistics gathering is requested @@ -1090,14 +1212,14 @@ public void closeOp(boolean abort) throws HiveException { // record writer already gathers the statistics, it can simply return the // accumulated statistics which will be aggregated in case of spray writers if (conf.isGatherStats() && isCollectRWStats) { - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) { + if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID || conf.isMmTable()) { for (int idx = 0; idx < fsp.outWriters.length; idx++) { RecordWriter outWriter = fsp.outWriters[idx]; if (outWriter != null) { SerDeStats stats = ((StatsProvidingRecordWriter) outWriter).getStats(); if (stats != null) { - fsp.stat.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize()); - fsp.stat.addToStat(StatsSetupConst.ROW_COUNT, stats.getRowCount()); + fsp.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize()); + fsp.addToStat(StatsSetupConst.ROW_COUNT, stats.getRowCount()); } } } @@ -1106,8 +1228,8 @@ public void closeOp(boolean abort) throws HiveException { if (fsp.updaters[i] != null) { SerDeStats stats = fsp.updaters[i].getStats(); if (stats != null) { - fsp.stat.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize()); - fsp.stat.addToStat(StatsSetupConst.ROW_COUNT, stats.getRowCount()); + fsp.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize()); + fsp.addToStat(StatsSetupConst.ROW_COUNT, stats.getRowCount()); } } } @@ -1115,9 +1237,13 @@ public void closeOp(boolean abort) throws HiveException { } if (isNativeTable) { - fsp.commit(fs); + fsp.commit(fs, commitPaths); } } + if (conf.isMmTable()) { + Utilities.writeMmCommitManifest( + commitPaths, specPath, fs, taskId, conf.getTransactionId(), conf.getStatementId(), unionPath); + } // Only publish stats if this operator's flag was set to gather stats if (conf.isGatherStats()) { publishStats(); @@ -1127,13 +1253,14 @@ public void closeOp(boolean abort) throws HiveException 
{ // Hadoop always call close() even if an Exception was thrown in map() or // reduce(). for (FSPaths fsp : valToPaths.values()) { - fsp.abortWriters(fs, abort, !autoDelete && isNativeTable); + fsp.abortWriters(fs, abort, !autoDelete && isNativeTable && !conf.isMmTable()); } } fsp = prevFsp = null; super.closeOp(abort); } + /** * @return the name of the operator */ @@ -1152,12 +1279,28 @@ public void jobCloseOp(Configuration hconf, boolean success) try { if ((conf != null) && isNativeTable) { Path specPath = conf.getDirName(); + String unionSuffix = null; DynamicPartitionCtx dpCtx = conf.getDynPartCtx(); - if (conf.isLinkedFileSink() && (dpCtx != null)) { + ListBucketingCtx lbCtx = conf.getLbCtx(); + if (conf.isLinkedFileSink() && (dpCtx != null || conf.isMmTable())) { specPath = conf.getParentDir(); + unionSuffix = conf.getDirName().getName(); + } + Utilities.LOG14535.info("jobCloseOp using specPath " + specPath); + if (!conf.isMmTable()) { + Utilities.mvFileToFinalPath(specPath, hconf, success, LOG, dpCtx, conf, reporter); + } else { + int dpLevels = dpCtx == null ? 0 : dpCtx.getNumDPCols(), + lbLevels = lbCtx == null ? 0 : lbCtx.calculateListBucketingLevel(); + // TODO: why is it stored in both? + int numBuckets = (conf.getTable() != null) ? conf.getTable().getNumBuckets() + : (dpCtx != null ? dpCtx.getNumBuckets() : 0); + MissingBucketsContext mbc = new MissingBucketsContext( + conf.getTableInfo(), numBuckets, conf.getCompressed()); + Utilities.handleMmTableFinalPath(specPath, unionSuffix, hconf, success, + dpLevels, lbLevels, mbc, conf.getTransactionId(), conf.getStatementId(), reporter, + conf.isMmTable(), conf.isMmCtas()); } - Utilities.mvFileToFinalPath(specPath, hconf, success, LOG, dpCtx, conf, - reporter); } } catch (IOException e) { throw new HiveException(e); @@ -1209,6 +1352,7 @@ private void createHiveOutputFormat(JobConf job) throws HiveException { } private void publishStats() throws HiveException { + Utilities.LOG14535.error("FSOP publishStats called."); boolean isStatsReliable = conf.isStatsReliable(); // Initializing a stats publisher @@ -1239,7 +1383,8 @@ private void publishStats() throws HiveException { for (Map.Entry entry : valToPaths.entrySet()) { String fspKey = entry.getKey(); // DP/LB FSPaths fspValue = entry.getValue(); - + Utilities.LOG14535.info("Observing entry for stats " + fspKey + + " => FSP with tmpPath " + fspValue.getTmpPath()); // for bucketed tables, hive.optimize.sort.dynamic.partition optimization // adds the taskId to the fspKey. if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) { @@ -1252,6 +1397,7 @@ private void publishStats() throws HiveException { // not be retrieved from staging table and hence not aggregated. To avoid this issue // we will remove the taskId from the key which is redundant anyway. fspKey = fspKey.split(taskID)[0]; + Utilities.LOG14535.info("Adjusting fspKey for stats to " + fspKey); } // split[0] = DP, split[1] = LB @@ -1263,12 +1409,14 @@ private void publishStats() throws HiveException { String prefix = conf.getTableInfo().getTableName().toLowerCase(); prefix = Utilities.join(prefix, spSpec, dpSpec); prefix = prefix.endsWith(Path.SEPARATOR) ? 
prefix : prefix + Path.SEPARATOR; + Utilities.LOG14535.info("Prefix for stats " + prefix + " (from " + spSpec + ", " + dpSpec + ")"); Map statsToPublish = new HashMap(); - for (String statType : fspValue.stat.getStoredStats()) { + for (String statType : fspValue.getStoredStats()) { statsToPublish.put(statType, Long.toString(fspValue.stat.getStat(statType))); } if (!statsPublisher.publishStat(prefix, statsToPublish)) { + Utilities.LOG14535.error("Failed to publish stats"); // The original exception is lost. // Not changing the interface to maintain backward compatibility if (isStatsReliable) { @@ -1278,6 +1426,7 @@ private void publishStats() throws HiveException { } sContext.setIndexForTezUnion(this.getIndexForTezUnion()); if (!statsPublisher.closeConnection(sContext)) { + Utilities.LOG14535.error("Failed to close stats"); // The original exception is lost. // Not changing the interface to maintain backward compatibility if (isStatsReliable) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java index 4a9c3bc..f4f20f0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java @@ -301,6 +301,9 @@ protected void flushToFile() throws IOException, HiveException { String bigBucketFileName = getExecContext().getCurrentBigBucketFile(); String fileName = getExecContext().getLocalWork().getBucketFileName(bigBucketFileName); // get the tmp URI path; it will be a hdfs path if not local mode + // TODO# this doesn't work... the path for writer and reader mismatch + // Dump the side-table for tag ... -local-10004/HashTable-Stage-1/MapJoin-a-00-(ds%3D2008-04-08)mm_2.hashtable + // Load back 1 hashtable file -local-10004/HashTable-Stage-1/MapJoin-a-00-srcsortbucket3outof4.txt.hashtable String dumpFilePrefix = conf.getDumpFilePrefix(); Path path = Utilities.generatePath(tmpURI, dumpFilePrefix, tag, fileName); console.printInfo(Utilities.now() + "\tDump the side-table for tag: " + tag + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java new file mode 100644 index 0000000..27db9a4 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitTask.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
+import org.apache.hadoop.hive.ql.plan.api.StageType;
+import org.apache.hadoop.util.StringUtils;
+
+public class ImportCommitTask extends Task<ImportCommitWork> {
+
+  private static final long serialVersionUID = 1L;
+
+  public ImportCommitTask() {
+    super();
+  }
+
+  @Override
+  public int execute(DriverContext driverContext) {
+    Utilities.LOG14535.info("Executing ImportCommit for " + work.getTxnId());
+
+    try {
+      if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) {
+        Utilities.LOG14535.info("Exiting due to explain");
+        return 0;
+      }
+      return 0;
+    } catch (Exception e) {
+      console.printError("Failed with exception " + e.getMessage(), "\n" +
+          StringUtils.stringifyException(e));
+      setException(e);
+      return 1;
+    }
+  }
+
+  @Override
+  public StageType getType() {
+    return StageType.MOVE; // The commit for import is normally done as part of MoveTask.
+  }
+
+  @Override
+  public String getName() {
+    return "IMPORT_COMMIT";
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java
new file mode 100644
index 0000000..5b59635
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ImportCommitWork.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.exec; + +import java.io.Serializable; + +import org.apache.hadoop.hive.ql.plan.Explain; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + +@Explain(displayName = "Import Commit", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +public class ImportCommitWork implements Serializable { + private static final long serialVersionUID = 1L; + private String dbName, tblName; + private long txnId; + private int stmtId; + + public ImportCommitWork(String dbName, String tblName, long txnId, int stmtId) { + this.txnId = txnId; + this.stmtId = stmtId; + this.dbName = dbName; + this.tblName = tblName; + } + + public long getTxnId() { + return txnId; + } + + public int getStmtId() { + return stmtId; + } + + public String getDbName() { + return dbName; + } + + public String getTblName() { + return tblName; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java index a4bca45..3f5cdf5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java @@ -232,6 +232,7 @@ private void mvFileToFinalPath(Path specPath, Configuration hconf, // point, updates from speculative tasks still writing to tmpPath // will not appear in finalPath. log.info("Moving tmp dir: " + tmpPath + " to: " + intermediatePath); + Utilities.LOG14535.info("Moving tmp dir: " + tmpPath + " to: " + intermediatePath + "(spec " + specPath + ")"); Utilities.rename(fs, tmpPath, intermediatePath); // Step2: remove any tmp file or double-committed output files Utilities.removeTempOrDuplicateFiles(fs, intermediatePath); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index cde2805..34d0598 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -254,8 +254,24 @@ public boolean hasFollowingStatsTask() { return false; } + private final static class TaskInformation { + public List bucketCols = null; + public List sortCols = null; + public int numBuckets = -1; + public Task task; + public String path; + public TaskInformation(Task task, String path) { + this.task = task; + this.path = path; + } + } + @Override public int execute(DriverContext driverContext) { + if (work.isNoop()) return 0; // TODO# temporary flag for HIVE-14990 + Utilities.LOG14535.info("Executing MoveWork " + System.identityHashCode(work) + + " with " + work.getLoadFileWork() + "; " + work.getLoadTableWork() + "; " + + work.getLoadMultiFilesWork()); try { if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) { @@ -269,24 +285,52 @@ public int execute(DriverContext driverContext) { if (lfd != null) { Path targetPath = lfd.getTargetDir(); Path sourcePath = lfd.getSourcePath(); - moveFile(sourcePath, targetPath, lfd.getIsDfsDir()); + if (targetPath.equals(sourcePath)) { + Utilities.LOG14535.info("MoveTask not moving LFD " + sourcePath); + } else { + Utilities.LOG14535.info("MoveTask moving LFD " + sourcePath + " to " + targetPath); + moveFile(sourcePath, targetPath, lfd.getIsDfsDir()); + } } // Multi-file load is for dynamic partitions when some partitions do not // need to merge and they can simply be moved to the target directory. + // This is also used for MM table conversion. 
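The multi-file branch that follows is partly garbled in this copy of the patch (generic type arguments and part of the loop body were lost), but the visible pieces, a new lmfd.getTargetPrefixes() list and an indexed for loop replacing the old while loop, suggest each source path is now paired with its own target prefix. A sketch of that reading, with the source accessor and the pairing treated as assumptions:

    // Sketch only: a reconstruction of the intent; the exact accessors and pairing are
    // assumptions, since the patch text is garbled at this point.
    import java.util.List;
    import org.apache.hadoop.fs.Path;

    class MultiFileMoveSketch {
      interface Mover { void moveFile(Path src, Path dst, boolean isDfsDir) throws Exception; }

      static void moveAll(List<Path> sources, List<String> targetPrefixes, boolean isDfsDir,
          Mover mover) throws Exception {
        for (int i = 0; i < sources.size(); i++) {
          Path src = sources.get(i);
          Path dst = new Path(targetPrefixes.get(i), src.getName()); // keep the file name
          mover.moveFile(src, dst, isDfsDir);
        }
      }
    }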
LoadMultiFilesDesc lmfd = work.getLoadMultiFilesWork(); if (lmfd != null) { boolean isDfsDir = lmfd.getIsDfsDir(); - int i = 0; - while (i targetPrefixes = lmfd.getTargetPrefixes(); + for (int i = 0; i files; - FileSystem srcFs; // source filesystem - try { - srcFs = tbd.getSourcePath().getFileSystem(conf); - dirs = srcFs.globStatus(tbd.getSourcePath()); - files = new ArrayList(); - for (int i = 0; (dirs != null && i < dirs.length); i++) { - files.addAll(Arrays.asList(srcFs.listStatus(dirs[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER))); - // We only check one file, so exit the loop when we have at least - // one. - if (files.size() > 0) { - break; - } - } - } catch (IOException e) { - throw new HiveException( - "addFiles: filesystem error in check phase", e); - } + checkFileFormats(db, tbd, table); - // handle file format check for table level - if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECHECKFILEFORMAT)) { - boolean flag = true; - // work.checkFileFormat is set to true only for Load Task, so assumption here is - // dynamic partition context is null - if (tbd.getDPCtx() == null) { - if (tbd.getPartitionSpec() == null || tbd.getPartitionSpec().isEmpty()) { - // Check if the file format of the file matches that of the table. - flag = HiveFileFormatUtils.checkInputFormat( - srcFs, conf, tbd.getTable().getInputFileFormatClass(), files); - } else { - // Check if the file format of the file matches that of the partition - Partition oldPart = db.getPartition(table, tbd.getPartitionSpec(), false); - if (oldPart == null) { - // this means we have just created a table and are specifying partition in the - // load statement (without pre-creating the partition), in which case lets use - // table input format class. inheritTableSpecs defaults to true so when a new - // partition is created later it will automatically inherit input format - // from table object - flag = HiveFileFormatUtils.checkInputFormat( - srcFs, conf, tbd.getTable().getInputFileFormatClass(), files); - } else { - flag = HiveFileFormatUtils.checkInputFormat( - srcFs, conf, oldPart.getInputFormatClass(), files); - } - } - if (!flag) { - throw new HiveException(ErrorMsg.WRONG_FILE_FORMAT); - } - } else { - LOG.warn("Skipping file format check as dpCtx is not null"); - } - } - } + boolean isFullAcidOp = work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID + && !tbd.isMmTable(); // Create a data container DataContainer dc = null; if (tbd.getPartitionSpec().size() == 0) { dc = new DataContainer(table.getTTable()); + Utilities.LOG14535.info("loadTable called from " + tbd.getSourcePath() + " into " + tbd.getTable().getTableName()); + if (tbd.isMmTable() && !tbd.isCommitMmWrite()) { + throw new HiveException( + "Only single-partition LoadTableDesc can skip commiting write ID"); + } db.loadTable(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getReplace(), - work.isSrcLocal(), isSkewedStoredAsDirs(tbd), - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, - hasFollowingStatsTask()); + work.isSrcLocal(), isSkewedStoredAsDirs(tbd), isFullAcidOp, hasFollowingStatsTask(), + tbd.getTxnId(), tbd.getStmtId(), tbd.isMmTable()); if (work.getOutputs() != null) { DDLTask.addIfAbsentByName(new WriteEntity(table, getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs()); @@ -381,155 +378,15 @@ public int execute(DriverContext driverContext) { LOG.info("Partition is: " + tbd.getPartitionSpec().toString()); // Check if the bucketing and/or sorting columns were inferred - List 
bucketCols = null; - List sortCols = null; - int numBuckets = -1; - Task task = this; - String path = tbd.getSourcePath().toUri().toString(); - // Find the first ancestor of this MoveTask which is some form of map reduce task - // (Either standard, local, or a merge) - while (task.getParentTasks() != null && task.getParentTasks().size() == 1) { - task = (Task)task.getParentTasks().get(0); - // If it was a merge task or a local map reduce task, nothing can be inferred - if (task instanceof MergeFileTask || task instanceof MapredLocalTask) { - break; - } - - // If it's a standard map reduce task, check what, if anything, it inferred about - // the directory this move task is moving - if (task instanceof MapRedTask) { - MapredWork work = (MapredWork)task.getWork(); - MapWork mapWork = work.getMapWork(); - bucketCols = mapWork.getBucketedColsByDirectory().get(path); - sortCols = mapWork.getSortedColsByDirectory().get(path); - if (work.getReduceWork() != null) { - numBuckets = work.getReduceWork().getNumReduceTasks(); - } - - if (bucketCols != null || sortCols != null) { - // This must be a final map reduce task (the task containing the file sink - // operator that writes the final output) - assert work.isFinalMapRed(); - } - break; - } - - // If it's a move task, get the path the files were moved from, this is what any - // preceding map reduce task inferred information about, and moving does not invalidate - // those assumptions - // This can happen when a conditional merge is added before the final MoveTask, but the - // condition for merging is not met, see GenMRFileSink1. - if (task instanceof MoveTask) { - if (((MoveTask)task).getWork().getLoadFileWork() != null) { - path = ((MoveTask)task).getWork().getLoadFileWork().getSourcePath().toUri().toString(); - } - } - } + TaskInformation ti = new TaskInformation(this, tbd.getSourcePath().toUri().toString()); + inferTaskInformation(ti); // deal with dynamic partitions DynamicPartitionCtx dpCtx = tbd.getDPCtx(); if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // dynamic partitions - - List> dps = Utilities.getFullDPSpecs(conf, dpCtx); - - console.printInfo(System.getProperty("line.separator")); - long startTime = System.currentTimeMillis(); - // load the list of DP partitions and return the list of partition specs - // TODO: In a follow-up to HIVE-1361, we should refactor loadDynamicPartitions - // to use Utilities.getFullDPSpecs() to get the list of full partSpecs. - // After that check the number of DPs created to not exceed the limit and - // iterate over it and call loadPartition() here. - // The reason we don't do inside HIVE-1361 is the latter is large and we - // want to isolate any potential issue it may introduce. 
- Map, Partition> dp = - db.loadDynamicPartitions( - tbd.getSourcePath(), - tbd.getTable().getTableName(), - tbd.getPartitionSpec(), - tbd.getReplace(), - dpCtx.getNumDPCols(), - isSkewedStoredAsDirs(tbd), - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, - SessionState.get().getTxnMgr().getCurrentTxnId(), hasFollowingStatsTask(), - work.getLoadTableWork().getWriteType()); - - // publish DP columns to its subscribers - if (dps != null && dps.size() > 0) { - pushFeed(FeedType.DYNAMIC_PARTITIONS, dp.values()); - } - - String loadTime = "\t Time taken to load dynamic partitions: " + - (System.currentTimeMillis() - startTime)/1000.0 + " seconds"; - console.printInfo(loadTime); - LOG.info(loadTime); - - if (dp.size() == 0 && conf.getBoolVar(HiveConf.ConfVars.HIVE_ERROR_ON_EMPTY_PARTITION)) { - throw new HiveException("This query creates no partitions." + - " To turn off this error, set hive.error.on.empty.partition=false."); - } - - startTime = System.currentTimeMillis(); - // for each partition spec, get the partition - // and put it to WriteEntity for post-exec hook - for(Map.Entry, Partition> entry : dp.entrySet()) { - Partition partn = entry.getValue(); - - if (bucketCols != null || sortCols != null) { - updatePartitionBucketSortColumns( - db, table, partn, bucketCols, numBuckets, sortCols); - } - - WriteEntity enty = new WriteEntity(partn, - getWriteType(tbd, work.getLoadTableWork().getWriteType())); - if (work.getOutputs() != null) { - DDLTask.addIfAbsentByName(enty, work.getOutputs()); - } - // Need to update the queryPlan's output as well so that post-exec hook get executed. - // This is only needed for dynamic partitioning since for SP the the WriteEntity is - // constructed at compile time and the queryPlan already contains that. - // For DP, WriteEntity creation is deferred at this stage so we need to update - // queryPlan here. - if (queryPlan.getOutputs() == null) { - queryPlan.setOutputs(new LinkedHashSet()); - } - queryPlan.getOutputs().add(enty); - - // update columnar lineage for each partition - dc = new DataContainer(table.getTTable(), partn.getTPartition()); - - // Don't set lineage on delete as we don't have all the columns - if (SessionState.get() != null && - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.DELETE && - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.UPDATE) { - SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc, - table.getCols()); - } - LOG.info("\tLoading partition " + entry.getKey()); - } - console.printInfo("\t Time taken for adding to write entity : " + - (System.currentTimeMillis() - startTime)/1000.0 + " seconds"); - dc = null; // reset data container to prevent it being added again. 
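Two small rules recur around this refactoring: a load counts as a full-ACID operation only when the write type is transactional and the table is not MM (MM inserts take the plain load path), and an MM LoadTableDesc may skip committing its write ID only in the single-partition case, so both the table-level path and handleDynParts throw otherwise. A minimal sketch of those guards, reduced to booleans:

    // Sketch only: the two checks used by the new MoveTask load paths, simplified.
    class MmLoadGuardsSketch {
      static boolean isFullAcidOp(boolean isTransactionalWrite, boolean isMmTable) {
        // MM (insert-only) tables are transactional but are loaded like plain tables here.
        return isTransactionalWrite && !isMmTable;
      }

      static void checkCommitMmWrite(boolean isMmTable, boolean isCommitMmWrite) {
        if (isMmTable && !isCommitMmWrite) {
          // Mirrors the HiveException thrown in execute()/handleDynParts in this patch.
          throw new IllegalStateException(
              "Only single-partition LoadTableDesc can skip committing the write ID");
        }
      }
    }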
+ dc = handleDynParts(db, table, tbd, ti, dpCtx); } else { // static partitions - List partVals = MetaStoreUtils.getPvals(table.getPartCols(), - tbd.getPartitionSpec()); - db.validatePartitionNameCharacters(partVals); - db.loadPartition(tbd.getSourcePath(), tbd.getTable().getTableName(), - tbd.getPartitionSpec(), tbd.getReplace(), - tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(), - work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, hasFollowingStatsTask()); - Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); - - if (bucketCols != null || sortCols != null) { - updatePartitionBucketSortColumns(db, table, partn, bucketCols, - numBuckets, sortCols); - } - - dc = new DataContainer(table.getTTable(), partn.getTPartition()); - // add this partition to post-execution hook - if (work.getOutputs() != null) { - DDLTask.addIfAbsentByName(new WriteEntity(partn, - getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs()); - } - } + dc = handleStaticParts(db, table, tbd, ti); + } } if (SessionState.get() != null && dc != null) { // If we are doing an update or a delete the number of columns in the table will not @@ -579,6 +436,231 @@ public int execute(DriverContext driverContext) { return (1); } } + + private DataContainer handleStaticParts(Hive db, Table table, LoadTableDesc tbd, + TaskInformation ti) throws HiveException, IOException, InvalidOperationException { + List partVals = MetaStoreUtils.getPvals(table.getPartCols(), tbd.getPartitionSpec()); + db.validatePartitionNameCharacters(partVals); + Utilities.LOG14535.info("loadPartition called from " + tbd.getSourcePath() + + " into " + tbd.getTable().getTableName()); + db.loadPartition(tbd.getSourcePath(), tbd.getTable().getTableName(), + tbd.getPartitionSpec(), tbd.getReplace(), + tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(), + work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && + !tbd.isMmTable(), + hasFollowingStatsTask(), tbd.getTxnId(), tbd.getStmtId()); + Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); + + // See the comment inside updatePartitionBucketSortColumns. + if (!tbd.isMmTable() && (ti.bucketCols != null || ti.sortCols != null)) { + updatePartitionBucketSortColumns(db, table, partn, ti.bucketCols, + ti.numBuckets, ti.sortCols); + } + + DataContainer dc = new DataContainer(table.getTTable(), partn.getTPartition()); + // add this partition to post-execution hook + if (work.getOutputs() != null) { + DDLTask.addIfAbsentByName(new WriteEntity(partn, + getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs()); + } + return dc; + } + + private DataContainer handleDynParts(Hive db, Table table, LoadTableDesc tbd, + TaskInformation ti, DynamicPartitionCtx dpCtx) throws HiveException, + IOException, InvalidOperationException { + DataContainer dc; + List> dps = Utilities.getFullDPSpecs(conf, dpCtx); + + console.printInfo(System.getProperty("line.separator")); + long startTime = System.currentTimeMillis(); + // load the list of DP partitions and return the list of partition specs + // TODO: In a follow-up to HIVE-1361, we should refactor loadDynamicPartitions + // to use Utilities.getFullDPSpecs() to get the list of full partSpecs. + // After that check the number of DPs created to not exceed the limit and + // iterate over it and call loadPartition() here. 
+ // The reason we don't do inside HIVE-1361 is the latter is large and we + // want to isolate any potential issue it may introduce. + if (tbd.isMmTable() && !tbd.isCommitMmWrite()) { + throw new HiveException("Only single-partition LoadTableDesc can skip commiting write ID"); + } + Map, Partition> dp = + db.loadDynamicPartitions( + tbd.getSourcePath(), + tbd.getTable().getTableName(), + tbd.getPartitionSpec(), + tbd.getReplace(), + dpCtx.getNumDPCols(), + (tbd.getLbCtx() == null) ? 0 : tbd.getLbCtx().calculateListBucketingLevel(), + work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && + !tbd.isMmTable(), + SessionState.get().getTxnMgr().getCurrentTxnId(), tbd.getStmtId(), hasFollowingStatsTask(), + work.getLoadTableWork().getWriteType()); + + // publish DP columns to its subscribers + if (dps != null && dps.size() > 0) { + pushFeed(FeedType.DYNAMIC_PARTITIONS, dp.values()); + } + + String loadTime = "\t Time taken to load dynamic partitions: " + + (System.currentTimeMillis() - startTime)/1000.0 + " seconds"; + console.printInfo(loadTime); + LOG.info(loadTime); + + if (dp.size() == 0 && conf.getBoolVar(HiveConf.ConfVars.HIVE_ERROR_ON_EMPTY_PARTITION)) { + throw new HiveException("This query creates no partitions." + + " To turn off this error, set hive.error.on.empty.partition=false."); + } + + startTime = System.currentTimeMillis(); + // for each partition spec, get the partition + // and put it to WriteEntity for post-exec hook + for(Map.Entry, Partition> entry : dp.entrySet()) { + Partition partn = entry.getValue(); + + // See the comment inside updatePartitionBucketSortColumns. + if (!tbd.isMmTable() && (ti.bucketCols != null || ti.sortCols != null)) { + updatePartitionBucketSortColumns( + db, table, partn, ti.bucketCols, ti.numBuckets, ti.sortCols); + } + + WriteEntity enty = new WriteEntity(partn, + getWriteType(tbd, work.getLoadTableWork().getWriteType())); + if (work.getOutputs() != null) { + DDLTask.addIfAbsentByName(enty, work.getOutputs()); + } + // Need to update the queryPlan's output as well so that post-exec hook get executed. + // This is only needed for dynamic partitioning since for SP the the WriteEntity is + // constructed at compile time and the queryPlan already contains that. + // For DP, WriteEntity creation is deferred at this stage so we need to update + // queryPlan here. + if (queryPlan.getOutputs() == null) { + queryPlan.setOutputs(new LinkedHashSet()); + } + queryPlan.getOutputs().add(enty); + + // update columnar lineage for each partition + dc = new DataContainer(table.getTTable(), partn.getTPartition()); + + // Don't set lineage on delete as we don't have all the columns + if (SessionState.get() != null && + work.getLoadTableWork().getWriteType() != AcidUtils.Operation.DELETE && + work.getLoadTableWork().getWriteType() != AcidUtils.Operation.UPDATE) { + SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc, + table.getCols()); + } + LOG.info("\tLoading partition " + entry.getKey()); + } + console.printInfo("\t Time taken for adding to write entity : " + + (System.currentTimeMillis() - startTime)/1000.0 + " seconds"); + dc = null; // reset data container to prevent it being added again. 
+ return dc; + } + + private void inferTaskInformation(TaskInformation ti) { + // Find the first ancestor of this MoveTask which is some form of map reduce task + // (Either standard, local, or a merge) + while (ti.task.getParentTasks() != null && ti.task.getParentTasks().size() == 1) { + ti.task = (Task)ti.task.getParentTasks().get(0); + // If it was a merge task or a local map reduce task, nothing can be inferred + if (ti.task instanceof MergeFileTask || ti.task instanceof MapredLocalTask) { + break; + } + + // If it's a standard map reduce task, check what, if anything, it inferred about + // the directory this move task is moving + if (ti.task instanceof MapRedTask) { + MapredWork work = (MapredWork)ti.task.getWork(); + MapWork mapWork = work.getMapWork(); + ti.bucketCols = mapWork.getBucketedColsByDirectory().get(ti.path); + ti.sortCols = mapWork.getSortedColsByDirectory().get(ti.path); + if (work.getReduceWork() != null) { + ti.numBuckets = work.getReduceWork().getNumReduceTasks(); + } + + if (ti.bucketCols != null || ti.sortCols != null) { + // This must be a final map reduce task (the task containing the file sink + // operator that writes the final output) + assert work.isFinalMapRed(); + } + break; + } + + // If it's a move task, get the path the files were moved from, this is what any + // preceding map reduce task inferred information about, and moving does not invalidate + // those assumptions + // This can happen when a conditional merge is added before the final MoveTask, but the + // condition for merging is not met, see GenMRFileSink1. + if (ti.task instanceof MoveTask) { + MoveTask mt = (MoveTask)ti.task; + if (mt.getWork().getLoadFileWork() != null) { + ti.path = mt.getWork().getLoadFileWork().getSourcePath().toUri().toString(); + } + } + } + } + + private void checkFileFormats(Hive db, LoadTableDesc tbd, Table table) + throws HiveException { + if (work.getCheckFileFormat()) { + // Get all files from the src directory + FileStatus[] dirs; + ArrayList files; + FileSystem srcFs; // source filesystem + try { + srcFs = tbd.getSourcePath().getFileSystem(conf); + dirs = srcFs.globStatus(tbd.getSourcePath()); + files = new ArrayList(); + for (int i = 0; (dirs != null && i < dirs.length); i++) { + files.addAll(Arrays.asList(srcFs.listStatus(dirs[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER))); + // We only check one file, so exit the loop when we have at least + // one. + if (files.size() > 0) { + break; + } + } + } catch (IOException e) { + throw new HiveException( + "addFiles: filesystem error in check phase", e); + } + + // handle file format check for table level + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECHECKFILEFORMAT)) { + boolean flag = true; + // work.checkFileFormat is set to true only for Load Task, so assumption here is + // dynamic partition context is null + if (tbd.getDPCtx() == null) { + if (tbd.getPartitionSpec() == null || tbd.getPartitionSpec().isEmpty()) { + // Check if the file format of the file matches that of the table. + flag = HiveFileFormatUtils.checkInputFormat( + srcFs, conf, tbd.getTable().getInputFileFormatClass(), files); + } else { + // Check if the file format of the file matches that of the partition + Partition oldPart = db.getPartition(table, tbd.getPartitionSpec(), false); + if (oldPart == null) { + // this means we have just created a table and are specifying partition in the + // load statement (without pre-creating the partition), in which case lets use + // table input format class. 
inheritTableSpecs defaults to true so when a new + // partition is created later it will automatically inherit input format + // from table object + flag = HiveFileFormatUtils.checkInputFormat( + srcFs, conf, tbd.getTable().getInputFileFormatClass(), files); + } else { + flag = HiveFileFormatUtils.checkInputFormat( + srcFs, conf, oldPart.getInputFormatClass(), files); + } + } + if (!flag) { + throw new HiveException(ErrorMsg.WRONG_FILE_FORMAT); + } + } else { + LOG.warn("Skipping file format check as dpCtx is not null"); + } + } + } + } + + /** * so to make sure we crate WriteEntity with the right WriteType. This is (at this point) only * for consistency since LockManager (which is the only thing that pays attention to WriteType) @@ -597,6 +679,7 @@ public int execute(DriverContext driverContext) { return WriteEntity.WriteType.INSERT; } } + private boolean isSkewedStoredAsDirs(LoadTableDesc tbd) { return (tbd.getLbCtx() == null) ? false : tbd.getLbCtx() .isSkewedStoredAsDir(); @@ -621,6 +704,11 @@ private void updatePartitionBucketSortColumns(Hive db, Table table, Partition pa boolean updateBucketCols = false; if (bucketCols != null) { + // TODO: this particular bit will not work for MM tables, as there can be multiple + // directories for different MM IDs. We could put the path here that would account + // for the current MM ID being written, but it will not guarantee that other MM IDs + // have the correct buckets. The existing code discards the inferred data when the + // reducers don't produce enough files; we'll do the same for MM tables for now. FileSystem fileSys = partn.getDataLocation().getFileSystem(conf); FileStatus[] fileStatus = HiveStatsUtils.getFileStatusRecurse( partn.getDataLocation(), 1, fileSys); @@ -706,4 +794,4 @@ public StageType getType() { public String getName() { return "MOVE"; } -} +} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java index 46905ca..a1ad243 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java @@ -77,6 +77,7 @@ public void process(Object row, int tag) throws HiveException { private void processKeyValuePairs(Object key, Object value) throws HiveException { String filePath = ""; + boolean exception = false; try { OrcFileValueWrapper v; OrcFileKeyWrapper k; @@ -89,12 +90,15 @@ private void processKeyValuePairs(Object key, Object value) // skip incompatible file, files that are missing stripe statistics are set to incompatible if (k.isIncompatFile()) { LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. 
" + k.getInputPath()); - incompatFileSet.add(k.getInputPath()); + addIncompatibleFile(k.getInputPath()); return; } filePath = k.getInputPath().toUri().getPath(); + Utilities.LOG14535.info("OrcFileMergeOperator processing " + filePath); + + fixTmpPath(k.getInputPath().getParent()); v = (OrcFileValueWrapper) value; @@ -129,6 +133,7 @@ private void processKeyValuePairs(Object key, Object value) options.bufferSize(compressBuffSize).enforceBufferSize(); } + Path outPath = getOutPath(); outWriter = OrcFile.createWriter(outPath, options); if (LOG.isDebugEnabled()) { LOG.info("ORC merge file output path: " + outPath); @@ -136,7 +141,7 @@ private void processKeyValuePairs(Object key, Object value) } if (!checkCompatibility(k)) { - incompatFileSet.add(k.getInputPath()); + addIncompatibleFile(k.getInputPath()); return; } @@ -167,7 +172,7 @@ private void processKeyValuePairs(Object key, Object value) outWriter.appendUserMetadata(v.getUserMetadata()); } } catch (Throwable e) { - this.exception = true; + exception = true; LOG.error("Closing operator..Exception: " + ExceptionUtils.getStackTrace(e)); throw new HiveException(e); } finally { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/RCFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/RCFileMergeOperator.java index 4dea1d2..349b459 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/RCFileMergeOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/RCFileMergeOperator.java @@ -77,7 +77,7 @@ private void processKeyValuePairs(Object k, Object v) codec = key.getCodec(); columnNumber = key.getKeyBuffer().getColumnNumber(); RCFileOutputFormat.setColumnNumber(jc, columnNumber); - outWriter = new RCFile.Writer(fs, jc, outPath, null, codec); + outWriter = new RCFile.Writer(fs, jc, getOutPath(), null, codec); } boolean sameCodec = ((codec == key.getCodec()) || codec.getClass().equals( @@ -94,7 +94,6 @@ private void processKeyValuePairs(Object k, Object v) key.getRecordLength(), key.getKeyLength(), key.getCompressedKeyLength()); } catch (Throwable e) { - this.exception = true; closeOp(true); throw new HiveException(e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java index 54746d3..86e21cb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java @@ -61,8 +61,9 @@ protected int execute(DriverContext driverContext) { FileSystem dstFs = null; Path toPath = null; try { - Path fromPath = work.getFromPath(); - toPath = work.getToPath(); + // TODO# merge with real CopyTask logic + Path fromPath = work.getFromPaths()[0]; + toPath = work.getToPaths()[0]; console.printInfo("Copying data from " + fromPath.toString(), " to " + toPath.toString()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java index 9c3a664..65363ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java @@ -239,6 +239,7 @@ private int aggregateStats(ExecutorService threadPool, Hive db) { Map parameters = tTable.getParameters(); try { Path dir = new Path(tTable.getSd().getLocation()); + Utilities.LOG14535.info("Aggregating stats for " + dir); long numRows = 0; long rawDataSize = 0; long fileSize = 0; @@ -248,6 +249,7 @@ private int aggregateStats(ExecutorService threadPool, Hive db) { boolean statsAvailable = false; 
for(FileStatus file: fileList) { + Utilities.LOG14535.info("Computing stats for " + file); if (!file.isDir()) { InputFormat inputFormat = ReflectionUtil.newInstance( table.getInputFormatClass(), jc); @@ -284,11 +286,11 @@ private int aggregateStats(ExecutorService threadPool, Hive db) { db.alterTable(tableFullName, new Table(tTable), environmentContext); String msg = "Table " + tableFullName + " stats: [" + toString(parameters) + ']'; - LOG.debug(msg); + Utilities.LOG14535.debug(msg); console.printInfo(msg); } else { String msg = "Table " + tableFullName + " does not provide stats."; - LOG.debug(msg); + Utilities.LOG14535.debug(msg); } } catch (Exception e) { console.printInfo("[Warning] could not update stats for " + tableFullName + ".", diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java index c22d69b..a85a96d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java @@ -180,7 +180,7 @@ private int aggregateStats(Hive db) { // work.getLoadTableDesc().getReplace() is true means insert overwrite command // work.getLoadFileDesc().getDestinationCreateTable().isEmpty() means CTAS etc. // acidTable will not have accurate stats unless it is set through analyze command. - if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) { + if (work.getTableSpecs() == null && AcidUtils.isFullAcidTable(table)) { StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); } else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) @@ -218,7 +218,7 @@ private int aggregateStats(Hive db) { if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) { console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']'); } - LOG.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']'); + Utilities.LOG14535.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']'); } else { // Partitioned table: // Need to get the old stats of the partition @@ -281,7 +281,7 @@ public Void call() throws Exception { // org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition(); Map parameters = tPart.getParameters(); - if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) { + if (work.getTableSpecs() == null && AcidUtils.isFullAcidTable(table)) { StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); } else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java index ffedefe..8fe037e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java @@ -81,11 +81,11 @@ private String schemaEvolutionColumns; private String schemaEvolutionColumnsTypes; - public TableDesc getTableDesc() { + public TableDesc getTableDescSkewJoin() { return tableDesc; } - public void setTableDesc(TableDesc tableDesc) { + public void setTableDescSkewJoin(TableDesc tableDesc) { this.tableDesc = tableDesc; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java index fe9b624..a335499 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java @@ -108,6 +108,8 @@ public TaskTuple(Class workClass, Class> taskClass) { MergeFileTask.class)); taskvec.add(new TaskTuple(DependencyCollectionWork.class, DependencyCollectionTask.class)); + taskvec.add(new TaskTuple(ImportCommitWork.class, + ImportCommitTask.class)); taskvec.add(new TaskTuple(PartialScanWork.class, PartialScanTask.class)); taskvec.add(new TaskTuple(IndexMetadataChangeWork.class, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index aca99f2..88f5a0d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -23,6 +23,55 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import java.beans.DefaultPersistenceDelegate; +import java.beans.Encoder; +import java.beans.Expression; +import java.beans.Statement; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInput; +import java.io.EOFException; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.net.URI; +import java.net.URL; +import java.net.URLClassLoader; +import java.net.URLDecoder; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.SQLFeatureNotSupportedException; +import java.sql.SQLTransientException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Collection; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; +import java.util.zip.InflaterInputStream; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.commons.codec.binary.Base64; @@ -33,10 +82,14 @@ import org.apache.hadoop.filecache.DistributedCache; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.HiveInterruptCallback; @@ -45,6 +98,7 @@ import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.StringInternUtils; +import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; import 
org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStoreUtils; @@ -76,6 +130,7 @@ import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; import org.apache.hadoop.hive.ql.io.RCFile; import org.apache.hadoop.hive.ql.io.ReworkMapredInputFormat; @@ -114,6 +169,7 @@ import org.apache.hadoop.hive.ql.stats.StatsPublisher; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.Serializer; @@ -145,7 +201,7 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Progressable; import org.apache.hive.common.util.ACLConfigurationParser; @@ -153,66 +209,21 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.beans.DefaultPersistenceDelegate; -import java.beans.Encoder; -import java.beans.Expression; -import java.beans.Statement; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInput; -import java.io.EOFException; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.Serializable; -import java.net.URI; -import java.net.URL; -import java.net.URLClassLoader; -import java.net.URLDecoder; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.SQLFeatureNotSupportedException; -import java.sql.SQLTransientException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Collection; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Random; -import java.util.Set; -import java.util.UUID; import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.zip.Deflater; -import java.util.zip.DeflaterOutputStream; -import java.util.zip.InflaterInputStream; + /** * Utilities. * */ -@SuppressWarnings("nls") +@SuppressWarnings({ "nls", "deprecation" }) public final class Utilities { + // TODO# remove when merging; convert some statements to local loggers, remove others + public static final Logger LOG14535 = LoggerFactory.getLogger("Log14535"); + /** * The object in the reducer are composed of these top level fields. 
*/ @@ -666,14 +677,15 @@ protected Expression instantiate(Object oldInstance, Encoder out) { } @Override - protected void initialize(Class type, Object oldInstance, Object newInstance, Encoder out) { - Iterator ite = ((Collection) oldInstance).iterator(); + protected void initialize(Class type, Object oldInstance, Object newInstance, Encoder out) { + Iterator ite = ((Collection) oldInstance).iterator(); while (ite.hasNext()) { out.writeStatement(new Statement(oldInstance, "add", new Object[] {ite.next()})); } } } + @VisibleForTesting public static TableDesc defaultTd; static { // by default we expect ^A separated strings @@ -681,7 +693,16 @@ protected void initialize(Class type, Object oldInstance, Object newInstance, En // PlanUtils.getDefaultTableDesc(String separatorCode, String columns) // or getBinarySortableTableDesc(List fieldSchemas) when // we know the column names. - defaultTd = PlanUtils.getDefaultTableDesc("" + Utilities.ctrlaCode); + /** + * Generate the table descriptor of MetadataTypedColumnsetSerDe with the + * separatorCode. MetaDataTypedColumnsetSerDe is used because LazySimpleSerDe + * does not support a table with a single column "col" with type + * "array". + */ + defaultTd = new TableDesc(TextInputFormat.class, IgnoreKeyTextOutputFormat.class, + Utilities.makeProperties(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, + "" + Utilities.ctrlaCode, serdeConstants.SERIALIZATION_LIB, + MetadataTypedColumnsetSerDe.class.getName())); } public static final int carriageReturnCode = 13; @@ -1103,6 +1124,9 @@ public static Path toTempPath(Path orig) { if (orig.getName().indexOf(tmpPrefix) == 0) { return orig; } + if (orig.getName().contains("=1")) { + LOG.error("TODO# creating tmp path from " + orig, new Exception()); + } return new Path(orig.getParent(), tmpPrefix + orig.getName()); } @@ -1430,7 +1454,7 @@ public static void mvFileToFinalPath(Path specPath, Configuration hconf, boolean success, Logger log, DynamicPartitionCtx dpCtx, FileSinkDesc conf, Reporter reporter) throws IOException, HiveException { - + FileSystem fs = specPath.getFileSystem(hconf); Path tmpPath = Utilities.toTempPath(specPath); Path taskTmpPath = Utilities.toTaskTempPath(specPath); @@ -1447,13 +1471,13 @@ public static void mvFileToFinalPath(Path specPath, Configuration hconf, // create empty buckets if necessary if (emptyBuckets.size() > 0) { perfLogger.PerfLogBegin("FileSinkOperator", "CreateEmptyBuckets"); - createEmptyBuckets(hconf, emptyBuckets, conf, reporter); + createEmptyBuckets( + hconf, emptyBuckets, conf.getCompressed(), conf.getTableInfo(), reporter); filesKept.addAll(emptyBuckets); perfLogger.PerfLogEnd("FileSinkOperator", "CreateEmptyBuckets"); } - // move to the file destination - log.info("Moving tmp dir: " + tmpPath + " to: " + specPath); + Utilities.LOG14535.info("Moving tmp dir: " + tmpPath + " to: " + specPath); perfLogger.PerfLogBegin("FileSinkOperator", "RenameOrMoveFiles"); if (HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVE_EXEC_MOVE_FILES_FROM_SOURCE_DIR)) { // HIVE-17113 - avoid copying files that may have been written to the temp dir by runaway tasks, @@ -1465,11 +1489,14 @@ public static void mvFileToFinalPath(Path specPath, Configuration hconf, perfLogger.PerfLogEnd("FileSinkOperator", "RenameOrMoveFiles"); } } else { + Utilities.LOG14535.info("deleting tmpPath " + tmpPath); fs.delete(tmpPath, true); } + Utilities.LOG14535.info("deleting taskTmpPath " + taskTmpPath); fs.delete(taskTmpPath, true); } + /** * Check the existence of buckets according to bucket 
specification. Create empty buckets if * needed. @@ -1481,8 +1508,8 @@ public static void mvFileToFinalPath(Path specPath, Configuration hconf, * @throws HiveException * @throws IOException */ - private static void createEmptyBuckets(Configuration hconf, List paths, - FileSinkDesc conf, Reporter reporter) + static void createEmptyBuckets(Configuration hconf, List paths, + boolean isCompressed, TableDesc tableInfo, Reporter reporter) throws HiveException, IOException { JobConf jc; @@ -1494,13 +1521,11 @@ private static void createEmptyBuckets(Configuration hconf, List paths, } HiveOutputFormat hiveOutputFormat = null; Class outputClass = null; - boolean isCompressed = conf.getCompressed(); - TableDesc tableInfo = conf.getTableInfo(); try { Serializer serializer = (Serializer) tableInfo.getDeserializerClass().newInstance(); serializer.initialize(null, tableInfo.getProperties()); outputClass = serializer.getSerializedClass(); - hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, conf.getTableInfo()); + hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, tableInfo); } catch (SerDeException e) { throw new HiveException(e); } catch (InstantiationException e) { @@ -1510,6 +1535,7 @@ private static void createEmptyBuckets(Configuration hconf, List paths, } for (Path path : paths) { + Utilities.LOG14535.info("creating empty bucket for " + path); RecordWriter writer = HiveFileFormatUtils.getRecordWriter( jc, hiveOutputFormat, outputClass, isCompressed, tableInfo.getProperties(), path, reporter); @@ -1553,148 +1579,198 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I */ public static List removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats, DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf, Set filesKept) throws IOException { + int dpLevels = dpCtx == null ? 0 : dpCtx.getNumDPCols(), + numBuckets = (conf != null && conf.getTable() != null) + ? conf.getTable().getNumBuckets() : 0; + return removeTempOrDuplicateFiles(fs, fileStats, dpLevels, numBuckets, hconf, null, 0, false); + } + + private static boolean removeEmptyDpDirectory(FileSystem fs, Path path) throws IOException { + FileStatus[] items = fs.listStatus(path); + // remove empty directory since DP insert should not generate empty partitions. + // empty directories could be generated by crashed Task/ScriptOperator + if (items.length != 0) return false; + if (!fs.delete(path, true)) { + LOG.error("Cannot delete empty directory " + path); + throw new IOException("Cannot delete empty directory " + path); + } + return true; + } + + public static List removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats, + int dpLevels, int numBuckets, Configuration hconf, Long txnId, int stmtId, boolean isMmTable) throws IOException { if (fileStats == null) { return null; } - List result = new ArrayList(); HashMap taskIDToFile = null; - if (dpCtx != null) { + if (dpLevels > 0) { FileStatus parts[] = fileStats; - for (int i = 0; i < parts.length; ++i) { - assert parts[i].isDir() : "dynamic partition " + parts[i].getPath() + assert parts[i].isDirectory() : "dynamic partition " + parts[i].getPath() + " is not a directory"; - FileStatus[] items = fs.listStatus(parts[i].getPath()); - - // remove empty directory since DP insert should not generate empty partitions. 
- // empty directories could be generated by crashed Task/ScriptOperator - if (items.length == 0) { - if (!fs.delete(parts[i].getPath(), true)) { - LOG.error("Cannot delete empty directory " + parts[i].getPath()); - throw new IOException("Cannot delete empty directory " + parts[i].getPath()); - } + Path path = parts[i].getPath(); + Utilities.LOG14535.info("removeTempOrDuplicateFiles looking at DP " + path); + if (removeEmptyDpDirectory(fs, path)) { + parts[i] = null; + continue; } + FileStatus[] items = fs.listStatus(path); - taskIDToFile = removeTempOrDuplicateFiles(items, fs); - if (filesKept != null && taskIDToFile != null) { - addFilesToPathSet(taskIDToFile.values(), filesKept); - } - // if the table is bucketed and enforce bucketing, we should check and generate all buckets - if (dpCtx.getNumBuckets() > 0 && taskIDToFile != null && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { - // refresh the file list - items = fs.listStatus(parts[i].getPath()); - // get the missing buckets and generate empty buckets - String taskID1 = taskIDToFile.keySet().iterator().next(); - Path bucketPath = taskIDToFile.values().iterator().next().getPath(); - for (int j = 0; j < dpCtx.getNumBuckets(); ++j) { - String taskID2 = replaceTaskId(taskID1, j); - if (!taskIDToFile.containsKey(taskID2)) { - // create empty bucket, file name should be derived from taskID2 - URI bucketUri = bucketPath.toUri(); - String path2 = replaceTaskIdFromFilename(bucketUri.getPath().toString(), j); - result.add(new Path(bucketUri.getScheme(), bucketUri.getAuthority(), path2)); - } + if (isMmTable) { + Path mmDir = parts[i].getPath(); + if (!mmDir.getName().equals(AcidUtils.deltaSubdir(txnId, txnId, stmtId))) { + throw new IOException("Unexpected non-MM directory name " + mmDir); } + Utilities.LOG14535.info("removeTempOrDuplicateFiles processing files in MM directory " + mmDir); } + taskIDToFile = removeTempOrDuplicateFilesNonMm(items, fs); + + // TODO: not clear why two if conditions are different. Preserve the existing logic for now. 
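// Editorial illustration, not part of the patch (the exact delta naming is an assumption based
// on the AcidUtils.deltaSubdir(txnId, txnId, stmtId) call above): for an MM (insert-only) write
// into a dynamically partitioned table, each partition directory written by this statement is
// expected to hold only that one statement-level delta directory, roughly
//   <table location>/<dp dir, e.g. p=1>/<deltaSubdir(txnId, txnId, stmtId)>/<task output files>
// which is why any DP child whose name differs from that delta name is rejected above.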
+ addBucketFileToResults(taskIDToFile, numBuckets, hconf, result); } } else { FileStatus[] items = fileStats; if (items.length == 0) { return result; } - taskIDToFile = removeTempOrDuplicateFiles(items, fs); - if (filesKept != null && taskIDToFile != null) { - addFilesToPathSet(taskIDToFile.values(), filesKept); - } - if(taskIDToFile != null && taskIDToFile.size() > 0 && conf != null && conf.getTable() != null - && (conf.getTable().getNumBuckets() > taskIDToFile.size()) && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { - // get the missing buckets and generate empty buckets for non-dynamic partition - String taskID1 = taskIDToFile.keySet().iterator().next(); - Path bucketPath = taskIDToFile.values().iterator().next().getPath(); - for (int j = 0; j < conf.getTable().getNumBuckets(); ++j) { - String taskID2 = replaceTaskId(taskID1, j); - if (!taskIDToFile.containsKey(taskID2)) { - // create empty bucket, file name should be derived from taskID2 - URI bucketUri = bucketPath.toUri(); - String path2 = replaceTaskIdFromFilename(bucketUri.getPath().toString(), j); - result.add(new Path(bucketUri.getScheme(), bucketUri.getAuthority(), path2)); - } + if (!isMmTable) { + taskIDToFile = removeTempOrDuplicateFilesNonMm(items, fs); + } else { + if (items.length > 1) { + throw new IOException("Unexpected directories for non-DP MM: " + Arrays.toString(items)); } + Path mmDir = items[0].getPath(); + if (!mmDir.getName().equals(AcidUtils.deltaSubdir(txnId, txnId, stmtId))) { + throw new IOException("Unexpected non-MM directory " + mmDir); + } + Utilities.LOG14535.info( + "removeTempOrDuplicateFiles processing files in MM directory " + mmDir); + taskIDToFile = removeTempOrDuplicateFilesNonMm(fs.listStatus(mmDir), fs); } + // TODO: not clear why two if conditions are different. Preserve the existing logic for now. + addBucketFileToResults2(taskIDToFile, numBuckets, hconf, result); } return result; } - public static HashMap removeTempOrDuplicateFiles(FileStatus[] items, - FileSystem fs) throws IOException { + // TODO: not clear why two if conditions are different. Preserve the existing logic for now. + private static void addBucketFileToResults2(HashMap taskIDToFile, + int numBuckets, Configuration hconf, List result) { + if(taskIDToFile != null && taskIDToFile.size() > 0 && (numBuckets > taskIDToFile.size()) + && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { + addBucketsToResultsCommon(taskIDToFile, numBuckets, result); + } + } + + // TODO: not clear why two if conditions are different. Preserve the existing logic for now. 
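// Editorial sketch, not part of the patch; file names are hypothetical. With a table declared
// with 4 buckets and only two task outputs present, e.g.
//   taskIDToFile = { "000000" -> .../000000_0, "000002" -> .../000002_0 }
// addBucketsToResultsCommon below loops j = 0..3, computes replaceTaskId(taskID1, j) for each j,
// and for the ids that are absent ("000001", "000003") adds the corresponding
// replaceTaskIdFromFilename(...) path (.../000001_0, .../000003_0) to result, so that empty
// files for the missing buckets can be created afterwards.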
+ private static void addBucketFileToResults(HashMap taskIDToFile, + int numBuckets, Configuration hconf, List result) { + // if the table is bucketed and enforce bucketing, we should check and generate all buckets + if (numBuckets > 0 && taskIDToFile != null + && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { + addBucketsToResultsCommon(taskIDToFile, numBuckets, result); + } + } - if (items == null || fs == null) { - return null; + private static void addBucketsToResultsCommon( + HashMap taskIDToFile, int numBuckets, List result) { + String taskID1 = taskIDToFile.keySet().iterator().next(); + Path bucketPath = taskIDToFile.values().iterator().next().getPath(); + Utilities.LOG14535.info("Bucket path " + bucketPath); + for (int j = 0; j < numBuckets; ++j) { + addBucketFileIfMissing(result, taskIDToFile, taskID1, bucketPath, j); } + } + private static void addBucketFileIfMissing(List result, + HashMap taskIDToFile, String taskID1, Path bucketPath, int j) { + String taskID2 = replaceTaskId(taskID1, j); + if (!taskIDToFile.containsKey(taskID2)) { + // create empty bucket, file name should be derived from taskID2 + URI bucketUri = bucketPath.toUri(); + String path2 = replaceTaskIdFromFilename(bucketUri.getPath().toString(), j); + Utilities.LOG14535.info("Creating an empty bucket file " + path2); + result.add(new Path(bucketUri.getScheme(), bucketUri.getAuthority(), path2)); + } + } + + private static HashMap removeTempOrDuplicateFilesNonMm( + FileStatus[] files, FileSystem fs) throws IOException { + if (files == null || fs == null) { + return null; + } HashMap taskIdToFile = new HashMap(); - for (FileStatus one : items) { + for (FileStatus one : files) { if (isTempPath(one)) { + Utilities.LOG14535.info("removeTempOrDuplicateFiles deleting " + one.getPath()/*, new Exception()*/); if (!fs.delete(one.getPath(), true)) { throw new IOException("Unable to delete tmp file: " + one.getPath()); } } else { - String taskId = getPrefixedTaskIdFromFilename(one.getPath().getName()); - FileStatus otherFile = taskIdToFile.get(taskId); - if (otherFile == null) { - taskIdToFile.put(taskId, one); - } else { - // Compare the file sizes of all the attempt files for the same task, the largest win - // any attempt files could contain partial results (due to task failures or - // speculative runs), but the largest should be the correct one since the result - // of a successful run should never be smaller than a failed/speculative run. - FileStatus toDelete = null; - - // "LOAD .. INTO" and "INSERT INTO" commands will generate files with - // "_copy_x" suffix. These files are usually read by map tasks and the - // task output gets written to some tmp path. The output file names will - // be of format taskId_attemptId. The usual path for all these tasks is - // srcPath -> taskTmpPath -> tmpPath -> finalPath. - // But, MergeFileTask can move files directly from src path to final path - // without copying it to tmp path. In such cases, different files with - // "_copy_x" suffix will be identified as duplicates (change in value - // of x is wrongly identified as attempt id) and will be deleted. - // To avoid that we will ignore files with "_copy_x" suffix from duplicate - // elimination. 
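// Editorial illustration, not part of the patch; file names are hypothetical. Under the
// taskId_attemptId naming described in the comments here, 000001_0 (say 10 MB) and a
// speculative attempt's 000001_1 (say 7 MB) resolve to the same task id, so the smaller
// attempt file is deleted and the larger one kept; a name like 000001_0_copy_1, however, is
// exempted by the copy-suffix check and is never removed as a duplicate, because its trailing
// _copy_1 is not an attempt id.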
- if (!isCopyFile(one.getPath().getName())) { - if (otherFile.getLen() >= one.getLen()) { - toDelete = one; - } else { - toDelete = otherFile; - taskIdToFile.put(taskId, one); - } - long len1 = toDelete.getLen(); - long len2 = taskIdToFile.get(taskId).getLen(); - if (!fs.delete(toDelete.getPath(), true)) { - throw new IOException( - "Unable to delete duplicate file: " + toDelete.getPath() - + ". Existing file: " + - taskIdToFile.get(taskId).getPath()); - } else { - LOG.warn("Duplicate taskid file removed: " + toDelete.getPath() + - " with length " - + len1 + ". Existing file: " + - taskIdToFile.get(taskId).getPath() + " with length " - + len2); - } - } else { - LOG.info(one.getPath() + " file identified as duplicate. This file is" + - " not deleted as it has copySuffix."); - } - } + // This would be a single file. See if we need to remove it. + ponderRemovingTempOrDuplicateFile(fs, one, taskIdToFile); } } return taskIdToFile; } + private static void ponderRemovingTempOrDuplicateFile(FileSystem fs, + FileStatus file, HashMap taskIdToFile) throws IOException { + String taskId = getPrefixedTaskIdFromFilename(file.getPath().getName()); + Utilities.LOG14535.info("removeTempOrDuplicateFiles pondering " + file.getPath() + ", taskId " + taskId); + + FileStatus otherFile = taskIdToFile.get(taskId); + taskIdToFile.put(taskId, (otherFile == null) ? file : + compareTempOrDuplicateFiles(fs, file, otherFile)); + } + + private static FileStatus compareTempOrDuplicateFiles(FileSystem fs, + FileStatus file, FileStatus existingFile) throws IOException { + // Compare the file sizes of all the attempt files for the same task, the largest win + // any attempt files could contain partial results (due to task failures or + // speculative runs), but the largest should be the correct one since the result + // of a successful run should never be smaller than a failed/speculative run. + FileStatus toDelete = null, toRetain = null; + + // "LOAD .. INTO" and "INSERT INTO" commands will generate files with + // "_copy_x" suffix. These files are usually read by map tasks and the + // task output gets written to some tmp path. The output file names will + // be of format taskId_attemptId. The usual path for all these tasks is + // srcPath -> taskTmpPath -> tmpPath -> finalPath. + // But, MergeFileTask can move files directly from src path to final path + // without copying it to tmp path. In such cases, different files with + // "_copy_x" suffix will be identified as duplicates (change in value + // of x is wrongly identified as attempt id) and will be deleted. + // To avoid that we will ignore files with "_copy_x" suffix from duplicate + // elimination. + if (isCopyFile(file.getPath().getName())) { + LOG.info(file.getPath() + " file identified as duplicate. This file is" + + " not deleted as it has copySuffix."); + return existingFile; + } + + if (existingFile.getLen() >= file.getLen()) { + toDelete = file; + toRetain = existingFile; + } else { + toDelete = existingFile; + toRetain = file; + } + if (!fs.delete(toDelete.getPath(), true)) { + throw new IOException( + "Unable to delete duplicate file: " + toDelete.getPath() + + ". Existing file: " + toRetain.getPath()); + } else { + LOG.warn("Duplicate taskid file removed: " + toDelete.getPath() + " with length " + + toDelete.getLen() + ". 
Existing file: " + toRetain.getPath() + " with length " + + toRetain.getLen()); + } + return toRetain; + } + public static boolean isCopyFile(String filename) { String taskId = filename; String copyFileSuffix = null; @@ -2573,7 +2649,11 @@ public static boolean isEmptyPath(Configuration job, Path dirPath) throws IOExce // generate a full partition specification LinkedHashMap fullPartSpec = new LinkedHashMap(partSpec); - Warehouse.makeSpecFromName(fullPartSpec, partPath); + if (!Warehouse.makeSpecFromName(fullPartSpec, partPath, new HashSet(partSpec.keySet()))) { + Utilities.LOG14535.warn("Ignoring invalid DP directory " + partPath); + continue; + } + Utilities.LOG14535.info("Adding partition spec from " + partPath + ": " + fullPartSpec); fullPartSpecs.add(fullPartSpec); } return fullPartSpecs; @@ -3146,18 +3226,30 @@ public static double getHighestSamplePercentage (MapWork work) { List pathsToAdd = new LinkedList(); LockedDriverState lDrvStat = LockedDriverState.getLockedDriverState(); // AliasToWork contains all the aliases - for (String alias : work.getAliasToWork().keySet()) { + Collection aliasToWork = work.getAliasToWork().keySet(); + if (!skipDummy) { + // ConcurrentModification otherwise if adding dummy. + aliasToWork = new ArrayList<>(aliasToWork); + } + for (String alias : aliasToWork) { LOG.info("Processing alias " + alias); // The alias may not have any path + Collection>> pathToAliases = + work.getPathToAliases().entrySet(); + if (!skipDummy) { + // ConcurrentModification otherwise if adding dummy. + pathToAliases = new ArrayList<>(pathToAliases); + } boolean isEmptyTable = true; boolean hasLogged = false; - // Note: this copies the list because createDummyFileForEmptyPartition may modify the map. - for (Path file : new LinkedList(work.getPathToAliases().keySet())) { + Path path = null; + for (Map.Entry> e : pathToAliases) { if (lDrvStat != null && lDrvStat.driverState == DriverState.INTERRUPT) throw new IOException("Operation is Canceled."); - List aliases = work.getPathToAliases().get(file); + Path file = e.getKey(); + List aliases = e.getValue(); if (aliases.contains(alias)) { if (file != null) { isEmptyTable = false; @@ -3174,7 +3266,6 @@ public static double getHighestSamplePercentage (MapWork work) { StringInternUtils.internUriStringsInPath(file); pathsProcessed.add(file); - if (LOG.isDebugEnabled()) { LOG.debug("Adding input file " + file); } else if (!hasLogged) { @@ -3182,6 +3273,7 @@ public static double getHighestSamplePercentage (MapWork work) { LOG.info("Adding " + work.getPathToAliases().size() + " inputs; the first input is " + file); } + pathsToAdd.add(file); } } @@ -3486,8 +3578,8 @@ private static void createTmpDirs(Configuration conf, if (op instanceof FileSinkOperator) { FileSinkDesc fdesc = ((FileSinkOperator) op).getConf(); + if (fdesc.isMmTable()) continue; // No need to create for MM tables Path tempDir = fdesc.getDirName(); - if (tempDir != null) { Path tempPath = Utilities.toTempPath(tempDir); FileSystem fs = tempPath.getFileSystem(conf); @@ -3933,6 +4025,333 @@ public static String humanReadableByteCount(long bytes) { return String.format("%.2f%sB", bytes / Math.pow(unit, exp), suffix); } + private static final String MANIFEST_EXTENSION = ".manifest"; + + private static void tryDelete(FileSystem fs, Path path) { + try { + fs.delete(path, true); + } catch (IOException ex) { + LOG.error("Failed to delete " + path, ex); + } + } + + public static Path[] getMmDirectoryCandidates(FileSystem fs, Path path, int dpLevels, + int lbLevels, PathFilter filter, 
long txnId, int stmtId, Configuration conf) throws IOException { + int skipLevels = dpLevels + lbLevels; + if (filter == null) { + filter = new JavaUtils.IdPathFilter(txnId, stmtId, true); + } + if (skipLevels == 0) { + return statusToPath(fs.listStatus(path, filter)); + } + if (HiveConf.getBoolVar(conf, ConfVars.HIVE_MM_AVOID_GLOBSTATUS_ON_S3) && isS3(fs)) { + return getMmDirectoryCandidatesRecursive(fs, path, skipLevels, filter); + } + return getMmDirectoryCandidatesGlobStatus(fs, path, skipLevels, filter, txnId, stmtId); + } + + private static boolean isS3(FileSystem fs) { + try { + return fs.getScheme().equalsIgnoreCase("s3a"); + } catch (UnsupportedOperationException ex) { + // Some FS-es do not implement getScheme, e.g. ProxyLocalFileSystem. + return false; + } + } + + private static Path[] statusToPath(FileStatus[] statuses) { + if (statuses == null) return null; + Path[] paths = new Path[statuses.length]; + for (int i = 0; i < statuses.length; ++i) { + paths[i] = statuses[i].getPath(); + } + return paths; + } + + private static Path[] getMmDirectoryCandidatesRecursive(FileSystem fs, + Path path, int skipLevels, PathFilter filter) throws IOException { + String lastRelDir = null; + HashSet results = new HashSet(); + String relRoot = Path.getPathWithoutSchemeAndAuthority(path).toString(); + if (!relRoot.endsWith(Path.SEPARATOR)) { + relRoot += Path.SEPARATOR; + } + RemoteIterator allFiles = fs.listFiles(path, true); + while (allFiles.hasNext()) { + LocatedFileStatus lfs = allFiles.next(); + Path dirPath = Path.getPathWithoutSchemeAndAuthority(lfs.getPath()); + String dir = dirPath.toString(); + if (!dir.startsWith(relRoot)) { + throw new IOException("Path " + lfs.getPath() + " is not under " + relRoot + + " (when shortened to " + dir + ")"); + } + String subDir = dir.substring(relRoot.length()); + Utilities.LOG14535.info("Looking at " + subDir + " from " + lfs.getPath()); + // If sorted, we'll skip a bunch of files. + if (lastRelDir != null && subDir.startsWith(lastRelDir)) continue; + int startIx = skipLevels > 0 ? 
-1 : 0; + for (int i = 0; i < skipLevels; ++i) { + startIx = subDir.indexOf(Path.SEPARATOR_CHAR, startIx + 1); + if (startIx == -1) { + Utilities.LOG14535.info("Expected level of nesting (" + skipLevels + ") is not " + + " present in " + subDir + " (from " + lfs.getPath() + ")"); + break; + } + } + if (startIx == -1) continue; + int endIx = subDir.indexOf(Path.SEPARATOR_CHAR, startIx + 1); + if (endIx == -1) { + Utilities.LOG14535.info("Expected level of nesting (" + (skipLevels + 1) + ") is not " + + " present in " + subDir + " (from " + lfs.getPath() + ")"); + continue; + } + lastRelDir = subDir = subDir.substring(0, endIx); + Path candidate = new Path(relRoot, subDir); + Utilities.LOG14535.info("Considering MM directory candidate " + candidate); + if (!filter.accept(candidate)) continue; + results.add(fs.makeQualified(candidate)); + } + return results.toArray(new Path[results.size()]); + } + + private static Path[] getMmDirectoryCandidatesGlobStatus(FileSystem fs, + Path path, int skipLevels, PathFilter filter, long txnId, int stmtId) throws IOException { + StringBuilder sb = new StringBuilder(path.toUri().getPath()); + for (int i = 0; i < skipLevels; i++) { + sb.append(Path.SEPARATOR).append("*"); + } + sb.append(Path.SEPARATOR).append(AcidUtils.deltaSubdir(txnId, txnId, stmtId)); + Path pathPattern = new Path(path, sb.toString()); + Utilities.LOG14535.info("Looking for files via: " + pathPattern); + return statusToPath(fs.globStatus(pathPattern, filter)); + } + + private static void tryDeleteAllMmFiles(FileSystem fs, Path specPath, Path manifestDir, + int dpLevels, int lbLevels, JavaUtils.IdPathFilter filter, + long txnId, int stmtId, Configuration conf) throws IOException { + Path[] files = getMmDirectoryCandidates( + fs, specPath, dpLevels, lbLevels, filter, txnId, stmtId, conf); + if (files != null) { + for (Path path : files) { + Utilities.LOG14535.info("Deleting " + path + " on failure"); + tryDelete(fs, path); + } + } + Utilities.LOG14535.info("Deleting " + manifestDir + " on failure"); + fs.delete(manifestDir, true); + } + + + public static void writeMmCommitManifest(List commitPaths, Path specPath, FileSystem fs, + String taskId, Long txnId, int stmtId, String unionSuffix) throws HiveException { + if (commitPaths.isEmpty()) return; + // We assume one FSOP per task (per specPath), so we create it in specPath. + Path manifestPath = getManifestDir(specPath, txnId, stmtId, unionSuffix); + manifestPath = new Path(manifestPath, taskId + MANIFEST_EXTENSION); + Utilities.LOG14535.info("Writing manifest to " + manifestPath + " with " + commitPaths); + try { + // Don't overwrite the manifest... should fail if we have collisions. + try (FSDataOutputStream out = fs.create(manifestPath, false)) { + if (out == null) { + throw new HiveException("Failed to create manifest at " + manifestPath); + } + out.writeInt(commitPaths.size()); + for (Path path : commitPaths) { + out.writeUTF(path.toString()); + } + } + } catch (IOException e) { + throw new HiveException(e); + } + } + + private static Path getManifestDir(Path specPath, long txnId, int stmtId, String unionSuffix) { + Path manifestPath = new Path(specPath, "_tmp." + AcidUtils.deltaSubdir(txnId, txnId, stmtId)); + return (unionSuffix == null) ? 
manifestPath : new Path(manifestPath, unionSuffix); + } + + public static final class MissingBucketsContext { + public final TableDesc tableInfo; + public final int numBuckets; + public final boolean isCompressed; + public MissingBucketsContext(TableDesc tableInfo, int numBuckets, boolean isCompressed) { + this.tableInfo = tableInfo; + this.numBuckets = numBuckets; + this.isCompressed = isCompressed; + } + } + + public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Configuration hconf, + boolean success, int dpLevels, int lbLevels, MissingBucketsContext mbc, long txnId, int stmtId, + Reporter reporter, boolean isMmTable, boolean isMmCtas) throws IOException, HiveException { + FileSystem fs = specPath.getFileSystem(hconf); + Path manifestDir = getManifestDir(specPath, txnId, stmtId, unionSuffix); + if (!success) { + JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(txnId, stmtId, true); + tryDeleteAllMmFiles(fs, specPath, manifestDir, dpLevels, lbLevels, + filter, txnId, stmtId, hconf); + return; + } + + Utilities.LOG14535.info("Looking for manifests in: " + manifestDir + " (" + txnId + ")"); + // TODO# may be wrong if there are no splits (empty insert/CTAS) + List manifests = new ArrayList<>(); + if (fs.exists(manifestDir)) { + FileStatus[] manifestFiles = fs.listStatus(manifestDir); + if (manifestFiles != null) { + for (FileStatus status : manifestFiles) { + Path path = status.getPath(); + if (path.getName().endsWith(MANIFEST_EXTENSION)) { + Utilities.LOG14535.info("Reading manifest " + path); + manifests.add(path); + } + } + } + } else { + Utilities.LOG14535.info("No manifests found - query produced no output"); + manifestDir = null; + } + + Utilities.LOG14535.info("Looking for files in: " + specPath); + JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(txnId, stmtId, true); + if (isMmCtas && !fs.exists(specPath)) { + // TODO: do we also need to do this when creating an empty partition from select? + Utilities.LOG14535.info("Creating table directory for CTAS with no output at " + specPath); + FileUtils.mkdir(fs, specPath, hconf); + } + Path[] files = getMmDirectoryCandidates( + fs, specPath, dpLevels, lbLevels, filter, txnId, stmtId, hconf); + ArrayList mmDirectories = new ArrayList<>(); + if (files != null) { + for (Path path : files) { + Utilities.LOG14535.info("Looking at path: " + path); + mmDirectories.add(path); + } + } + + HashSet committed = new HashSet<>(); + for (Path mfp : manifests) { + try (FSDataInputStream mdis = fs.open(mfp)) { + int fileCount = mdis.readInt(); + for (int i = 0; i < fileCount; ++i) { + String nextFile = mdis.readUTF(); + if (!committed.add(nextFile)) { + throw new HiveException(nextFile + " was specified in multiple manifests"); + } + } + } + } + + if (manifestDir != null) { + Utilities.LOG14535.info("Deleting manifest directory " + manifestDir); + tryDelete(fs, manifestDir); + if (unionSuffix != null) { + // Also delete the parent directory if we are the last union FSOP to execute. 
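// Editorial note, not part of the patch: "last union FSOP" is inferred rather than coordinated.
// Each union branch removes its own suffix subdirectory under the _tmp.<delta> manifest
// directory; whichever branch then sees the parent come back empty from fs.listStatus (null or
// zero-length, as checked just below) deletes the parent manifest directory as well.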
+ manifestDir = manifestDir.getParent(); + FileStatus[] remainingFiles = fs.listStatus(manifestDir); + if (remainingFiles == null || remainingFiles.length == 0) { + Utilities.LOG14535.info("Deleting manifest directory " + manifestDir); + tryDelete(fs, manifestDir); + } + } + } + + for (Path path : mmDirectories) { + cleanMmDirectory(path, fs, unionSuffix, committed); + } + + if (!committed.isEmpty()) { + throw new HiveException("The following files were committed but not found: " + committed); + } + + if (mmDirectories.isEmpty()) return; + + // TODO: see HIVE-14886 - removeTempOrDuplicateFiles is broken for list bucketing, + // so maintain parity here by not calling it at all. + if (lbLevels != 0) return; + // Create fake file statuses to avoid querying the file system. removeTempOrDuplicateFiles + // doesn't need to check anything except path and directory status for MM directories. + FileStatus[] finalResults = new FileStatus[mmDirectories.size()]; + for (int i = 0; i < mmDirectories.size(); ++i) { + finalResults[i] = new PathOnlyFileStatus(mmDirectories.get(i)); + } + List emptyBuckets = Utilities.removeTempOrDuplicateFiles( + fs, finalResults, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf, txnId, stmtId, isMmTable); + // create empty buckets if necessary + if (emptyBuckets.size() > 0) { + assert mbc != null; + Utilities.createEmptyBuckets(hconf, emptyBuckets, mbc.isCompressed, mbc.tableInfo, reporter); + } + } + + private static final class PathOnlyFileStatus extends FileStatus { + public PathOnlyFileStatus(Path path) { + super(0, true, 0, 0, 0, path); + } + } + + private static void cleanMmDirectory(Path dir, FileSystem fs, + String unionSuffix, HashSet committed) throws IOException, HiveException { + for (FileStatus child : fs.listStatus(dir)) { + Path childPath = child.getPath(); + if (unionSuffix == null) { + if (committed.remove(childPath.toString())) continue; // A good file. + deleteUncommitedFile(childPath, fs); + } else if (!child.isDirectory()) { + if (committed.contains(childPath.toString())) { + throw new HiveException("Union FSOP has committed " + + childPath + " outside of union directory " + unionSuffix); + } + deleteUncommitedFile(childPath, fs); + } else if (childPath.getName().equals(unionSuffix)) { + // Found the right union directory; treat it as "our" MM directory. + cleanMmDirectory(childPath, fs, null, committed); + } else { + Utilities.LOG14535.info("FSOP for " + unionSuffix + + " is ignoring the other side of the union " + childPath.getName()); + } + } + } + + private static void deleteUncommitedFile(Path childPath, FileSystem fs) + throws IOException, HiveException { + Utilities.LOG14535.info("Deleting " + childPath + " that was not committed"); + // We should actually succeed here - if we fail, don't commit the query. + if (!fs.delete(childPath, true)) { + throw new HiveException("Failed to delete an uncommitted path " + childPath); + } + } + + /** + * @return the complete list of valid MM directories under a table/partition path; null + * if the entire directory is valid (has no uncommitted/temporary files). + */ + public static List getValidMmDirectoriesFromTableOrPart(Path path, Configuration conf, + ValidTxnList validTxnList, int lbLevels) throws IOException { + Utilities.LOG14535.info("Looking for valid MM paths under " + path); + // NULL means this directory is entirely valid. + List result = null; + FileSystem fs = path.getFileSystem(conf); + FileStatus[] children = (lbLevels == 0) ?
fs.listStatus(path) + : fs.globStatus(new Path(path, StringUtils.repeat("*" + Path.SEPARATOR, lbLevels) + "*")); + for (int i = 0; i < children.length; ++i) { + FileStatus file = children[i]; + Path childPath = file.getPath(); + Long txnId = JavaUtils.extractTxnId(childPath); + if (!file.isDirectory() || txnId == null || !validTxnList.isTxnValid(txnId)) { + Utilities.LOG14535.info("Skipping path " + childPath); + if (result == null) { + result = new ArrayList<>(children.length - 1); + for (int j = 0; j < i; ++j) { + result.add(children[j].getPath()); + } + } + } else if (result != null) { + result.add(childPath); + } + } + return result; + } public static String getAclStringWithHiveModification(Configuration tezConf, String propertyName, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java index c944a13..2ae18cf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java @@ -44,6 +44,8 @@ Licensed to the Apache Software Foundation (ASF) under one import org.apache.hadoop.hive.ql.plan.ImportTableDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MoveWork; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.mortbay.jetty.servlet.AbstractSessionManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -224,7 +226,7 @@ private TaskTracker forNewTable() throws Exception { Path tmpPath) { LoadTableDesc loadTableWork = new LoadTableDesc( tmpPath, Utilities.getTableDesc(table), partSpec.getPartSpec(), - event.replicationSpec().isReplace() + event.replicationSpec().isReplace(), SessionState.get().getTxnMgr().getCurrentTxnId() ); loadTableWork.setInheritTableSpecs(false); MoveWork work = new MoveWork(new HashSet<>(), new HashSet<>(), loadTableWork, null, false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java index a1187c4..6354f77 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java @@ -41,6 +41,7 @@ Licensed to the Apache Software Foundation (ASF) under one import org.apache.hadoop.hive.ql.plan.ImportTableDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MoveWork; +import org.apache.hadoop.hive.ql.session.SessionState; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -223,7 +224,8 @@ private String location(ImportTableDesc tblDesc, Database parentDb) ReplCopyTask.getLoadCopyTask(replicationSpec, dataPath, tmpPath, context.hiveConf); LoadTableDesc loadTableWork = new LoadTableDesc( - tmpPath, Utilities.getTableDesc(table), new TreeMap<>(), replicationSpec.isReplace() + tmpPath, Utilities.getTableDesc(table), new TreeMap<>(), replicationSpec.isReplace(), + SessionState.get().getTxnMgr().getCurrentTxnId() ); MoveWork moveWork = new MoveWork(new HashSet<>(), new HashSet<>(), loadTableWork, null, false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java index 5c0c3ed..92a1ebc 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java @@ -275,7 +275,7 @@ private boolean schemaEvolved(InputSplit s, InputSplit prevSplit, boolean groupA MapWork work) throws IOException { boolean retval = false; Path path = ((FileSplit) s).getPath(); - PartitionDesc pd = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + PartitionDesc pd = HiveFileFormatUtils.getFromPathRecursively( work.getPathToPartitionInfo(), path, cache); String currentDeserializerClass = pd.getDeserializerClassName(); Class currentInputFormatClass = pd.getInputFileFormatClass(); @@ -288,7 +288,7 @@ private boolean schemaEvolved(InputSplit s, InputSplit prevSplit, boolean groupA return !path.equals(prevPath); } PartitionDesc prevPD = - HiveFileFormatUtils.getPartitionDescFromPathRecursively(work.getPathToPartitionInfo(), + HiveFileFormatUtils.getFromPathRecursively(work.getPathToPartitionInfo(), prevPath, cache); previousDeserializerClass = prevPD.getDeserializerClassName(); previousInputFormatClass = prevPD.getInputFileFormatClass(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 90d1372..b5733ec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -173,7 +173,7 @@ public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, Map pathToPartitionInfo = mapWork.getPathToPartitionInfo(); PartitionDesc partDesc = HiveFileFormatUtils - .getPartitionDescFromPathRecursively(pathToPartitionInfo, + .getFromPathRecursively(pathToPartitionInfo, split.getPath(), IOPrepareCache.get().getPartitionDescMap()); getPartitionValues(vrbCtx, partDesc, partitionValues); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java index da8c1e2..d68d3b1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java @@ -213,7 +213,8 @@ public static WriteType determineAlterTableWriteType(AlterTableDesc.AlterTableTy case ADDPARTITION: case ADDSERDEPROPS: - case ADDPROPS: return WriteType.DDL_SHARED; + case ADDPROPS: + return WriteType.DDL_SHARED; case COMPACT: case TOUCH: return WriteType.DDL_NO_LOCK; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java index a02baf9..ccdd693 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java @@ -75,7 +75,7 @@ public HiveIndexedInputFormat(String indexFileName) { PartitionDesc part; for (Path dir : dirs) { part = HiveFileFormatUtils - .getPartitionDescFromPathRecursively(pathToPartitionInfo, dir, + .getFromPathRecursively(pathToPartitionInfo, dir, IOPrepareCache.get().allocatePartitionDescMap(), true); // create a new InputFormat instance if this is the first time to see this // class diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index feacdd8..9c9a2d3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -37,6 +37,7 @@ import 
org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.api.DataOperationType; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.TransactionalValidationListener; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater; @@ -316,7 +317,7 @@ else if (filename.startsWith(BUCKET_PREFIX)) { } public enum Operation { - NOT_ACID, INSERT, UPDATE, DELETE; + NOT_ACID, INSERT, UPDATE, DELETE } /** @@ -384,7 +385,10 @@ public HdfsFileStatusWithId getHdfsFileStatusWithId() { public static final String SPLIT_UPDATE_STRING = "split_update"; public static final int HASH_BASED_MERGE_BIT = 0x02; public static final String HASH_BASED_MERGE_STRING = "hash_merge"; + public static final int INSERT_ONLY_BIT = 0x04; + public static final String INSERT_ONLY_STRING = "insert_only"; public static final String DEFAULT_VALUE_STRING = TransactionalValidationListener.DEFAULT_TRANSACTIONAL_PROPERTY; + public static final String INSERTONLY_VALUE_STRING = TransactionalValidationListener.INSERTONLY_TRANSACTIONAL_PROPERTY; private AcidOperationalProperties() { } @@ -399,6 +403,18 @@ public static AcidOperationalProperties getDefault() { AcidOperationalProperties obj = new AcidOperationalProperties(); obj.setSplitUpdate(true); obj.setHashBasedMerge(false); + obj.setInsertOnly(false); + return obj; + } + + /** + * Returns an acidOperationalProperties object for tables that use the ACID framework but only + * support the INSERT operation and do not require ORC or bucketing. + * @return the acidOperationalProperties object + */ + public static AcidOperationalProperties getInsertOnly() { + AcidOperationalProperties obj = new AcidOperationalProperties(); + obj.setInsertOnly(true); return obj; } @@ -414,6 +430,9 @@ public static AcidOperationalProperties parseString(String propertiesStr) { if (propertiesStr.equalsIgnoreCase(DEFAULT_VALUE_STRING)) { return AcidOperationalProperties.getDefault(); } + if (propertiesStr.equalsIgnoreCase(INSERTONLY_VALUE_STRING)) { + return AcidOperationalProperties.getInsertOnly(); + } AcidOperationalProperties obj = new AcidOperationalProperties(); String[] options = propertiesStr.split("\\|"); for (String option : options) { @@ -425,6 +444,9 @@ public static AcidOperationalProperties parseString(String propertiesStr) { case HASH_BASED_MERGE_STRING: obj.setHashBasedMerge(true); break; + case INSERT_ONLY_STRING: + obj.setInsertOnly(true); + break; default: throw new IllegalArgumentException( "Unexpected value " + option + " for ACID operational properties!"); @@ -446,6 +467,9 @@ public static AcidOperationalProperties parseInt(int properties) { if ((properties & HASH_BASED_MERGE_BIT) > 0) { obj.setHashBasedMerge(true); } + if ((properties & INSERT_ONLY_BIT) > 0) { + obj.setInsertOnly(true); + } return obj; } @@ -474,6 +498,12 @@ public AcidOperationalProperties setHashBasedMerge(boolean isHashBasedMerge) { return this; } + public AcidOperationalProperties setInsertOnly(boolean isInsertOnly) { + description = (isInsertOnly + ?
(description | INSERT_ONLY_BIT) : (description & ~INSERT_ONLY_BIT)); + return this; + } + public boolean isSplitUpdate() { return (description & SPLIT_UPDATE_BIT) > 0; } @@ -482,6 +512,10 @@ public boolean isHashBasedMerge() { return (description & HASH_BASED_MERGE_BIT) > 0; } + public boolean isInsertOnly() { + return (description & INSERT_ONLY_BIT) > 0; + } + public int toInt() { return description; } @@ -495,6 +529,9 @@ public String toString() { if (isHashBasedMerge()) { str.append("|" + HASH_BASED_MERGE_STRING); } + if (isInsertOnly()) { + str.append("|" + INSERT_ONLY_STRING); + } return str.toString(); } } @@ -529,6 +566,12 @@ public String toString() { * more up to date ones. Not {@code null}. */ List getObsolete(); + + /** + * Get the list of directories that has nothing but aborted transactions. + * @return the list of aborted directories + */ + List getAbortedDirectories(); } public static class ParsedDelta implements Comparable { @@ -790,21 +833,22 @@ public static Directory getAcidState(Path directory, boolean useFileIds, boolean ignoreEmptyFiles ) throws IOException { - return getAcidState(directory, conf, txnList, Ref.from(useFileIds), ignoreEmptyFiles); + return getAcidState(directory, conf, txnList, Ref.from(useFileIds), ignoreEmptyFiles, null); } public static Directory getAcidState(Path directory, Configuration conf, ValidTxnList txnList, Ref useFileIds, - boolean ignoreEmptyFiles - ) throws IOException { + boolean ignoreEmptyFiles, + Map tblproperties) throws IOException { FileSystem fs = directory.getFileSystem(conf); // The following 'deltas' includes all kinds of delta files including insert & delete deltas. final List deltas = new ArrayList(); List working = new ArrayList(); List originalDirectories = new ArrayList(); final List obsolete = new ArrayList(); + final List abortedDirectories = new ArrayList<>(); List childrenWithId = null; Boolean val = useFileIds.value; if (val == null || val) { @@ -824,14 +868,14 @@ public static Directory getAcidState(Path directory, final List original = new ArrayList<>(); if (childrenWithId != null) { for (HdfsFileStatusWithId child : childrenWithId) { - getChildState(child.getFileStatus(), child, txnList, working, - originalDirectories, original, obsolete, bestBase, ignoreEmptyFiles); + getChildState(child.getFileStatus(), child, txnList, working, originalDirectories, original, + obsolete, bestBase, ignoreEmptyFiles, abortedDirectories, tblproperties); } } else { List children = HdfsUtils.listLocatedStatus(fs, directory, hiddenFileFilter); for (FileStatus child : children) { - getChildState( - child, null, txnList, working, originalDirectories, original, obsolete, bestBase, ignoreEmptyFiles); + getChildState(child, null, txnList, working, originalDirectories, original, obsolete, + bestBase, ignoreEmptyFiles, abortedDirectories, tblproperties); } } @@ -951,6 +995,11 @@ public Path getBaseDirectory() { public List getObsolete() { return obsolete; } + + @Override + public List getAbortedDirectories() { + return abortedDirectories; + } }; } /** @@ -971,7 +1020,7 @@ private static boolean isValidBase(long baseTxnId, ValidTxnList txnList) { private static void getChildState(FileStatus child, HdfsFileStatusWithId childWithId, ValidTxnList txnList, List working, List originalDirectories, List original, List obsolete, TxnBase bestBase, - boolean ignoreEmptyFiles) throws IOException { + boolean ignoreEmptyFiles, List aborted, Map tblproperties) throws IOException { Path p = child.getPath(); String fn = p.getName(); if 
(fn.startsWith(BASE_PREFIX) && child.isDir()) { @@ -1000,6 +1049,10 @@ private static void getChildState(FileStatus child, HdfsFileStatusWithId childWi String deltaPrefix = (fn.startsWith(DELTA_PREFIX)) ? DELTA_PREFIX : DELETE_DELTA_PREFIX; ParsedDelta delta = parseDelta(child, deltaPrefix); + if (tblproperties != null && MetaStoreUtils.isInsertOnlyTable(tblproperties) && + ValidTxnList.RangeResponse.ALL == txnList.isTxnRangeAborted(delta.minTransaction, delta.maxTransaction)) { + aborted.add(child); + } if (txnList.isTxnRangeValid(delta.minTransaction, delta.maxTransaction) != ValidTxnList.RangeResponse.NONE) { @@ -1143,6 +1196,10 @@ public static boolean isAcidTable(Table table) { return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true"); } + public static boolean isFullAcidTable(Table table) { + return isAcidTable(table) && !MetaStoreUtils.isInsertOnlyTable(table.getParameters()); + } + /** * Sets the acidOperationalProperties in the configuration object argument. * @param conf Mutable configuration object diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java index 5256c46..6f2aaf0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.io; +import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; + import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; @@ -43,6 +45,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.Driver.DriverState; import org.apache.hadoop.hive.ql.Driver.LockedDriverState; import org.apache.hadoop.hive.ql.exec.Operator; @@ -88,32 +91,44 @@ private final int start; private final int length; private final JobConf conf; + private final boolean isMerge; - public CheckNonCombinablePathCallable(Path[] paths, int start, int length, JobConf conf) { + public CheckNonCombinablePathCallable( + Path[] paths, int start, int length, JobConf conf, boolean isMerge) { this.paths = paths; this.start = start; this.length = length; this.conf = conf; + this.isMerge = isMerge; } @Override public Set call() throws Exception { Set nonCombinablePathIndices = new HashSet(); for (int i = 0; i < length; i++) { - PartitionDesc part = - HiveFileFormatUtils.getPartitionDescFromPathRecursively( + PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, paths[i + start], IOPrepareCache.get().allocatePartitionDescMap()); // Use HiveInputFormat if any of the paths is not splittable Class inputFormatClass = part.getInputFileFormatClass(); InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, conf); - if (inputFormat instanceof AvoidSplitCombination && - ((AvoidSplitCombination) inputFormat).shouldSkipCombine(paths[i + start], conf)) { - if (LOG.isDebugEnabled()) { - LOG.debug("The path [" + paths[i + start] + + boolean isAvoidSplitCombine = inputFormat instanceof AvoidSplitCombination && + ((AvoidSplitCombination) inputFormat).shouldSkipCombine(paths[i + start], conf); + TableDesc tbl = part.getTableDesc(); + boolean isMmNonMerge = false; + if (tbl != null) { + isMmNonMerge = !isMerge && MetaStoreUtils.isInsertOnlyTable(tbl.getProperties()); + } else { + // This would be the case for obscure tasks like 
truncate column (unsupported for MM). + Utilities.LOG14535.warn("Assuming not insert-only; no table in partition spec " + part); + } + + if (isAvoidSplitCombine || isMmNonMerge) { + //if (LOG.isDebugEnabled()) { + Utilities.LOG14535.info("The path [" + paths[i + start] + "] is being parked for HiveInputFormat.getSplits"); - } + //} nonCombinablePathIndices.add(i + start); } } @@ -159,7 +174,7 @@ public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim, Path[] ipaths = inputSplitShim.getPaths(); if (ipaths.length > 0) { PartitionDesc part = HiveFileFormatUtils - .getPartitionDescFromPathRecursively(this.pathToPartitionInfo, + .getFromPathRecursively(this.pathToPartitionInfo, ipaths[0], IOPrepareCache.get().getPartitionDescMap()); inputFormatClassName = part.getInputFileFormatClass().getName(); } @@ -273,7 +288,7 @@ public void write(DataOutput out) throws IOException { // extract all the inputFormatClass names for each chunk in the // CombinedSplit. - PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(pathToPartitionInfo, + PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(pathToPartitionInfo, inputSplitShim.getPath(0), IOPrepareCache.get().getPartitionDescMap()); // create a new InputFormat instance if this is the first time to see @@ -357,7 +372,7 @@ public int hashCode() { if (lDrvStat != null && lDrvStat.driverState == DriverState.INTERRUPT) throw new IOException("Operation is Canceled. "); - PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap()); TableDesc tableDesc = part.getTableDesc(); if ((tableDesc != null) && tableDesc.isNonNative()) { @@ -472,11 +487,12 @@ public int hashCode() { ExecutorService executor = Executors.newFixedThreadPool(numThreads); List>> futureList = new ArrayList>>(numThreads); try { + boolean isMerge = mrwork != null && mrwork.isMergeFromResolver(); for (int i = 0; i < numThreads; i++) { int start = i * numPathPerThread; int length = i != numThreads - 1 ? 
numPathPerThread : paths.length - start; - futureList.add(executor.submit( - new CheckNonCombinablePathCallable(paths, start, length, job))); + futureList.add(executor.submit(new CheckNonCombinablePathCallable( + paths, start, length, job, isMerge))); } Set nonCombinablePathIndices = new HashSet(); for (Future> future : futureList) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java index de36f2b..6911fb7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java @@ -114,7 +114,7 @@ private PartitionDesc extractSinglePartSpec(CombineHiveInputSplit hsplit) throws PartitionDesc part = null; Map, Map> cache = new HashMap<>(); for (Path path : hsplit.getPaths()) { - PartitionDesc otherPart = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + PartitionDesc otherPart = HiveFileFormatUtils.getFromPathRecursively( pathToPartInfo, path, cache); LOG.debug("Found spec for " + path + " " + otherPart + " from " + pathToPartInfo); if (part == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java index 46f9970..4bc60dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java @@ -326,7 +326,7 @@ public boolean doNext(K key, V value) throws IOException { .getMapWork(jobConf).getPathToPartitionInfo(); } part = HiveFileFormatUtils - .getPartitionDescFromPathRecursively(pathToPartitionInfo, + .getFromPathRecursively(pathToPartitionInfo, filePath, IOPrepareCache.get().getPartitionDescMap()); } catch (AssertionError ae) { LOG.info("Cannot get partition description from " + this.ioCxtRef.getInputPath() diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java index 0070c68..8ca2506 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java @@ -350,41 +350,41 @@ private static RecordUpdater getRecordUpdater(JobConf jc, .finalDestination(conf.getDestPath())); } - public static PartitionDesc getPartitionDescFromPathRecursively( - Map pathToPartitionInfo, Path dir, - Map, Map> cacheMap) - throws IOException { - return getPartitionDescFromPathRecursively(pathToPartitionInfo, dir, - cacheMap, false); + public static T getFromPathRecursively(Map pathToPartitionInfo, Path dir, + Map, Map> cacheMap) throws IOException { + return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, false); } - public static PartitionDesc getPartitionDescFromPathRecursively( - Map pathToPartitionInfo, Path dir, - Map, Map> cacheMap, boolean ignoreSchema) - throws IOException { + public static T getFromPathRecursively(Map pathToPartitionInfo, Path dir, + Map, Map> cacheMap, boolean ignoreSchema) throws IOException { + return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, ignoreSchema, false); + } - PartitionDesc part = doGetPartitionDescFromPath(pathToPartitionInfo, dir); + public static T getFromPathRecursively(Map pathToPartitionInfo, Path dir, + Map, Map> cacheMap, boolean ignoreSchema, boolean ifPresent) + throws IOException { + T part = getFromPath(pathToPartitionInfo, dir); if (part == null && 
(ignoreSchema || (dir.toUri().getScheme() == null || dir.toUri().getScheme().trim().equals("")) || FileUtils.pathsContainNoScheme(pathToPartitionInfo.keySet()))) { - Map newPathToPartitionInfo = null; + Map newPathToPartitionInfo = null; if (cacheMap != null) { newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo); } if (newPathToPartitionInfo == null) { // still null - newPathToPartitionInfo = populateNewPartitionDesc(pathToPartitionInfo); + newPathToPartitionInfo = populateNewT(pathToPartitionInfo); if (cacheMap != null) { cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo); } } - part = doGetPartitionDescFromPath(newPathToPartitionInfo, dir); + part = getFromPath(newPathToPartitionInfo, dir); } - if (part != null) { + if (part != null || ifPresent) { return part; } else { throw new IOException("cannot find dir = " + dir.toString() @@ -392,18 +392,18 @@ public static PartitionDesc getPartitionDescFromPathRecursively( } } - private static Map populateNewPartitionDesc(Map pathToPartitionInfo) { - Map newPathToPartitionInfo = new HashMap<>(); - for (Map.Entry entry: pathToPartitionInfo.entrySet()) { - PartitionDesc partDesc = entry.getValue(); + private static Map populateNewT(Map pathToPartitionInfo) { + Map newPathToPartitionInfo = new HashMap<>(); + for (Map.Entry entry: pathToPartitionInfo.entrySet()) { + T partDesc = entry.getValue(); Path pathOnly = Path.getPathWithoutSchemeAndAuthority(entry.getKey()); newPathToPartitionInfo.put(pathOnly, partDesc); } return newPathToPartitionInfo; } - private static PartitionDesc doGetPartitionDescFromPath( - Map pathToPartitionInfo, Path dir) { + private static T getFromPath( + Map pathToPartitionInfo, Path dir) { // We first do exact match, and then do prefix matching. The latter is due to input dir // could be /dir/ds='2001-02-21'/part-03 where part-03 is not part of partition diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 5c9d289..c3b9190 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -25,20 +25,28 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.Map.Entry; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StringInternUtils; +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; +import org.apache.hive.common.util.Ref; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil; @@ -378,7 +386,7 @@ public RecordReader getRecordReader(InputSplit split, JobConf job, } boolean nonNative = false; - PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + PartitionDesc part = 
HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, hsplit.getPath(), null); if (LOG.isDebugEnabled()) { LOG.debug("Found spec for " + hsplit.getPath() + " " + part + " from " + pathToPartitionInfo); @@ -448,7 +456,15 @@ protected void init(JobConf job) { */ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, JobConf conf, InputFormat inputFormat, Class inputFormatClass, int splits, - TableDesc table, List result) throws IOException { + TableDesc table, List result) + throws IOException { + ValidTxnList validTxnList; + if (MetaStoreUtils.isInsertOnlyTable(table.getProperties())) { + String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY); + validTxnList = txnString == null ? new ValidReadTxnList() : new ValidReadTxnList(txnString); + } else { + validTxnList = null; // for non-MM case + } try { Utilities.copyTablePropertiesToConf(table, conf); @@ -460,7 +476,11 @@ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, Job pushFilters(conf, tableScan); } - FileInputFormat.setInputPaths(conf, dirs.toArray(new Path[dirs.size()])); + Path[] finalDirs = processPathsForMmRead(dirs, conf, validTxnList); + if (finalDirs == null) { + return; // No valid inputs. + } + FileInputFormat.setInputPaths(conf, finalDirs); conf.setInputFormat(inputFormat.getClass()); int headerCount = 0; @@ -480,6 +500,63 @@ private void addSplitsForGroup(List dirs, TableScanOperator tableScan, Job } } + public static Path[] processPathsForMmRead(List dirs, JobConf conf, + ValidTxnList validTxnList) throws IOException { + if (validTxnList == null) { + return dirs.toArray(new Path[dirs.size()]); + } else { + List finalPaths = new ArrayList<>(dirs.size()); + for (Path dir : dirs) { + processForWriteIds(dir, conf, validTxnList, finalPaths); + } + if (finalPaths.isEmpty()) { + LOG.warn("No valid inputs found in " + dirs); + return null; + } + return finalPaths.toArray(new Path[finalPaths.size()]); + } + } + + private static void processForWriteIds(Path dir, JobConf conf, + ValidTxnList validTxnList, List finalPaths) throws IOException { + FileSystem fs = dir.getFileSystem(conf); + Utilities.LOG14535.warn("Checking " + dir + " (root) for inputs"); + // Ignore nullscan-optimized paths. 
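For illustration only (this sketch is not part of the patch): the loop below descends through plain union-style subdirectories and lets AcidUtils.getAcidState decide which delta directories are readable. The following restates that per-delta visibility test using only APIs visible in this patch (JavaUtils.extractTxnId and ValidTxnList.isTxnRangeValid); the class and method names are hypothetical.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.ValidTxnList;

// Illustrative sketch only; not part of this patch. Names are hypothetical.
public class MmInputDirSketch {
  public static boolean isReadableMmDelta(Path dir, ValidTxnList txnList) {
    Long txnId = JavaUtils.extractTxnId(dir);
    if (txnId == null) {
      return false; // not a delta_... directory; the traversal descends into it instead
    }
    // Same range test that getAcidState applies: anything other than NONE means the
    // transaction that wrote this delta is visible in the current snapshot.
    return txnList.isTxnRangeValid(txnId, txnId) != ValidTxnList.RangeResponse.NONE;
  }
}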
+ if (fs instanceof NullScanFileSystem) { + finalPaths.add(dir); + return; + } + + // Tez require the use of recursive input dirs for union processing, so we have to look into the + // directory to find out + LinkedList subdirs = new LinkedList<>(); + subdirs.add(dir); // add itself as a starting point + while (!subdirs.isEmpty()) { + Path currDir = subdirs.poll(); + FileStatus[] files = fs.listStatus(currDir); + boolean hadAcidState = false; // whether getAcidState has been called for currDir + for (FileStatus file : files) { + Path path = file.getPath(); + Utilities.LOG14535.warn("Checking " + path + " for inputs"); + if (!file.isDirectory()) { + Utilities.LOG14535.warn("Ignoring a file not in MM directory " + path); + } else if (JavaUtils.extractTxnId(path) == null) { + subdirs.add(path); + } else { + if (!hadAcidState) { + AcidUtils.Directory dirInfo = AcidUtils.getAcidState(currDir, conf, validTxnList, Ref.from(false), true, null); + hadAcidState = true; + // todo for IOW, we also need to count in base dir, if any + for (AcidUtils.ParsedDelta delta : dirInfo.getCurrentDirectories()) { + Utilities.LOG14535.info("Adding input " + delta.getPath()); + finalPaths.add(delta.getPath()); + } + } + } + } + } + } + Path[] getInputPaths(JobConf job) throws IOException { Path[] dirs; if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) { @@ -627,6 +704,9 @@ protected static PartitionDesc getPartitionDescFromPath( throws IOException { PartitionDesc partDesc = pathToPartitionInfo.get(dir); if (partDesc == null) { + // TODO: HiveFileFormatUtils.getPartitionDescFromPathRecursively for MM tables? + // So far, the only case when this is called for a MM directory was in error. + // Keep it like this for now; may need replacement if we find a valid usage like this. 
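A minimal sketch (not part of the patch) of why getPartitionDescFromPath, like getFromPathRecursively above, retries the lookup after Path.getPathWithoutSchemeAndAuthority: a fully qualified path does not compare equal to the scheme-less key it was registered under. The paths and class name below are made up for illustration.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.fs.Path;

// Illustrative sketch only; not part of this patch. Paths and names are hypothetical.
public class SchemelessLookupSketch {
  public static void main(String[] args) {
    Map<Path, String> pathToPartitionInfo = new HashMap<>();
    pathToPartitionInfo.put(new Path("/warehouse/t1/ds=2017-01-01"), "partDesc for ds=2017-01-01");

    Path qualified = new Path("hdfs://nn:8020/warehouse/t1/ds=2017-01-01");
    // Direct lookup misses: Path equality includes scheme and authority.
    System.out.println(pathToPartitionInfo.get(qualified)); // null
    // The fallback strips them and retries, which is what the lookup code does.
    System.out.println(pathToPartitionInfo.get(Path.getPathWithoutSchemeAndAuthority(qualified)));
  }
}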
partDesc = pathToPartitionInfo.get(Path.getPathWithoutSchemeAndAuthority(dir)); } if (partDesc == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java index 94b9431..8d340df 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java @@ -60,13 +60,13 @@ private Class internalInputFormat; public MergeFileWork(List inputPaths, Path outputDir, - String srcTblInputFormat) { - this(inputPaths, outputDir, false, srcTblInputFormat); + String srcTblInputFormat, TableDesc tbl) { + this(inputPaths, outputDir, false, srcTblInputFormat, tbl); } public MergeFileWork(List inputPaths, Path outputDir, boolean hasDynamicPartitions, - String srcTblInputFormat) { + String srcTblInputFormat, TableDesc tbl) { this.inputPaths = inputPaths; this.outputDir = outputDir; this.hasDynamicPartitions = hasDynamicPartitions; @@ -78,6 +78,7 @@ public MergeFileWork(List inputPaths, Path outputDir, this.internalInputFormat = RCFileBlockMergeInputFormat.class; } partDesc.setInputFileFormatClass(internalInputFormat); + partDesc.setTableDesc(tbl); for (Path path : this.inputPaths) { this.addPathToPartitionInfo(path, partDesc); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 69a9f9f..52c0c43 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -1106,7 +1106,7 @@ public AcidDirInfo run() throws Exception { private AcidDirInfo callInternal() throws IOException { AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir, context.conf, - context.transactionList, useFileIds, true); + context.transactionList, useFileIds, true, null); Path base = dirInfo.getBaseDirectory(); // find the base files (original or new style) List baseFiles = new ArrayList(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java index 38aaeed..6178de2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java @@ -14,12 +14,20 @@ package org.apache.hadoop.hive.ql.io.parquet; import java.io.IOException; +import java.util.Map; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.io.DataCache; import org.apache.hadoop.hive.common.io.FileMetadataCache; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.LlapCacheOnlyInputFormatInterface; +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.Utilities; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java index 855efa0..afa353f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -83,17 +84,19 @@ private void pushProjectionsAndFilters(final JobConf jobConf, } final Set aliases = new HashSet(); - final Iterator>> iterator = - mapWork.getPathToAliases().entrySet().iterator(); - - while (iterator.hasNext()) { - final Entry> entry = iterator.next(); - final String key = entry.getKey().toUri().getPath(); - if (splitPath.equals(key) || splitPathWithNoSchema.equals(key)) { - aliases.addAll(entry.getValue()); + try { + ArrayList a = HiveFileFormatUtils.getFromPathRecursively( + mapWork.getPathToAliases(), new Path(splitPath), null, false, true); + if (a != null) { + aliases.addAll(a); + } + if (a == null || a.isEmpty()) { + // TODO: not having aliases for path usually means some bug. Should it give up? + LOG.warn("Couldn't find aliases for " + splitPath); } + } catch (IllegalArgumentException | IOException e) { + throw new RuntimeException(e); } - // Collect the needed columns from all the aliases and create ORed filter // expression for the table. boolean allColumnsNeeded = false; @@ -183,7 +186,8 @@ public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path) throws IOException { updateMrWork(jobConf); // TODO: refactor this in HIVE-6366 final JobConf cloneJobConf = new JobConf(jobConf); - final PartitionDesc part = pathToPartitionInfo.get(path); + final PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively( + pathToPartitionInfo, path, null, false, true); try { if ((part != null) && (part.getTableDesc() != null)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java index ad921f3..151da11 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java @@ -346,7 +346,7 @@ public static void main(String[] args) { QueryState queryState = new QueryState.Builder().withHiveConf(new HiveConf(conf, PartialScanTask.class)).build(); - PartialScanWork mergeWork = new PartialScanWork(inputPaths); + PartialScanWork mergeWork = new PartialScanWork(inputPaths, null); DriverContext driverCxt = new DriverContext(); PartialScanTask taskExec = new PartialScanTask(); taskExec.initialize(queryState, null, driverCxt, new CompilationOpContext()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java index 919cea0..d8ee7d2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.mapred.Mapper; /** @@ -47,15 +48,18 @@ private transient List inputPaths; private String aggKey; private String statsTmpDir; + private TableDesc tblDesc; 
public PartialScanWork() { } - public PartialScanWork(List inputPaths) { + public PartialScanWork(List inputPaths, TableDesc tblDesc) { super(); this.inputPaths = inputPaths; + this.tblDesc = tblDesc; PartitionDesc partDesc = new PartitionDesc(); partDesc.setInputFileFormatClass(RCFileBlockMergeInputFormat.class); + partDesc.setTableDesc(tblDesc); for(Path path: this.inputPaths) { this.addPathToPartitionInfo(path, partDesc); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java index 5dec791..82804b2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java @@ -371,7 +371,7 @@ LockState acquireLocks(QueryPlan plan, Context ctx, String username, boolean isB continue; } if(t != null) { - compBuilder.setIsAcid(AcidUtils.isAcidTable(t)); + compBuilder.setIsAcid(AcidUtils.isFullAcidTable(t)); } LockComponent comp = compBuilder.build(); LOG.debug("Adding lock component to lock request " + comp.toString()); @@ -435,7 +435,7 @@ Seems much cleaner if each stmt is identified as a particular HiveOperation (whi break; case INSERT: assert t != null; - if(AcidUtils.isAcidTable(t)) { + if(AcidUtils.isFullAcidTable(t)) { compBuilder.setShared(); } else { @@ -469,7 +469,7 @@ Seems much cleaner if each stmt is identified as a particular HiveOperation (whi output.getWriteType().toString()); } if(t != null) { - compBuilder.setIsAcid(AcidUtils.isAcidTable(t)); + compBuilder.setIsAcid(AcidUtils.isFullAcidTable(t)); } compBuilder.setIsDynamicPartitionWrite(output.isDynamicPartitionWrite()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index d661f10..a0b735c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -52,6 +52,9 @@ import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; + import javax.jdo.JDODataStoreException; import org.apache.calcite.plan.RelOptMaterialization; @@ -65,6 +68,7 @@ import org.apache.hadoop.hive.common.BlobStorageUtils; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.HiveStatsUtils; +import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; @@ -158,7 +162,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -1633,12 +1636,13 @@ public Database getDatabaseCurrent() throws HiveException { * @throws HiveException */ public void loadPartition(Path loadPath, String tableName, - Map partSpec, boolean replace, - boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, - boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask) throws HiveException { + Map partSpec, boolean replace, boolean inheritTableSpecs, + boolean isSkewedStoreAsSubdir, boolean isSrcLocal, boolean isAcid, + boolean hasFollowingStatsTask, Long txnId, int stmtId) + throws HiveException { Table tbl = getTable(tableName); loadPartition(loadPath, tbl, partSpec, replace, 
inheritTableSpecs, - isSkewedStoreAsSubdir, isSrcLocal, isAcid, hasFollowingStatsTask); + isSkewedStoreAsSubdir, isSrcLocal, isAcid, hasFollowingStatsTask, txnId, stmtId); } /** @@ -1666,12 +1670,12 @@ public void loadPartition(Path loadPath, String tableName, * true if there is a following task which updates the stats, so, this method need not update. * @return Partition object being loaded with data */ - public Partition loadPartition(Path loadPath, Table tbl, - Map partSpec, boolean replace, - boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, - boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask) throws HiveException { - + public Partition loadPartition(Path loadPath, Table tbl, Map partSpec, + boolean replace, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, + boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask, Long txnId, int stmtId) + throws HiveException { Path tblDataLocationPath = tbl.getDataLocation(); + boolean isMmTableWrite = MetaStoreUtils.isInsertOnlyTable(tbl.getParameters()); try { // Get the partition object if it already exists Partition oldPart = getPartition(tbl, partSpec, false); @@ -1711,21 +1715,45 @@ public Partition loadPartition(Path loadPath, Table tbl, List newFiles = null; PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin("MoveTask", "FileMoves"); - // If config is set, table is not temporary and partition being inserted exists, capture // the list of files added. For not yet existing partitions (insert overwrite to new partition // or dynamic partition inserts), the add partition event will capture the list of files added. if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && (null != oldPart)) { newFiles = Collections.synchronizedList(new ArrayList()); } - - if (replace || (oldPart == null && !isAcid)) { - boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); - replaceFiles(tbl.getPath(), loadPath, newPartPath, oldPartPath, getConf(), - isSrcLocal, isAutoPurge, newFiles); + // TODO: this assumes both paths are qualified; which they are, currently. + if (isMmTableWrite && loadPath.equals(newPartPath)) { + // MM insert query, move itself is a no-op. + Utilities.LOG14535.info("not moving " + loadPath + " to " + newPartPath + " (MM)"); + assert !isAcid; + if (areEventsForDmlNeeded(tbl, oldPart)) { + newFiles = listFilesCreatedByQuery(loadPath, txnId, stmtId); + } + Utilities.LOG14535.info("maybe deleting stuff from " + oldPartPath + " (new " + newPartPath + ") for replace"); + if (replace && oldPartPath != null) { + boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); + deleteOldPathForReplace(newPartPath, oldPartPath, getConf(), isAutoPurge, + new JavaUtils.IdPathFilter(txnId, stmtId, false, true), true, + tbl.isStoredAsSubDirectories() ? tbl.getSkewedColNames().size() : 0); + } } else { - FileSystem fs = tbl.getDataLocation().getFileSystem(conf); - Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles); + // Either a non-MM query, or a load into MM table from an external source. + PathFilter filter = FileUtils.HIDDEN_FILES_PATH_FILTER; + Path destPath = newPartPath; + if (isMmTableWrite) { + // We will load into MM directory, and delete from the parent if needed. + destPath = new Path(destPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId)); + filter = replace ? 
new JavaUtils.IdPathFilter(txnId, stmtId, false, true) : filter; + } + Utilities.LOG14535.info("moving " + loadPath + " to " + destPath); + if (replace || (oldPart == null && !isAcid)) { + boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); + replaceFiles(tbl.getPath(), loadPath, destPath, oldPartPath, getConf(), + isSrcLocal, isAutoPurge, newFiles, filter, isMmTableWrite); + } else { + FileSystem fs = tbl.getDataLocation().getFileSystem(conf); + Hive.copyFiles(conf, loadPath, destPath, fs, isSrcLocal, isAcid, newFiles); + } } perfLogger.PerfLogEnd("MoveTask", "FileMoves"); Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath); @@ -1811,6 +1839,54 @@ public Partition loadPartition(Path loadPath, Table tbl, } } + + private boolean areEventsForDmlNeeded(Table tbl, Partition oldPart) { + return conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && oldPart != null; + } + + private List listFilesCreatedByQuery(Path loadPath, long txnId, int stmtId) throws HiveException { + List newFiles = new ArrayList(); + final String filePrefix = AcidUtils.deltaSubdir(txnId, txnId, stmtId); + FileStatus[] srcs; + FileSystem srcFs; + try { + srcFs = loadPath.getFileSystem(conf); + srcs = srcFs.listStatus(loadPath); + } catch (IOException e) { + LOG.error("Error listing files", e); + throw new HiveException(e); + } + if (srcs == null) { + LOG.info("No sources specified: " + loadPath); + return newFiles; + } + PathFilter subdirFilter = null; + + // TODO: just like the move path, we only do one level of recursion. + for (FileStatus src : srcs) { + if (src.isDirectory()) { + if (subdirFilter == null) { + subdirFilter = new PathFilter() { + @Override + public boolean accept(Path path) { + return path.getName().startsWith(filePrefix); + } + }; + } + try { + for (FileStatus srcFile : srcFs.listStatus(src.getPath(), subdirFilter)) { + newFiles.add(srcFile.getPath()); + } + } catch (IOException e) { + throw new HiveException(e); + } + } else if (src.getPath().getName().startsWith(filePrefix)) { + newFiles.add(src.getPath()); + } + } + return newFiles; + } + private void setStatsPropAndAlterPartition(boolean hasFollowingStatsTask, Table tbl, Partition newTPart) throws MetaException, TException { EnvironmentContext environmentContext = null; @@ -1836,8 +1912,11 @@ private void setStatsPropAndAlterPartition(boolean hasFollowingStatsTask, Table private void walkDirTree(FileStatus fSta, FileSystem fSys, Map, String> skewedColValueLocationMaps, Path newPartPath, SkewedInfo skewedInfo) throws IOException { + // TODO# HERE broken + /* Base Case. It's leaf. */ if (!fSta.isDir()) { + Utilities.LOG14535.info("Processing LB leaf " + fSta.getPath()); /* construct one location map if not exists. */ constructOneLBLocationMap(fSta, skewedColValueLocationMaps, newPartPath, skewedInfo); return; @@ -1846,6 +1925,7 @@ private void walkDirTree(FileStatus fSta, FileSystem fSys, /* dfs. 
*/ FileStatus[] children = fSys.listStatus(fSta.getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); if (children != null) { + Utilities.LOG14535.info("Processing LB dir " + fSta.getPath()); for (FileStatus child : children) { walkDirTree(child, fSys, skewedColValueLocationMaps, newPartPath, skewedInfo); } @@ -1866,22 +1946,37 @@ private void constructOneLBLocationMap(FileStatus fSta, List skewedValue = new ArrayList(); String lbDirName = FileUtils.unescapePathName(lbdPath.toString()); String partDirName = FileUtils.unescapePathName(newPartPath.toString()); - String lbDirSuffix = lbDirName.replace(partDirName, ""); + String lbDirSuffix = lbDirName.replace(partDirName, ""); // TODO: wtf? + if (lbDirSuffix.startsWith(Path.SEPARATOR)) { + lbDirSuffix = lbDirSuffix.substring(1); + } String[] dirNames = lbDirSuffix.split(Path.SEPARATOR); - for (String dirName : dirNames) { - if ((dirName != null) && (dirName.length() > 0)) { - // Construct skewed-value to location map except default directory. - // why? query logic knows default-dir structure and don't need to get from map - if (!dirName - .equalsIgnoreCase(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME)) { - String[] kv = dirName.split("="); - if (kv.length == 2) { - skewedValue.add(kv[1]); - } + int keysFound = 0, dirsToTake = 0; + int colCount = skewedInfo.getSkewedColNames().size(); + while (dirsToTake < dirNames.length && keysFound < colCount) { + String dirName = dirNames[dirsToTake++]; + // Construct skewed-value to location map except default directory. + // why? query logic knows default-dir structure and don't need to get from map + if (dirName.equalsIgnoreCase(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME)) { + ++keysFound; + } else { + String[] kv = dirName.split("="); + if (kv.length == 2) { + skewedValue.add(kv[1]); + ++keysFound; + } else { + // TODO: we should really probably throw. Keep the existing logic for now. 
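A sketch (not part of the patch) of the key-counting loop above with the Hive types stripped away: directory components of the form "col=value" each contribute one skewed value, and the list-bucketing default directory name (passed in by the caller here) counts as a key with no value. Class and method names are hypothetical.

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch only; not part of this patch. Names are hypothetical.
public class LbSuffixParseSketch {
  public static List<String> parseSkewedValues(String[] dirNames, int colCount, String defaultDirName) {
    List<String> skewedValues = new ArrayList<>();
    int keysFound = 0;
    int dirsToTake = 0;
    while (dirsToTake < dirNames.length && keysFound < colCount) {
      String dirName = dirNames[dirsToTake++];
      if (dirName.equalsIgnoreCase(defaultDirName)) {
        ++keysFound; // default dir: key found, but no value to record
      } else {
        String[] kv = dirName.split("=");
        if (kv.length == 2) {
          skewedValues.add(kv[1]);
          ++keysFound;
        }
        // anything else is an unexpected directory; the patch logs and skips it
      }
    }
    return skewedValues;
  }
}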
+ LOG.warn("Skipping unknown directory " + dirName + + " when expecting LB keys or default directory (from " + lbDirName + ")"); } } } - if ((skewedValue.size() > 0) && (skewedValue.size() == skewedInfo.getSkewedColNames().size()) + for (int i = 0; i < (dirNames.length - dirsToTake); ++i) { + lbdPath = lbdPath.getParent(); + } + Utilities.LOG14535.info("Saving LB location " + lbdPath + " based on " + + colCount + " keys and " + fSta.getPath()); + if ((skewedValue.size() > 0) && (skewedValue.size() == colCount) && !skewedColValueLocationMaps.containsKey(skewedValue)) { skewedColValueLocationMaps.put(skewedValue, lbdPath.toString()); } @@ -1898,10 +1993,11 @@ private void constructOneLBLocationMap(FileStatus fSta, */ private Map, String> constructListBucketingLocationMap(Path newPartPath, SkewedInfo skewedInfo) throws IOException, FileNotFoundException { + Utilities.LOG14535.info("Constructing list bucketing map for " + newPartPath); Map, String> skewedColValueLocationMaps = new HashMap, String>(); FileSystem fSys = newPartPath.getFileSystem(conf); - walkDirTree(fSys.getFileStatus(newPartPath), fSys, skewedColValueLocationMaps, newPartPath, - skewedInfo); + walkDirTree(fSys.getFileStatus(newPartPath), + fSys, skewedColValueLocationMaps, newPartPath, skewedInfo); return skewedColValueLocationMaps; } @@ -1912,17 +2008,35 @@ private void constructOneLBLocationMap(FileStatus fSta, * @return Set of valid partitions * @throws HiveException */ - private Set getValidPartitionsInPath(int numDP, Path loadPath) throws HiveException { + private Set getValidPartitionsInPath( + int numDP, int numLB, Path loadPath, Long txnId, int stmtId, boolean isMmTable) throws HiveException { Set validPartitions = new HashSet(); try { FileSystem fs = loadPath.getFileSystem(conf); - FileStatus[] leafStatus = HiveStatsUtils.getFileStatusRecurse(loadPath, numDP, fs); - // Check for empty partitions - for (FileStatus s : leafStatus) { - if (!s.isDirectory()) { - throw new HiveException("partition " + s.getPath() + " is not a directory!"); + if (!isMmTable) { + FileStatus[] leafStatus = HiveStatsUtils.getFileStatusRecurse(loadPath, numDP, fs); + // Check for empty partitions + for (FileStatus s : leafStatus) { + if (!s.isDirectory()) { + throw new HiveException("partition " + s.getPath() + " is not a directory!"); + } + Path dpPath = s.getPath(); + Utilities.LOG14535.info("Found DP " + dpPath); + validPartitions.add(dpPath); + } + } else { + // The non-MM path only finds new partitions, as it is looking at the temp path. + // To produce the same effect, we will find all the partitions affected by this write ID. + Path[] leafStatus = Utilities.getMmDirectoryCandidates( + fs, loadPath, numDP, numLB, null, txnId, stmtId, conf); + for (Path p : leafStatus) { + Path dpPath = p.getParent(); // Skip the MM directory that we have found. + for (int i = 0; i < numLB; ++i) { + dpPath = dpPath.getParent(); // Now skip the LB directories, if any... 
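A small sketch (not part of the patch) of the parent-walking step used in getValidPartitionsInPath below: starting from an MM delta directory found by getMmDirectoryCandidates, the dynamic-partition directory is its parent after skipping one level per list-bucketing column. Paths and names are illustrative.

import org.apache.hadoop.fs.Path;

// Illustrative sketch only; not part of this patch. Names are hypothetical.
public class DpPathFromDeltaSketch {
  public static Path partitionDirOf(Path mmDeltaPath, int numLB) {
    Path dpPath = mmDeltaPath.getParent(); // skip the delta_... directory itself
    for (int i = 0; i < numLB; ++i) {
      dpPath = dpPath.getParent();         // then skip the LB directories, if any
    }
    return dpPath;
  }

  public static void main(String[] args) {
    // e.g. /wh/t/ds=1/delta_0000007_0000007_0000 with no LB columns -> /wh/t/ds=1
    System.out.println(partitionDirOf(new Path("/wh/t/ds=1/delta_0000007_0000007_0000"), 0));
  }
}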
+ } + Utilities.LOG14535.info("Found DP " + dpPath); + validPartitions.add(dpPath); } - validPartitions.add(s.getPath()); } } catch (IOException e) { throw new HiveException(e); @@ -1960,7 +2074,7 @@ private void constructOneLBLocationMap(FileStatus fSta, */ public Map, Partition> loadDynamicPartitions(final Path loadPath, final String tableName, final Map partSpec, final boolean replace, - final int numDP, final boolean listBucketingEnabled, final boolean isAcid, final long txnId, + final int numDP, final int numLB, final boolean isAcid, final long txnId, final int stmtId, final boolean hasFollowingStatsTask, final AcidUtils.Operation operation) throws HiveException { @@ -1976,7 +2090,8 @@ private void constructOneLBLocationMap(FileStatus fSta, // Get all valid partition paths and existing partitions for them (if any) final Table tbl = getTable(tableName); - final Set validPartitions = getValidPartitionsInPath(numDP, loadPath); + final Set validPartitions = getValidPartitionsInPath(numDP, numLB, loadPath, txnId, stmtId, + MetaStoreUtils.isInsertOnlyTable(tbl.getParameters())); final int partsToLoad = validPartitions.size(); final AtomicInteger partitionsLoaded = new AtomicInteger(0); @@ -1994,7 +2109,11 @@ private void constructOneLBLocationMap(FileStatus fSta, for(final Path partPath : validPartitions) { // generate a full partition specification final LinkedHashMap fullPartSpec = Maps.newLinkedHashMap(partSpec); - Warehouse.makeSpecFromName(fullPartSpec, partPath); + if (!Warehouse.makeSpecFromName( + fullPartSpec, partPath, new HashSet(partSpec.keySet()))) { + Utilities.LOG14535.warn("Ignoring invalid DP directory " + partPath); + continue; + } futures.add(pool.submit(new Callable() { @Override public Void call() throws Exception { @@ -2004,9 +2123,10 @@ public Void call() throws Exception { LOG.info("New loading path = " + partPath + " with partSpec " + fullPartSpec); // load the partition + Utilities.LOG14535.info("loadPartition called for DPP from " + partPath + " to " + tbl.getTableName()); Partition newPartition = loadPartition(partPath, tbl, fullPartSpec, - replace, true, listBucketingEnabled, - false, isAcid, hasFollowingStatsTask); + replace, true, numLB > 0, + false, isAcid, hasFollowingStatsTask, txnId, stmtId); partitionsMap.put(fullPartSpec, newPartition); if (inPlaceEligible) { @@ -2030,7 +2150,7 @@ public Void call() throws Exception { + " table=" + tbl.getTableName() + ", " + " partSpec=" + fullPartSpec + ", " + " replace=" + replace + ", " - + " listBucketingEnabled=" + listBucketingEnabled + ", " + + " listBucketingLevel=" + numLB + ", " + " isAcid=" + isAcid + ", " + " hasFollowingStatsTask=" + hasFollowingStatsTask, t); throw t; @@ -2097,26 +2217,46 @@ public Void call() throws Exception { * @param isAcid true if this is an ACID based write */ public void loadTable(Path loadPath, String tableName, boolean replace, boolean isSrcLocal, - boolean isSkewedStoreAsSubdir, boolean isAcid, boolean hasFollowingStatsTask) - throws HiveException { - + boolean isSkewedStoreAsSubdir, boolean isAcid, boolean hasFollowingStatsTask, + Long txnId, int stmtId, boolean isMmTable) throws HiveException { List newFiles = null; Table tbl = getTable(tableName); HiveConf sessionConf = SessionState.getSessionConf(); if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary()) { newFiles = Collections.synchronizedList(new ArrayList()); } - if (replace) { - Path tableDest = tbl.getPath(); - boolean isAutopurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); - 
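A sketch (not part of the patch) of the destination rule used in both loadPartition and loadTable for MM tables: new files land in a per-transaction delta subdirectory under the table or partition root, so a replace only needs to clean matching deltas rather than the whole directory. AcidUtils.deltaSubdir is the helper the patch itself calls; the wrapper class is hypothetical.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidUtils;

// Illustrative sketch only; not part of this patch. The wrapper is hypothetical.
public class MmDestPathSketch {
  /** Files written by txnId/stmtId land here; replace deletes matching deltas, not the root. */
  public static Path mmDestination(Path tableOrPartitionPath, long txnId, int stmtId) {
    return new Path(tableOrPartitionPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId));
  }
}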
replaceFiles(tableDest, loadPath, tableDest, tableDest, sessionConf, isSrcLocal, isAutopurge, newFiles); + // TODO: this assumes both paths are qualified; which they are, currently. + if (isMmTable && loadPath.equals(tbl.getPath())) { + Utilities.LOG14535.info("not moving " + loadPath + " to " + tbl.getPath()); + if (replace) { + Path tableDest = tbl.getPath(); + boolean isAutopurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); + deleteOldPathForReplace(tableDest, tableDest, sessionConf, isAutopurge, + new JavaUtils.IdPathFilter(txnId, stmtId, false, true), true, + tbl.isStoredAsSubDirectories() ? tbl.getSkewedColNames().size() : 0); + } + newFiles = listFilesCreatedByQuery(loadPath, txnId, stmtId); } else { - FileSystem fs; - try { - fs = tbl.getDataLocation().getFileSystem(sessionConf); - copyFiles(sessionConf, loadPath, tbl.getPath(), fs, isSrcLocal, isAcid, newFiles); - } catch (IOException e) { - throw new HiveException("addFiles: filesystem error in check phase", e); + // Either a non-MM query, or a load into MM table from an external source. + Path tblPath = tbl.getPath(), destPath = tblPath; + PathFilter filter = FileUtils.HIDDEN_FILES_PATH_FILTER; + if (isMmTable) { + // We will load into MM directory, and delete from the parent if needed. + destPath = new Path(destPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId)); + filter = replace ? new JavaUtils.IdPathFilter(txnId, stmtId, false, true) : filter; + } + Utilities.LOG14535.info("moving " + loadPath + " to " + tblPath + " (replace = " + replace + ")"); + if (replace) { + boolean isAutopurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); + replaceFiles(tblPath, loadPath, destPath, tblPath, + sessionConf, isSrcLocal, isAutopurge, newFiles, filter, isMmTable); + } else { + try { + FileSystem fs = tbl.getDataLocation().getFileSystem(sessionConf); + copyFiles(sessionConf, loadPath, destPath, fs, isSrcLocal, isAcid, newFiles); + } catch (IOException e) { + throw new HiveException("addFiles: filesystem error in check phase", e); + } } } if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { @@ -2577,7 +2717,6 @@ public boolean dropPartition(String dbName, String tableName, List partV * @param tbl * object for which partition is needed * @return list of partition objects - * @throws HiveException */ public List getPartitions(Table tbl) throws HiveException { if (tbl.isPartitioned()) { @@ -3244,15 +3383,15 @@ public static boolean moveFile(final HiveConf conf, Path srcf, final Path destf, HdfsUtils.HadoopFileStatus destStatus = null; - // If source path is a subdirectory of the destination path: + // If source path is a subdirectory of the destination path (or the other way around): // ex: INSERT OVERWRITE DIRECTORY 'target/warehouse/dest4.out' SELECT src.value WHERE src.key >= 300; // where the staging directory is a subdirectory of the destination directory // (1) Do not delete the dest dir before doing the move operation. // (2) It is assumed that subdir and dir are in same encryption zone. // (3) Move individual files from scr dir to dest dir. 
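A simplified sketch (not part of the patch) of the child-by-child move performed when source and destination directories nest, including the mkdirs the patch adds for the dest-under-src case; the real code parallelizes the renames with a thread pool and handles ownership and permissions, which is omitted here.

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;

// Illustrative sketch only; not part of this patch. Names are hypothetical.
public class NestedMoveSketch {
  static void moveChildren(FileSystem fs, Path srcf, Path destf) throws IOException {
    if (!fs.exists(destf)) {
      fs.mkdirs(destf); // dest may not exist yet when it is nested under src
    }
    for (FileStatus s : fs.listStatus(srcf, FileUtils.HIDDEN_FILES_PATH_FILTER)) {
      Path target = new Path(destf, s.getPath().getName());
      if (!fs.rename(s.getPath(), target)) {
        throw new IOException("Failed to move " + s.getPath() + " to " + target);
      }
    }
  }
}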
- boolean destIsSubDir = isSubDir(srcf, destf, srcFs, destFs, isSrcLocal); + boolean srcIsSubDirOfDest = isSubDir(srcf, destf, srcFs, destFs, isSrcLocal), + destIsSubDirOfSrc = isSubDir(destf, srcf, destFs, srcFs, false); final String msg = "Unable to move source " + srcf + " to destination " + destf; - try { if (replace) { try{ @@ -3262,7 +3401,7 @@ public static boolean moveFile(final HiveConf conf, Path srcf, final Path destf, //if replace is false, rename (mv) actually move the src under dest dir //if destf is an existing file, rename is actually a replace, and do not need // to delete the file first - if (replace && !destIsSubDir) { + if (replace && !srcIsSubDirOfDest) { destFs.delete(destf, true); LOG.debug("The path " + destf.toString() + " is deleted"); } @@ -3284,13 +3423,17 @@ public static boolean moveFile(final HiveConf conf, Path srcf, final Path destf, replace, // overwrite destination conf); } else { - if (destIsSubDir) { + if (srcIsSubDirOfDest || destIsSubDirOfSrc) { FileStatus[] srcs = destFs.listStatus(srcf, FileUtils.HIDDEN_FILES_PATH_FILTER); List> futures = new LinkedList<>(); final ExecutorService pool = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25) > 0 ? Executors.newFixedThreadPool(conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25), new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Move-Thread-%d").build()) : null; + if (destIsSubDirOfSrc && !destFs.exists(destf)) { + Utilities.LOG14535.info("Creating " + destf); + destFs.mkdirs(destf); + } /* Move files one by one because source is a subdirectory of destination */ for (final FileStatus srcStatus : srcs) { @@ -3587,7 +3730,8 @@ private static void moveAcidFiles(String deltaFileType, PathFilter pathFilter, F * Output the list of new files replaced in the destination path */ protected void replaceFiles(Path tablePath, Path srcf, Path destf, Path oldPath, HiveConf conf, - boolean isSrcLocal, boolean purge, List newFiles) throws HiveException { + boolean isSrcLocal, boolean purge, List newFiles, PathFilter deletePathFilter, + boolean isMmTable) throws HiveException { try { FileSystem destFs = destf.getFileSystem(conf); @@ -3606,41 +3750,9 @@ protected void replaceFiles(Path tablePath, Path srcf, Path destf, Path oldPath, } if (oldPath != null) { - boolean oldPathDeleted = false; - boolean isOldPathUnderDestf = false; - FileStatus[] statuses = null; - try { - FileSystem oldFs = oldPath.getFileSystem(conf); - - // Do not delete oldPath if: - // - destf is subdir of oldPath - isOldPathUnderDestf = isSubDir(oldPath, destf, oldFs, destFs, false); - if (isOldPathUnderDestf) { - // if oldPath is destf or its subdir, its should definitely be deleted, otherwise its - // existing content might result in incorrect (extra) data. - // But not sure why we changed not to delete the oldPath in HIVE-8750 if it is - // not the destf or its subdir? 
- if (conf.getBoolVar(HiveConf.ConfVars.REPLCMENABLED)) { - recycleDirToCmPath(oldPath, purge); - } - statuses = oldFs.listStatus(oldPath, FileUtils.HIDDEN_FILES_PATH_FILTER); - oldPathDeleted = trashFiles(oldFs, statuses, conf, purge); - } - } catch (IOException e) { - if (isOldPathUnderDestf) { - // if oldPath is a subdir of destf but it could not be cleaned - throw new HiveException("Directory " + oldPath.toString() - + " could not be cleaned up.", e); - } else { - //swallow the exception since it won't affect the final result - LOG.warn("Directory " + oldPath.toString() + " cannot be cleaned: " + e, e); - } - } - if (statuses != null && statuses.length > 0) { - if (!oldPathDeleted) { - throw new HiveException("Destination directory " + destf + " has not be cleaned up."); - } - } + // TODO: we assume lbLevels is 0 here. Same as old code for non-MM. + // For MM tables, this can only be a LOAD command. Does LOAD even support LB? + deleteOldPathForReplace(destf, oldPath, conf, purge, deletePathFilter, isMmTable, 0); } // first call FileUtils.mkdir to make sure that destf directory exists, if not, it creates @@ -3685,6 +3797,69 @@ protected void replaceFiles(Path tablePath, Path srcf, Path destf, Path oldPath, } } + private void deleteOldPathForReplace(Path destPath, Path oldPath, HiveConf conf, boolean purge, + PathFilter pathFilter, boolean isMmTable, int lbLevels) throws HiveException { + Utilities.LOG14535.info("Deleting old paths for replace in " + destPath + " and old path " + oldPath); + boolean isOldPathUnderDestf = false; + try { + FileSystem oldFs = oldPath.getFileSystem(conf); + FileSystem destFs = destPath.getFileSystem(conf); + // if oldPath is destf or its subdir, its should definitely be deleted, otherwise its + // existing content might result in incorrect (extra) data. + // But not sure why we changed not to delete the oldPath in HIVE-8750 if it is + // not the destf or its subdir? + isOldPathUnderDestf = isSubDir(oldPath, destPath, oldFs, destFs, false); + if (isOldPathUnderDestf || isMmTable) { + if (lbLevels == 0 || !isMmTable) { + cleanUpOneDirectoryForReplace(oldPath, oldFs, pathFilter, conf, purge); + } else { + // We need to clean up different MM IDs from each LB directory separately. + // Avoid temporary directories in the immediate table/part dir. + // TODO: we could just find directories with any MM directories inside? + // the rest doesn't have to be cleaned up. 
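A sketch (not part of the patch) of the glob mask assembled below for cleaning list-bucketed directories during an MM replace; the helper name is hypothetical.

import org.apache.hadoop.fs.Path;

// Illustrative sketch only; not part of this patch. The helper is hypothetical.
public class LbGlobMaskSketch {
  /** "[^._]*" skips temp/hidden entries at the partition root; one "*" per extra LB level. */
  public static String lbGlobMask(int lbLevels) {
    StringBuilder mask = new StringBuilder("[^._]*");
    for (int i = 0; i < lbLevels - 1; ++i) {
      mask.append(Path.SEPARATOR).append("*");
    }
    return mask.toString();
  }
  // lbGlobMask(1) -> "[^._]*"    lbGlobMask(2) -> "[^._]*/*"
}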
+ String mask = "[^._]*"; + for (int i = 0; i < lbLevels - 1; ++i) { + mask += Path.SEPARATOR + "*"; + } + Path glob = new Path(oldPath, mask); + FileStatus[] lbDirs = oldFs.globStatus(glob); + for (FileStatus lbDir : lbDirs) { + Path lbPath = lbDir.getPath(); + if (!lbDir.isDirectory()) { + throw new HiveException("Unexpected path during overwrite: " + lbPath); + } + Utilities.LOG14535.info("Cleaning up LB directory " + lbPath); + cleanUpOneDirectoryForReplace(lbPath, oldFs, pathFilter, conf, purge); + } + } + } + } catch (IOException e) { + if (isOldPathUnderDestf || isMmTable) { + // if oldPath is a subdir of destf but it could not be cleaned + throw new HiveException("Directory " + oldPath.toString() + + " could not be cleaned up.", e); + } else { + //swallow the exception since it won't affect the final result + LOG.warn("Directory " + oldPath.toString() + " cannot be cleaned: " + e, e); + } + } + } + + + private void cleanUpOneDirectoryForReplace(Path path, FileSystem fs, + PathFilter pathFilter, HiveConf conf, boolean purge) throws IOException, HiveException { + FileStatus[] statuses = fs.listStatus(path, pathFilter); + if (statuses == null || statuses.length == 0) return; + String s = "Deleting files under " + path + " for replace: "; + for (FileStatus file : statuses) { + s += file.getPath().getName() + ", "; + } + Utilities.LOG14535.info(s); + if (!trashFiles(fs, statuses, conf, purge)) { + throw new HiveException("Old path " + path + " has not been cleaned up."); + } + } + /** * Trashes or deletes all files under a directory. Leaves the directory as is. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index 75179ab..44026fa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -373,6 +373,7 @@ public void setBucketCount(int newBucketNum) { // TODO: add test case and clean it up @SuppressWarnings("nls") public Path getBucketPath(int bucketNum) { + // Note: this makes assumptions that won't work with MM tables, unions, etc. FileStatus srcs[] = getSortedPaths(); if (srcs == null) { return null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index a53f774..6282548 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -1026,5 +1026,4 @@ public void setTableSpec(TableSpec tableSpec) { public boolean hasDeserializer() { return deserializer != null; } - }; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java index d795a19..ee8c249 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java @@ -248,7 +248,7 @@ public void showTableStatus(DataOutputStream out, Hive db, HiveConf conf, * @param tblPath not NULL * @throws IOException */ - // Duplicates logic in TextMetaDataFormatter + // Duplicates logic in TextMetaDataFormatter TODO: wtf?!! 
private void putFileSystemsStats(MapBuilder builder, List locations, HiveConf conf, Path tblPath) throws IOException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java index dcecb0e..41bbdfa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.metadata.formatting; import java.io.DataOutputStream; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; @@ -306,28 +307,30 @@ public void showTableStatus(DataOutputStream outStream, } catch (IOException e) { throw new HiveException(e); } - } + } + + private static class FileData { + public long totalFileSize = 0; + public long maxFileSize = 0; + public long minFileSize = Long.MAX_VALUE; + public long lastAccessTime = 0; + public long lastUpdateTime = 0; + public int numOfFiles = 0; + } + // TODO: why is this in text formatter? grrr private void writeFileSystemStats(DataOutputStream outStream, HiveConf conf, List locations, - Path tblPath, boolean partSpecified, int indent) - throws IOException - { - long totalFileSize = 0; - long maxFileSize = 0; - long minFileSize = Long.MAX_VALUE; - long lastAccessTime = 0; - long lastUpdateTime = 0; - int numOfFiles = 0; - + Path tblPath, boolean partSpecified, int indent) throws IOException { + FileData fd = new FileData(); boolean unknown = false; FileSystem fs = tblPath.getFileSystem(conf); // in case all files in locations do not exist try { FileStatus tmpStatus = fs.getFileStatus(tblPath); - lastAccessTime = tmpStatus.getAccessTime(); - lastUpdateTime = tmpStatus.getModificationTime(); + fd.lastAccessTime = tmpStatus.getAccessTime(); + fd.lastUpdateTime = tmpStatus.getModificationTime(); if (partSpecified) { // check whether the part exists or not in fs tmpStatus = fs.getFileStatus(locations.get(0)); @@ -341,43 +344,13 @@ private void writeFileSystemStats(DataOutputStream outStream, if (!unknown) { for (Path loc : locations) { try { - FileStatus status = fs.getFileStatus(tblPath); - FileStatus[] files = fs.listStatus(loc); - long accessTime = status.getAccessTime(); - long updateTime = status.getModificationTime(); + FileStatus status = fs.getFileStatus(loc); // no matter loc is the table location or part location, it must be a // directory. 
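A sketch (not part of the patch) of the aggregation that the new FileData/processDir pair below performs, restated over java.io.File so it runs stand-alone; access time is omitted because java.io.File does not expose it. Unlike the old single-level loop, the traversal recurses so that files inside MM delta subdirectories are counted too.

import java.io.File;

// Illustrative sketch only; not part of this patch. Names are hypothetical.
public class FileStatsSketch {
  static class FileData {
    long totalFileSize = 0, maxFileSize = 0, minFileSize = Long.MAX_VALUE, lastUpdateTime = 0;
    int numOfFiles = 0;
  }

  static void processDir(File dir, FileData fd) {
    File[] children = dir.listFiles();
    if (children == null) {
      return;
    }
    for (File child : children) {
      if (child.isDirectory()) {
        processDir(child, fd); // recurse, so nested delta directories are included
        continue;
      }
      fd.numOfFiles++;
      long len = child.length();
      fd.totalFileSize += len;
      fd.maxFileSize = Math.max(fd.maxFileSize, len);
      fd.minFileSize = Math.min(fd.minFileSize, len);
      fd.lastUpdateTime = Math.max(fd.lastUpdateTime, child.lastModified());
    }
  }
}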
- if (!status.isDir()) { + if (!status.isDirectory()) { continue; } - if (accessTime > lastAccessTime) { - lastAccessTime = accessTime; - } - if (updateTime > lastUpdateTime) { - lastUpdateTime = updateTime; - } - for (FileStatus currentStatus : files) { - if (currentStatus.isDir()) { - continue; - } - numOfFiles++; - long fileLen = currentStatus.getLen(); - totalFileSize += fileLen; - if (fileLen > maxFileSize) { - maxFileSize = fileLen; - } - if (fileLen < minFileSize) { - minFileSize = fileLen; - } - accessTime = currentStatus.getAccessTime(); - updateTime = currentStatus.getModificationTime(); - if (accessTime > lastAccessTime) { - lastAccessTime = accessTime; - } - if (updateTime > lastUpdateTime) { - lastUpdateTime = updateTime; - } - } + processDir(status, fs, fd); } catch (IOException e) { // ignore } @@ -389,29 +362,29 @@ private void writeFileSystemStats(DataOutputStream outStream, outStream.write(Utilities.INDENT.getBytes("UTF-8")); } outStream.write("totalNumberFiles:".getBytes("UTF-8")); - outStream.write((unknown ? unknownString : "" + numOfFiles).getBytes("UTF-8")); + outStream.write((unknown ? unknownString : "" + fd.numOfFiles).getBytes("UTF-8")); outStream.write(terminator); for (int k = 0; k < indent; k++) { outStream.write(Utilities.INDENT.getBytes("UTF-8")); } outStream.write("totalFileSize:".getBytes("UTF-8")); - outStream.write((unknown ? unknownString : "" + totalFileSize).getBytes("UTF-8")); + outStream.write((unknown ? unknownString : "" + fd.totalFileSize).getBytes("UTF-8")); outStream.write(terminator); for (int k = 0; k < indent; k++) { outStream.write(Utilities.INDENT.getBytes("UTF-8")); } outStream.write("maxFileSize:".getBytes("UTF-8")); - outStream.write((unknown ? unknownString : "" + maxFileSize).getBytes("UTF-8")); + outStream.write((unknown ? unknownString : "" + fd.maxFileSize).getBytes("UTF-8")); outStream.write(terminator); for (int k = 0; k < indent; k++) { outStream.write(Utilities.INDENT.getBytes("UTF-8")); } outStream.write("minFileSize:".getBytes("UTF-8")); - if (numOfFiles > 0) { - outStream.write((unknown ? unknownString : "" + minFileSize).getBytes("UTF-8")); + if (fd.numOfFiles > 0) { + outStream.write((unknown ? unknownString : "" + fd.minFileSize).getBytes("UTF-8")); } else { outStream.write((unknown ? unknownString : "" + 0).getBytes("UTF-8")); } @@ -421,17 +394,53 @@ private void writeFileSystemStats(DataOutputStream outStream, outStream.write(Utilities.INDENT.getBytes("UTF-8")); } outStream.write("lastAccessTime:".getBytes("UTF-8")); - outStream.writeBytes((unknown || lastAccessTime < 0) ? unknownString : "" - + lastAccessTime); + outStream.writeBytes((unknown || fd.lastAccessTime < 0) ? unknownString : "" + + fd.lastAccessTime); outStream.write(terminator); for (int k = 0; k < indent; k++) { outStream.write(Utilities.INDENT.getBytes("UTF-8")); } outStream.write("lastUpdateTime:".getBytes("UTF-8")); - outStream.write((unknown ? unknownString : "" + lastUpdateTime).getBytes("UTF-8")); + outStream.write((unknown ? 
unknownString : "" + fd.lastUpdateTime).getBytes("UTF-8")); outStream.write(terminator); - } + } + + private void processDir(FileStatus status, FileSystem fs, FileData fd) throws IOException { + Utilities.LOG14535.info("Processing dir for status: " + status.getPath()); + long accessTime = status.getAccessTime(); + long updateTime = status.getModificationTime(); + if (accessTime > fd.lastAccessTime) { + fd.lastAccessTime = accessTime; + } + if (updateTime > fd.lastUpdateTime) { + fd.lastUpdateTime = updateTime; + } + FileStatus[] files = fs.listStatus(status.getPath()); + for (FileStatus currentStatus : files) { + if (currentStatus.isDirectory()) { + processDir(currentStatus, fs, fd); + continue; + } + fd.numOfFiles++; + long fileLen = currentStatus.getLen(); + fd.totalFileSize += fileLen; + if (fileLen > fd.maxFileSize) { + fd.maxFileSize = fileLen; + } + if (fileLen < fd.minFileSize) { + fd.minFileSize = fileLen; + } + accessTime = currentStatus.getAccessTime(); + updateTime = currentStatus.getModificationTime(); + if (accessTime > fd.lastAccessTime) { + fd.lastAccessTime = accessTime; + } + if (updateTime > fd.lastUpdateTime) { + fd.lastUpdateTime = updateTime; + } + } + } /** * Show the table partitions. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java index a0bc19f..02bdd91 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java @@ -34,10 +34,12 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; @@ -262,6 +264,10 @@ protected boolean checkConvertBucketMapJoin( } Table tbl = tso.getConf().getTableMetadata(); + if (MetaStoreUtils.isInsertOnlyTable(tbl.getParameters())) { + Utilities.LOG14535.debug("No bucketed join on MM table " + tbl.getTableName()); + return false; + } if (tbl.isPartitioned()) { PrunedPartitionList prunedParts = pGraphContext.getPrunedPartitions(alias, tso); List partitions = prunedParts.getNotDeniedPartns(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java index 2b4c3a8..09c3079 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java @@ -26,6 +26,7 @@ import java.util.Stack; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; @@ -236,6 +237,7 @@ private void storeBucketPathMapping(TableScanOperator tsOp, FileStatus[] srcs) { Map bucketFileNameMapping = new HashMap(); for (int pos = 0; pos < srcs.length; pos++) { if 
(ShimLoader.getHadoopShims().isDirectory(srcs[pos])) { + // TODO# HERE throw new RuntimeException("Was expecting '" + srcs[pos].getPath() + "' to be bucket file."); } bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos); @@ -407,12 +409,16 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } - if(stack.get(0) instanceof TableScanOperator) { + if (stack.get(0) instanceof TableScanOperator) { TableScanOperator tso = ((TableScanOperator)stack.get(0)); - if(AcidUtils.isAcidTable(tso.getConf().getTableMetadata())) { + Table tab = tso.getConf().getTableMetadata(); + if (AcidUtils.isFullAcidTable(tab)) { /*ACID tables have complex directory layout and require merging of delta files * on read thus we should not try to read bucket files directly*/ return null; + } else if (MetaStoreUtils.isInsertOnlyTable(tab.getParameters())) { + // Do not support MM tables either at this point. We could do it with some extra logic. + return null; } } // Support for dynamic partitions can be added later @@ -455,6 +461,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, List sourceTableSortCols = new ArrayList(); op = op.getParentOperators().get(0); + boolean isSrcMmTable = false; while (true) { if (!(op instanceof TableScanOperator) && !(op instanceof FilterOperator) && @@ -503,6 +510,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, assert !useBucketSortPositions; TableScanOperator ts = (TableScanOperator) op; Table srcTable = ts.getConf().getTableMetadata(); + // Not supported for MM tables for now. + if (MetaStoreUtils.isInsertOnlyTable(destTable.getParameters())) { + return null; + } + // Find the positions of the bucketed columns in the table corresponding // to the select list. 
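These guards, and the near-identical ones added to several later files in this patch, all hinge on MetaStoreUtils.isInsertOnlyTable(table.getParameters()). Below is a rough standalone approximation of that predicate, assuming it reduces to the two table properties the import hunk later in this patch sets explicitly ("transactional" = "true", "transactional_properties" = "insert_only"); the real metastore-side check may be more involved:

    import java.util.HashMap;
    import java.util.Map;

    public class InsertOnlyCheckSketch {
      // Approximation only: treats a table as MM when both properties are present.
      static boolean isInsertOnlyTable(Map<String, String> params) {
        return "true".equalsIgnoreCase(params.get("transactional"))
            && "insert_only".equalsIgnoreCase(params.get("transactional_properties"));
      }

      public static void main(String[] args) {
        Map<String, String> params = new HashMap<>();
        params.put("transactional", "true");
        params.put("transactional_properties", "insert_only");
        // An optimizer using this guard would bail out here, e.g. "no bucketed join on MM table".
        System.out.println(isInsertOnlyTable(params) ? "skip optimization" : "proceed");
      }
    }
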
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java index 9297a0b..78d1e54 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; @@ -195,7 +196,8 @@ private void handlePartialScanCommand(TableScanOperator op, GenMRProcContext ctx aggregationKey = aggregationKeyBuffer.toString(); // scan work - PartialScanWork scanWork = new PartialScanWork(inputPaths); + PartialScanWork scanWork = new PartialScanWork(inputPaths, + Utilities.getTableDesc(op.getConf().getTableMetadata())); scanWork.setMapperCannotSpanPartns(true); scanWork.setAggKey(aggregationKey); scanWork.setStatsTmpDir(op.getConf().getTmpStatsDir(), parseCtx.getConf()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index da153e3..02e6512 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -97,6 +97,7 @@ import org.apache.hadoop.hive.ql.plan.FileMergeDesc; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc; +import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -678,7 +679,7 @@ public static void setMapWork(MapWork plan, ParseContext parseCtx, Set 0, fsInput.getCompilationOpContext()); if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { @@ -1351,38 +1364,41 @@ public static void createMRWorkForMergingFiles (FileSinkOperator fsInput, cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"); // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't // know if merge MR2 will be triggered at execution time - Task mvTask = GenMapRedUtils.findMoveTask(mvTasks, fsOutput); + MoveWork dummyMv = null; + if (srcMmWriteId == null) { + // Only create the movework for non-MM table. No action needed for a MM table. + Utilities.LOG14535.info("creating dummy movetask for merge (with lfd)"); + dummyMv = new MoveWork(null, null, null, + new LoadFileDesc(inputDirName, finalName, true, null, null, false), false); + } else { + // TODO# create the noop MoveWork to avoid q file changes for now. Should be removed. + dummyMv = new MoveWork(null, null, null, + new LoadFileDesc(inputDirName, finalName, true, null, null, false), false); + dummyMv.setNoop(true); + } + // Use the original fsOp path here in case of MM - while the new FSOP merges files inside the + // MM directory, the original MoveTask still commits based on the parent. Note that this path + // can only be triggered for a merge that's part of insert for now; MM tables do not support + // concatenate. Keeping the old logic for non-MM tables with temp directories and stuff. + // TODO# is this correct? 
+ Path fsopPath = srcMmWriteId != null ? fsInputDesc.getFinalDirName() : finalName; + Utilities.LOG14535.info("Looking for MoveTask to make it dependant on the conditional tasks"); + + Task mvTask = GenMapRedUtils.findMoveTaskForFsopOutput( + mvTasks, fsopPath, fsInputDesc.isMmTable()); + // TODO# questionable master merge here ConditionalTask cndTsk = GenMapRedUtils.createCondTask(conf, currTask, dummyMv, work, - fsInputDesc.getFinalDirName(), finalName, mvTask, dependencyTask); + fsInputDesc.getMergeInputDirName(), finalName, mvTask, dependencyTask); // keep the dynamic partition context in conditional task resolver context ConditionalResolverMergeFilesCtx mrCtx = (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx(); mrCtx.setDPCtx(fsInputDesc.getDynPartCtx()); mrCtx.setLbCtx(fsInputDesc.getLbCtx()); - } - - /** - * Make the move task in the GenMRProcContext following the FileSinkOperator a dependent of all - * possible subtrees branching from the ConditionalTask. - * - * @param newOutput - * @param cndTsk - * @param mvTasks - * @param hconf - * @param dependencyTask - */ - public static void linkMoveTask(FileSinkOperator newOutput, - ConditionalTask cndTsk, List> mvTasks, HiveConf hconf, - DependencyCollectionTask dependencyTask) { - - Task mvTask = GenMapRedUtils.findMoveTask(mvTasks, newOutput); - for (Task tsk : cndTsk.getListTasks()) { - linkMoveTask(mvTask, tsk, hconf, dependencyTask); - } } + /** * Follows the task tree down from task and makes all leaves parents of mvTask * @@ -1391,7 +1407,7 @@ public static void linkMoveTask(FileSinkOperator newOutput, * @param hconf * @param dependencyTask */ - public static void linkMoveTask(Task mvTask, + private static void linkMoveTask(Task mvTask, Task task, HiveConf hconf, DependencyCollectionTask dependencyTask) { @@ -1544,11 +1560,12 @@ private static MapWork createMRWorkForMergingFiles (HiveConf conf, TableScanOperator topOp, FileSinkDesc fsDesc) { ArrayList aliases = new ArrayList(); - Path inputDir = StringInternUtils.internUriStringsInPath(fsDesc.getFinalDirName()); + Path inputDir = StringInternUtils.internUriStringsInPath(fsDesc.getMergeInputDirName()); String inputDirStr = inputDir.toString().intern(); TableDesc tblDesc = fsDesc.getTableInfo(); aliases.add(inputDirStr); // dummy alias: just use the input path + Utilities.LOG14535.info("createMRWorkForMergingFiles for " + inputDir); // constructing the default MapredWork MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf); MapWork cplan = cMrPlan.getMapWork(); @@ -1573,8 +1590,9 @@ private static MapWork createMRWorkForMergingFiles (HiveConf conf, */ public static MapWork createMergeTask(FileSinkDesc fsInputDesc, Path finalName, boolean hasDynamicPartitions, CompilationOpContext ctx) throws SemanticException { + + Path inputDir = fsInputDesc.getMergeInputDirName(); - Path inputDir = fsInputDesc.getFinalDirName(); TableDesc tblDesc = fsInputDesc.getTableInfo(); List inputDirs = new ArrayList(1); @@ -1598,9 +1616,10 @@ public static MapWork createMergeTask(FileSinkDesc fsInputDesc, Path finalName, + " format other than RCFile or ORCFile"); } + Utilities.LOG14535.info("creating mergefilework from " + inputDirs + " to " + finalName); // create the merge file work MergeFileWork work = new MergeFileWork(inputDirs, finalName, - hasDynamicPartitions, tblDesc.getInputFileFormatClass().getName()); + hasDynamicPartitions, tblDesc.getInputFileFormatClass().getName(), tblDesc); LinkedHashMap> pathToAliases = new LinkedHashMap<>(); pathToAliases.put(inputDir, inputDirstr); 
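For MM tables the move leg created earlier in this hunk is a placeholder (a MoveWork flagged with setNoop(true) here, and a DependencyCollectionWork inside createCondTask below), so the conditional merge keeps the same task graph shape whether or not a real move is needed. An illustrative sketch of that pattern with stand-in types, not Hive's classes:

    interface Work { void execute(); }

    class MoveFilesWork implements Work {
      private final String src, dst;
      MoveFilesWork(String src, String dst) { this.src = src; this.dst = dst; }
      public void execute() { System.out.println("move " + src + " -> " + dst); }
    }

    class NoopWork implements Work {
      // Nothing to do: for MM the writer already placed files under the table's delta directory.
      public void execute() { }
    }

    public class CondMergeSketch {
      // Non-MM: data sits in a staging dir and still has to be moved into place.
      // MM: the move leg degenerates to a no-op placeholder.
      static Work moveLeg(boolean isMmTable, String stagingDir, String finalDir) {
        return isMmTable ? new NoopWork() : new MoveFilesWork(stagingDir, finalDir);
      }

      public static void main(String[] args) {
        moveLeg(false, "/tmp/staging", "/warehouse/t").execute();
        moveLeg(true, "/warehouse/t/delta_0000001_0000001", "/warehouse/t").execute();
      }
    }
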
work.setMapperCannotSpanPartns(true); @@ -1620,6 +1639,10 @@ public static MapWork createMergeTask(FileSinkDesc fsInputDesc, Path finalName, } else { fmd = new OrcFileMergeDesc(); } + fmd.setIsMmTable(fsInputDesc.isMmTable()); + fmd.setTxnId(fsInputDesc.getTransactionId()); + int stmtId = fsInputDesc.getStatementId(); + fmd.setStmtId(stmtId == -1 ? 0 : stmtId); fmd.setDpCtx(fsInputDesc.getDynPartCtx()); fmd.setOutputPath(finalName); fmd.setHasDynamicPartitions(work.hasDynamicPartitions()); @@ -1721,18 +1744,22 @@ protected static MoveWork mergeMovePaths(Path condInputPath, MoveWork linkedMove * a dependency task that may be linked to the conditional sub-tasks * @return The conditional task */ + @SuppressWarnings("unchecked") private static ConditionalTask createCondTask(HiveConf conf, - Task currTask, MoveWork dummyMoveWork, Serializable mergeWork, - Path condInputPath, Path condOutputPath, Task moveTaskToLink, DependencyCollectionTask dependencyTask) { + Task currTask, MoveWork mvWork, Serializable mergeWork, + Path condInputPath, Path condOutputPath, Task moveTaskToLink, + DependencyCollectionTask dependencyTask) { + Utilities.LOG14535.info("Creating conditional merge task for " + condInputPath); + // Create a dummy task if no move is needed. + Serializable moveWork = mvWork != null ? mvWork : new DependencyCollectionWork(); + // TODO: this should never happen for mm tables. boolean shouldMergeMovePaths = (moveTaskToLink != null && dependencyTask == null && shouldMergeMovePaths(conf, condInputPath, condOutputPath, moveTaskToLink.getWork())); - MoveWork workForMoveOnlyTask; + Serializable workForMoveOnlyTask = moveWork; if (shouldMergeMovePaths) { workForMoveOnlyTask = mergeMovePaths(condInputPath, moveTaskToLink.getWork()); - } else { - workForMoveOnlyTask = dummyMoveWork; } // There are 3 options for this ConditionalTask: @@ -1741,10 +1768,12 @@ private static ConditionalTask createCondTask(HiveConf conf, // 3) Merge some partitions and move other partitions (i.e. merge some partitions and don't // merge others) in this case the merge is done first followed by the move to prevent // conflicts. + // TODO: if we are not dealing with concatenate DDL, we should not create a merge+move path + // because it should be impossible to get incompatible outputs. Task mergeOnlyMergeTask = TaskFactory.get(mergeWork, conf); Task moveOnlyMoveTask = TaskFactory.get(workForMoveOnlyTask, conf); Task mergeAndMoveMergeTask = TaskFactory.get(mergeWork, conf); - Task mergeAndMoveMoveTask = TaskFactory.get(dummyMoveWork, conf); + Task mergeAndMoveMoveTask = TaskFactory.get(moveWork, conf); // NOTE! 
It is necessary merge task is the parent of the move task, and not // the other way around, for the proper execution of the execute method of @@ -1785,6 +1814,7 @@ private static ConditionalTask createCondTask(HiveConf conf, addDependentMoveTasks(moveTaskToLink, conf, moveOnlyMoveTask, dependencyTask); } + addDependentMoveTasks(moveTaskToLink, conf, mergeOnlyMergeTask, dependencyTask); addDependentMoveTasks(moveTaskToLink, conf, mergeAndMoveMoveTask, dependencyTask); @@ -1802,20 +1832,28 @@ public static boolean isSkewedStoredAsDirs(FileSinkDesc fsInputDesc) { .isSkewedStoredAsDir(); } - public static Task findMoveTask( - List> mvTasks, FileSinkOperator fsOp) { + public static Task findMoveTaskForFsopOutput( + List> mvTasks, Path fsopFinalDir, boolean isMmFsop) { // find the move task for (Task mvTsk : mvTasks) { MoveWork mvWork = mvTsk.getWork(); + if (mvWork.isNoop()) continue; Path srcDir = null; + boolean isLfd = false; if (mvWork.getLoadFileWork() != null) { srcDir = mvWork.getLoadFileWork().getSourcePath(); + isLfd = true; + if (isMmFsop) { + srcDir = srcDir.getParent(); + } } else if (mvWork.getLoadTableWork() != null) { - srcDir = mvWork.getLoadTableWork().getSourcePath(); + srcDir = mvWork.getLoadTableWork().getSourcePath(); // TODO# THIS } + Utilities.LOG14535.info("Observing MoveWork " + System.identityHashCode(mvWork) + + " with " + srcDir + "(from " + (isLfd ? "LFD" : "LTD") + ") while looking for " + + fsopFinalDir + "(mm = " + isMmFsop + ")"); - if ((srcDir != null) - && (srcDir.equals(fsOp.getConf().getFinalDirName()))) { + if ((srcDir != null) && srcDir.equals(fsopFinalDir)) { return mvTsk; } } @@ -1824,70 +1862,62 @@ public static boolean isSkewedStoredAsDirs(FileSinkDesc fsInputDesc) { /** * Returns true iff the fsOp requires a merge - * @param mvTasks - * @param hconf - * @param fsOp - * @param currTask - * @param isInsertTable - * @return */ - public static boolean isMergeRequired(List> mvTasks, HiveConf hconf, FileSinkOperator fsOp, - Task currTask, boolean isInsertTable) { - + public static boolean isMergeRequired(List> mvTasks, HiveConf hconf, + FileSinkOperator fsOp, Task currTask, boolean isInsertTable) { // Has the user enabled merging of files for map-only jobs or for all jobs - if ((mvTasks != null) && (!mvTasks.isEmpty())) { - - // no need of merging if the move is to a local file system - MoveTask mvTask = (MoveTask) GenMapRedUtils.findMoveTask(mvTasks, fsOp); - - if (mvTask != null && isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) - && !fsOp.getConf().isMaterialization()) { - // mark the MapredWork and FileSinkOperator for gathering stats - fsOp.getConf().setGatherStats(true); - fsOp.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); - if (!mvTask.hasFollowingStatsTask()) { - GenMapRedUtils.addStatsTask(fsOp, mvTask, currTask, hconf); - } + if (mvTasks == null || mvTasks.isEmpty()) return false; + + // no need of merging if the move is to a local file system + // We are looking based on the original FSOP, so use the original path as is. + MoveTask mvTask = (MoveTask) GenMapRedUtils.findMoveTaskForFsopOutput( + mvTasks, fsOp.getConf().getFinalDirName(), fsOp.getConf().isMmTable()); + + // TODO: wtf? wtf?!! why is this in this method? 
+ if (mvTask != null && isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) + && !fsOp.getConf().isMaterialization()) { + // mark the MapredWork and FileSinkOperator for gathering stats + fsOp.getConf().setGatherStats(true); + fsOp.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); + if (!mvTask.hasFollowingStatsTask()) { + GenMapRedUtils.addStatsTask(fsOp, mvTask, currTask, hconf); } + } - if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) { + if (mvTask == null || mvTask.isLocal() || !fsOp.getConf().canBeMerged()) return false; - if (currTask.getWork() instanceof TezWork) { - // tez blurs the boundary between map and reduce, thus it has it's own - // config - return hconf.getBoolVar(ConfVars.HIVEMERGETEZFILES); - } else if (currTask.getWork() instanceof SparkWork) { - // spark has its own config for merging - return hconf.getBoolVar(ConfVars.HIVEMERGESPARKFILES); - } + if (currTask.getWork() instanceof TezWork) { + // tez blurs the boundary between map and reduce, thus it has it's own config + return hconf.getBoolVar(ConfVars.HIVEMERGETEZFILES); + } else if (currTask.getWork() instanceof SparkWork) { + // spark has its own config for merging + return hconf.getBoolVar(ConfVars.HIVEMERGESPARKFILES); + } + return isMergeRequiredForMr(hconf, fsOp, currTask); + } - if (fsOp.getConf().isLinkedFileSink()) { - // If the user has HIVEMERGEMAPREDFILES set to false, the idea was the - // number of reducers are few, so the number of files anyway are small. - // However, with this optimization, we are increasing the number of files - // possibly by a big margin. So, merge aggresively. - if (hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) || - hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES)) { - return true; - } - } else { - // There are separate configuration parameters to control whether to - // merge for a map-only job - // or for a map-reduce job - if (currTask.getWork() instanceof MapredWork) { - ReduceWork reduceWork = ((MapredWork) currTask.getWork()).getReduceWork(); - boolean mergeMapOnly = - hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && reduceWork == null; - boolean mergeMapRed = - hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && - reduceWork != null; - if (mergeMapOnly || mergeMapRed) { - return true; - } - } else { - return false; - } - } + private static boolean isMergeRequiredForMr(HiveConf hconf, + FileSinkOperator fsOp, Task currTask) { + if (fsOp.getConf().isLinkedFileSink()) { + // If the user has HIVEMERGEMAPREDFILES set to false, the idea was the + // number of reducers are few, so the number of files anyway are small. + // However, with this optimization, we are increasing the number of files + // possibly by a big margin. So, merge aggresively. 
+ return (hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) || + hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES)); + } + // There are separate configuration parameters to control whether to + // merge for a map-only job + // or for a map-reduce job + if (currTask.getWork() instanceof MapredWork) { + ReduceWork reduceWork = ((MapredWork) currTask.getWork()).getReduceWork(); + boolean mergeMapOnly = + hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && reduceWork == null; + boolean mergeMapRed = + hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && + reduceWork != null; + if (mergeMapOnly || mergeMapRed) { + return true; } } return false; @@ -1911,33 +1941,39 @@ public static Path createMoveTask(Task currTask, boolean Path dest = null; + FileSinkDesc fileSinkDesc = fsOp.getConf(); + boolean isMmTable = fileSinkDesc.isMmTable(); if (chDir) { - FileSinkDesc fileSinkDesc = fsOp.getConf(); - dest = fileSinkDesc.getFinalDirName(); - - // generate the temporary file - // it must be on the same file system as the current destination - Context baseCtx = parseCtx.getContext(); - - // Create the required temporary file in the HDFS location if the destination - // path of the FileSinkOperator table is a blobstore path. - Path tmpDir = baseCtx.getTempDirForPath(fileSinkDesc.getDestPath(), true); - - // Change all the linked file sink descriptors - if (fileSinkDesc.isLinkedFileSink()) { - for (FileSinkDesc fsConf:fileSinkDesc.getLinkedFileSinkDesc()) { - fsConf.setParentDir(tmpDir); - fsConf.setDirName(new Path(tmpDir, fsConf.getDirName().getName())); + dest = fileSinkDesc.getMergeInputDirName(); + if (!isMmTable) { + // generate the temporary file + // it must be on the same file system as the current destination + Context baseCtx = parseCtx.getContext(); + + // Create the required temporary file in the HDFS location if the destination + // path of the FileSinkOperator table is a blobstore path. + Path tmpDir = baseCtx.getTempDirForPath(fileSinkDesc.getDestPath(), true); + + // Change all the linked file sink descriptors + if (fileSinkDesc.isLinkedFileSink()) { + for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) { + fsConf.setDirName(new Path(tmpDir, fsConf.getDirName().getName())); + Utilities.LOG14535.info("createMoveTask setting tmpDir for LinkedFileSink chDir " + fsConf.getDirName() + "; dest was " + fileSinkDesc.getDestPath()); + } + } else { + fileSinkDesc.setDirName(tmpDir); + Utilities.LOG14535.info("createMoveTask setting tmpDir chDir " + tmpDir + "; dest was " + fileSinkDesc.getDestPath()); } - } else { - fileSinkDesc.setDirName(tmpDir); } } Task mvTask = null; if (!chDir) { - mvTask = GenMapRedUtils.findMoveTask(mvTasks, fsOp); + // TODO# is it correct to always use MM dir in MM case here? Where does MoveTask point? 
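createMoveTask just below and createMRWorkForMergingFiles above both resolve their MoveTask through findMoveTaskForFsopOutput, which skips no-op moves and, for MM file sinks, compares against the parent directory, because the sink's output dir is a delta subdirectory of the path the move was planned for. A simplified standalone sketch of that matching rule (stand-in types; the delta directory name is illustrative):

    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.Arrays;
    import java.util.List;

    public class MoveTaskLookupSketch {
      static final class MoveSrc {
        final Path dir; final boolean noop;
        MoveSrc(Path dir, boolean noop) { this.dir = dir; this.noop = noop; }
      }

      static MoveSrc findMoveForFsopOutput(List<MoveSrc> moves, Path fsopFinalDir, boolean isMmFsop) {
        for (MoveSrc mv : moves) {
          if (mv.noop) continue;                                   // placeholder moves never match
          Path candidate = isMmFsop ? mv.dir.getParent() : mv.dir; // strip the delta level for MM
          if (fsopFinalDir.equals(candidate)) return mv;
        }
        return null;
      }

      public static void main(String[] args) {
        Path tableDir = Paths.get("/warehouse/t");
        List<MoveSrc> moves = Arrays.asList(
            new MoveSrc(Paths.get("/warehouse/t/delta_0000005_0000005_0000"), false));
        System.out.println(findMoveForFsopOutput(moves, tableDir, true) != null); // prints true
      }
    }
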
+ Utilities.LOG14535.info("Looking for MoveTask from createMoveTask"); + mvTask = GenMapRedUtils.findMoveTaskForFsopOutput( + mvTasks, fsOp.getConf().getFinalDirName(), fsOp.getConf().isMmTable()); } // Set the move task to be dependent on the current task diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java index dd679db..9d2e031 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java @@ -32,6 +32,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; @@ -190,7 +191,9 @@ public static NodeProcessor getDefaultProc() { String fullScanMsg = ""; // check if input pruning is possible - if (sampleDescr.getInputPruning()) { + // TODO: this relies a lot on having one file per bucket. No support for MM tables for now. + boolean isMmTable = MetaStoreUtils.isInsertOnlyTable(part.getTable().getParameters()); + if (sampleDescr.getInputPruning() && !isMmTable) { LOG.trace("numerator = " + num); LOG.trace("denominator = " + den); LOG.trace("bucket count = " + bucketCount); @@ -217,7 +220,7 @@ public static NodeProcessor getDefaultProc() { } } else { // need to do full scan - fullScanMsg = "Tablesample not on clustered columns"; + fullScanMsg = isMmTable ? "MM table" : "Tablesample not on clustered columns"; } LOG.warn(fullScanMsg + ", using full table scan"); Path[] ret = part.getPath(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java index 7b2a1a9..0fe1ea3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java @@ -342,6 +342,12 @@ else if (getGbyKeyType(cgbyOp) == GbyKeyType.CONSTANT && rowCnt == 0) { return null; // todo we can collapse this part of tree into single TS } + /* TODO# seems to be removed in master? + Table tbl = tsOp.getConf().getTableMetadata(); + if (AcidUtils.isFullAcidTable(tbl)) { + Logger.info("Table " + tbl.getTableName() + " is ACID table. Skip StatsOptimizer."); + return null; + }*/ List oneRow = new ArrayList(); Hive hive = Hive.get(pctx.getConf()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java index 391cfda..d3e360e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java @@ -49,11 +49,14 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Operator factory for the rule processors for inferring bucketing/sorting columns. 
*/ public class BucketingSortingOpProcFactory { + private static final Logger LOG = LoggerFactory.getLogger(BucketingSortingOpProcFactory.class); public static class DefaultInferrer implements NodeProcessor { @@ -460,6 +463,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx; FileSinkOperator fop = (FileSinkOperator)nd; + if (fop.getConf().isMmTable()) { + // See the comment inside updatePartitionBucketSortColumns. + LOG.warn("Currently, inferring buckets is not going to work for MM tables (by design)."); + } Operator parent = getParent(stack); List bucketCols = bctx.getBucketedCols(parent); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java index e0ccd04..3a3f513 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java @@ -256,7 +256,7 @@ public static void processSkewJoin(JoinOperator joinOp, Operator ts = GenMapRedUtils.createTemporaryTableScanOperator( joinOp.getCompilationOpContext(), rowSchemaList.get((byte)k)); - ((TableScanOperator)ts).setTableDesc(tableDescList.get((byte)k)); + ((TableScanOperator)ts).setTableDescSkewJoin(tableDescList.get((byte)k)); parentOps[k] = ts; } Operator tblScan_op = parentOps[i]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java index 6b9d5b5..4e31ba6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java @@ -231,7 +231,7 @@ public static void processSkewJoin(JoinOperator joinOp, Task ts = GenMapRedUtils.createTemporaryTableScanOperator( joinOp.getCompilationOpContext(), rowSchemaList.get((byte) k)); - ((TableScanOperator) ts).setTableDesc(tableDescList.get((byte) k)); + ((TableScanOperator) ts).setTableDescSkewJoin(tableDescList.get((byte) k)); parentOps[k] = ts; } @@ -363,7 +363,7 @@ private static void insertSHTS(byte tag, TableScanOperator tableScan, MapWork bi HashTableDummyDesc desc = new HashTableDummyDesc(); HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get( tableScan.getCompilationOpContext(), desc); - dummyOp.getConf().setTbl(tableScan.getTableDesc()); + dummyOp.getConf().setTbl(tableScan.getTableDescSkewJoin()); MapJoinOperator mapJoinOp = (MapJoinOperator) tableScan.getChildOperators().get(0); mapJoinOp.replaceParent(tableScan, dummyOp); List> mapJoinChildren = diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java index 9ca815c..af3175e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java @@ -209,7 +209,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object.. 
if (parent.getSchema() == null) { if (parent instanceof TableScanOperator) { - tbl = ((TableScanOperator) parent).getTableDesc(); + tbl = ((TableScanOperator) parent).getTableDescSkewJoin(); } else { throw new SemanticException("Expected parent operator of type TableScanOperator." + "Found " + parent.getClass().getName() + " instead."); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java index 2def168..5b89059 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SamplingOptimizer.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.optimizer.physical; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; @@ -27,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; @@ -59,6 +61,18 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { if (!(operator instanceof TableScanOperator)) { continue; } + TableScanOperator tsop = (TableScanOperator)operator; + Table tbl = tsop.getConf().getTableMetadata(); + if (tbl == null) { + continue; + } + if (MetaStoreUtils.isInsertOnlyTable(tbl.getParameters())) { + // Not supported for MM tables - sampler breaks separate MM dirs into splits, resulting in + // mismatch when the downstream task looks at them again assuming they are MM table roots. + // We could somehow unset the MM flag for the main job when the sampler succeeds, since the + // sampler will limit the input to the correct directories, but we don't care about MR. + continue; + } ReduceSinkOperator child = OperatorUtils.findSingleOperator(operator, ReduceSinkOperator.class); if (child == null || diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java index f48d118..2f9783e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java @@ -38,7 +38,10 @@ import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.LoadFileDesc; +import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.slf4j.Logger; /** * An implementation of PhysicalPlanResolver. It iterator each task with a rule @@ -46,6 +49,8 @@ * it will try to add a conditional task associated a list of skew join tasks. */ public class SkewJoinResolver implements PhysicalPlanResolver { + private final static Logger LOG = org.slf4j.LoggerFactory.getLogger(SkewJoinResolver.class); + @Override public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { Dispatcher disp = new SkewJoinTaskDispatcher(pctx); @@ -78,8 +83,26 @@ public Object dispatch(Node nd, Stack stack, Object...
nodeOutputs) return null; } - SkewJoinProcCtx skewJoinProcContext = new SkewJoinProcCtx(task, - physicalContext.getParseContext()); + ParseContext pc = physicalContext.getParseContext(); + if (pc.getLoadTableWork() != null) { + for (LoadTableDesc ltd : pc.getLoadTableWork()) { + if (!ltd.isMmTable()) continue; + // See the path in FSOP that calls fs.exists on finalPath. + LOG.debug("Not using skew join because the destination table " + + ltd.getTable().getTableName() + " is an insert_only table"); + return null; + } + } + if (pc.getLoadFileWork() != null) { + for (LoadFileDesc lfd : pc.getLoadFileWork()) { + if (!lfd.isMmCtas()) continue; + LOG.debug("Not using skew join because the destination table " + + lfd.getDestinationCreateTable() + " is an insert_only table"); + return null; + } + } + + SkewJoinProcCtx skewJoinProcContext = new SkewJoinProcCtx(task, pc); Map opRules = new LinkedHashMap(); opRules.put(new RuleRegExp("R1", diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 13d7730..87e4ee8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1005,6 +1005,7 @@ private boolean canUseRowDeserializeFor(String inputFileFormatClassName) { setOperatorIssue("Alias " + alias + " not present in aliases " + aliases); return new ImmutablePair(false, false); } + // TODO: should this use getPartitionDescFromPathRecursively? PartitionDesc partDesc = pathToPartitionInfo.get(path); if (partDesc.getVectorPartitionDesc() != null) { // We've seen this already. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java index 81e99fc..e036cd7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java @@ -118,7 +118,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // get potential reentrant index queries from each index Map queryContexts = new HashMap(); // make sure we have an index on the table being scanned - TableDesc tblDesc = operator.getTableDesc(); + TableDesc tblDesc = operator.getTableDescSkewJoin(); Map> indexesByType = new HashMap>(); for (Index indexOnTable : indexes) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java index 2a7f3d4..3a38a6d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; @@ -221,7 +222,7 @@ private void pushOperatorsAboveUnion(UnionOperator union, FileSinkDesc fileSinkDesc = (FileSinkDesc) fileSinkOp.getConf().clone(); fileSinkDesc.setDirName(new Path(parentDirName, parent.getIdentifier())); fileSinkDesc.setLinkedFileSink(true); - 
fileSinkDesc.setParentDir(parentDirName); + Utilities.LOG14535.info("Created LinkedFileSink for union " + fileSinkDesc.getDirName() + "; parent " + parentDirName); parent.setChildOperators(null); Operator tmpFileSinkOp = OperatorFactory.getAndMakeChild(fileSinkDesc, parent.getSchema(), parent); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTablePartMergeFilesDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTablePartMergeFilesDesc.java index bdb872a..7670b86 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTablePartMergeFilesDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTablePartMergeFilesDesc.java @@ -25,6 +25,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; +import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -39,6 +40,7 @@ private List inputDir = new ArrayList(); private Path outputDir = null; private Class inputFormatClass; + private TableDesc tableDesc; public AlterTablePartMergeFilesDesc(String tableName, HashMap partSpec) { @@ -102,4 +104,11 @@ public void setInputFormatClass(Class inputFormatClass) { this.inputFormatClass = inputFormatClass; } + public void setTableDesc(TableDesc tableDesc) { + this.tableDesc = tableDesc; + } + + public TableDesc getTableDesc() { + return tableDesc; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 3ad30c4..fb5d566 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -120,7 +120,8 @@ */ protected Set acidFileSinks = new HashSet(); - // whether any ACID table is involved in a query + // whether any ACID table or Insert-only (mm) table is involved in a query + // They both require DbTxnManager and both need to recordValidTxns when acquiring locks in Driver protected boolean acidInQuery; public static final int HIVE_COLUMN_ORDER_ASC = 1; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 230ca47..65c7477 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; import org.apache.hadoop.hive.metastore.api.SkewedInfo; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; @@ -182,6 +183,7 @@ private final Set reservedPartitionValues; private final HiveAuthorizationTaskFactory hiveAuthorizationTaskFactory; + private WriteEntity alterTableOutput; static { TokenToTypeName.put(HiveParser.TOK_BOOLEAN, serdeConstants.BOOLEAN_TYPE_NAME); @@ -687,6 +689,7 @@ private void analyzeShowRolePrincipals(ASTNode ast) throws SemanticException { } private void analyzeShowRoles(ASTNode ast) throws SemanticException { + @SuppressWarnings("unchecked") Task roleDDLTask = (Task) hiveAuthorizationTaskFactory .createShowRolesTask(ast, ctx.getResFile(), getInputs(), getOutputs()); @@ -984,6 +987,10 @@ private void 
analyzeTruncateTable(ASTNode ast) throws SemanticException { if (indexes != null && indexes.size() > 0) { throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_INDEXED_TABLE.getMsg()); } + // It would be possible to support this, but this is such a pointless command. + if (MetaStoreUtils.isInsertOnlyTable(table.getParameters())) { + throw new SemanticException("Truncating MM table columns not presently supported"); + } List bucketCols = null; Class inputFormatClass = null; @@ -1082,10 +1089,10 @@ private void analyzeTruncateTable(ASTNode ast) throws SemanticException { Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc); truncateTblDesc.setOutputDir(queryTmpdir); LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, - partSpec == null ? new HashMap() : partSpec); + partSpec == null ? new HashMap() : partSpec, null); ltd.setLbCtx(lbCtx); - Task moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), - conf); + @SuppressWarnings("unchecked") + Task moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf); truncateTask.addDependentTask(moveTsk); // Recalculate the HDFS stats if auto gather stats is set @@ -1434,10 +1441,11 @@ private void analyzeAlterTableProps(String[] qualified, HashMap alterTblDesc.setEnvironmentContext(environmentContext); alterTblDesc.setOldName(tableName); - addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc); + boolean isPotentialMmSwitch = mapProp.containsKey(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL) + || mapProp.containsKey(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES); + addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc, isPotentialMmSwitch); - rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), - alterTblDesc), conf)); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); } private void analyzeAlterTableSerdeProps(ASTNode ast, String tableName, @@ -1497,16 +1505,21 @@ private void analyzeAlterTableFileFormat(ASTNode ast, String tableName, private void addInputsOutputsAlterTable(String tableName, Map partSpec, AlterTableTypes op) throws SemanticException { - addInputsOutputsAlterTable(tableName, partSpec, null, op); + addInputsOutputsAlterTable(tableName, partSpec, null, op, false); + } + + private void addInputsOutputsAlterTable(String tableName, Map partSpec, + AlterTableDesc desc, boolean doForceExclusive) throws SemanticException { + addInputsOutputsAlterTable(tableName, partSpec, desc, desc.getOp(), doForceExclusive); } private void addInputsOutputsAlterTable(String tableName, Map partSpec, AlterTableDesc desc) throws SemanticException { - addInputsOutputsAlterTable(tableName, partSpec, desc, desc.getOp()); + addInputsOutputsAlterTable(tableName, partSpec, desc, desc.getOp(), false); } private void addInputsOutputsAlterTable(String tableName, Map partSpec, - AlterTableDesc desc, AlterTableTypes op) throws SemanticException { + AlterTableDesc desc, AlterTableTypes op, boolean doForceExclusive) throws SemanticException { boolean isCascade = desc != null && desc.getIsCascade(); boolean alterPartitions = partSpec != null && !partSpec.isEmpty(); //cascade only occurs at table level then cascade to partition level @@ -1523,11 +1536,13 @@ private void addInputsOutputsAlterTable(String tableName, Map pa } // Determine the lock type to acquire - WriteEntity.WriteType writeType = WriteEntity.determineAlterTableWriteType(op); + WriteEntity.WriteType writeType = doForceExclusive + ? 
WriteType.DDL_EXCLUSIVE : WriteEntity.determineAlterTableWriteType(op); if (!alterPartitions) { inputs.add(new ReadEntity(tab)); - outputs.add(new WriteEntity(tab, writeType)); + alterTableOutput = new WriteEntity(tab, writeType); + outputs.add(alterTableOutput); //do not need the lock for partitions since they are covered by the table lock if (isCascade) { for (Partition part : getPartitions(tab, partSpec, false)) { @@ -1616,6 +1631,11 @@ private void analyzeAlterTablePartMergeFiles(ASTNode ast, try { tblObj = getTable(tableName); + // TODO: we should probably block all ACID tables here. + if (MetaStoreUtils.isInsertOnlyTable(tblObj.getParameters())) { + throw new SemanticException("Merge is not supported for MM tables"); + } + mergeDesc.setTableDesc(Utilities.getTableDesc(tblObj)); List bucketCols = null; Class inputFormatClass = null; @@ -1703,11 +1723,11 @@ private void analyzeAlterTablePartMergeFiles(ASTNode ast, TableDesc tblDesc = Utilities.getTableDesc(tblObj); Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc); mergeDesc.setOutputDir(queryTmpdir); + // No need to handle MM tables - unsupported path. LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, - partSpec == null ? new HashMap<>() : partSpec); + partSpec == null ? new HashMap<>() : partSpec, null); ltd.setLbCtx(lbCtx); - Task moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), - conf); + Task moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf); mergeTask.addDependentTask(moveTsk); if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java index 76331fc..0e3c185 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java @@ -145,7 +145,8 @@ private EximUtil() { */ public static URI getValidatedURI(HiveConf conf, String dcPath) throws SemanticException { try { - boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE); + boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE) + || conf.getBoolVar(HiveConf.ConfVars.HIVEEXIMTESTMODE); URI uri = new Path(dcPath).toUri(); FileSystem fs = FileSystem.get(uri, conf); // Get scheme from FileSystem @@ -201,7 +202,8 @@ static void validateTable(org.apache.hadoop.hive.ql.metadata.Table table) throws public static String relativeToAbsolutePath(HiveConf conf, String location) throws SemanticException { try { - boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE); + boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE) + || conf.getBoolVar(HiveConf.ConfVars.HIVEEXIMTESTMODE); if (testMode) { URI uri = new Path(location).toUri(); FileSystem fs = FileSystem.get(uri, conf); @@ -211,6 +213,9 @@ public static String relativeToAbsolutePath(HiveConf conf, String location) if (!path.startsWith("/")) { path = (new Path(System.getProperty("test.tmp.dir"), path)).toUri().getPath(); } + if (StringUtils.isEmpty(scheme)) { + scheme = "pfile"; + } try { uri = new URI(scheme, authority, path, null, null); } catch (URISyntaxException e) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java index b8c6ea9..54ee7ae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java @@ -18,11 +18,45 @@
Licensed to the Apache Software Foundation (ASF) under one package org.apache.hadoop.hive.ql.parse; + +import org.apache.hadoop.hive.ql.metadata.HiveException; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.Serializable; +import java.net.URI; +import java.util.HashSet; +import java.util.List; + +import org.antlr.runtime.tree.Tree; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.ValidReadTxnList; +import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.QueryState; +import org.apache.hadoop.hive.ql.exec.ReplCopyTask; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.antlr.runtime.tree.Tree; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.metadata.InvalidTableException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.PartitionIterable; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.CopyWork; +import org.slf4j.Logger; import org.apache.hadoop.hive.ql.parse.repl.dump.TableExport; /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index 1b0a2f0..8f94679 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -315,8 +315,8 @@ public static void removeUnionOperators(GenTezProcContext context, BaseWork work linked.add(desc); desc.setDirName(new Path(path, "" + linked.size())); + Utilities.LOG14535.info("removing union - new desc with " + desc.getDirName() + "; parent " + path); desc.setLinkedFileSink(true); - desc.setParentDir(path); desc.setLinkedFileSinkDesc(linked); } @@ -383,6 +383,7 @@ public static void processFileSink(GenTezProcContext context, FileSinkOperator f // If underlying data is RCFile or OrcFile, RCFileBlockMerge task or // OrcFileStripeMerge task would be created. 
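The union-related hunks above (UnionProcFactory and GenTezUtils.removeUnionOperators) give each union branch a linked file sink whose directory is a numbered child of one shared parent, and they drop the explicit setParentDir call; presumably the parent can now be derived from the sink's own directory. A small plain-Java illustration of that layout, with an invented staging path:

    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.ArrayList;
    import java.util.List;

    public class LinkedSinkDirsSketch {
      // Every branch writes under <parent>/<index>, so the later merge/move step can
      // treat the parent directory as a single output root.
      static List<Path> assignBranchDirs(Path parent, int branches) {
        List<Path> dirs = new ArrayList<>();
        for (int i = 1; i <= branches; i++) {
          dirs.add(parent.resolve(String.valueOf(i)));
        }
        return dirs;
      }

      public static void main(String[] args) {
        for (Path p : assignBranchDirs(Paths.get("/tmp/hive-staging/-ext-10002"), 3)) {
          System.out.println(p);
        }
      }
    }
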
LOG.info("using CombineHiveInputformat for the merge job"); + Utilities.LOG14535.info("will generate MR work for merging files from " + fileSink.getConf().getDirName() + " to " + finalName); GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName, context.dependencyTask, context.moveTask, hconf, context.currentTask); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index aa4c660..a41cc8b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -49,11 +49,13 @@ import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; +import org.apache.hadoop.hive.ql.exec.ImportCommitWork; import org.apache.hadoop.hive.ql.exec.ReplCopyTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -62,6 +64,8 @@ import org.apache.hadoop.hive.ql.parse.repl.load.MetaData; import org.apache.hadoop.hive.ql.parse.repl.load.UpdatedMetaDataTracker; import org.apache.hadoop.hive.ql.plan.AddPartitionDesc; +import org.apache.hadoop.hive.ql.plan.CopyWork; +import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.ImportTableDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; @@ -131,6 +135,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { parsedTableName = dbTablePair.getValue(); // get partition metadata if partition specified if (child.getChildCount() == 2) { + @SuppressWarnings("unused") // TODO: wtf? ASTNode partspec = (ASTNode) child.getChild(1); isPartSpecSet = true; parsePartitionSpec(child, parsedPartSpec); @@ -229,6 +234,7 @@ public static boolean prepareImport(boolean isImportCmd, } catch (Exception e) { throw new HiveException(e); } + boolean isSourceMm = MetaStoreUtils.isInsertOnlyTable(tblDesc.getTblProps()); if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){ tblDesc.setReplicationSpec(replicationSpec); @@ -236,6 +242,9 @@ public static boolean prepareImport(boolean isImportCmd, } if (isExternalSet){ + if (isSourceMm) { + throw new SemanticException("Cannot import an MM table as external"); + } tblDesc.setExternal(isExternalSet); // This condition-check could have been avoided, but to honour the old // default of not calling if it wasn't set, we retain that behaviour. 
@@ -303,16 +312,24 @@ public static boolean prepareImport(boolean isImportCmd, tableExists = true; } - if (!replicationSpec.isInReplicationScope()){ + Long txnId = SessionState.get().getTxnMgr().getCurrentTxnId(); + int stmtId = 0; + //todo due to the master merge, tblDesc is no longer CreateTableDesc, but ImportTableDesc + /* + if (txnId != null) { + tblDesc.setInitialMmWriteId(txnId); + } + */ + if (!replicationSpec.isInReplicationScope()) { createRegularImportTasks( tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table, - fromURI, fs, wh, x); + fromURI, fs, wh, x, txnId, stmtId, isSourceMm); } else { createReplImportTasks( tblDesc, partitionDescs, replicationSpec, waitOnPrecursor, table, - fromURI, fs, wh, x, updatedMetadata); + fromURI, fs, wh, x, txnId, stmtId, isSourceMm, updatedMetadata); } return tableExists; } @@ -344,16 +361,37 @@ private static ImportTableDesc getBaseCreateTableDescFromTable(String dbName, return tblDesc; } + private static Task loadTable(URI fromURI, Table table, boolean replace, Path tgtPath, - ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x) { + ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x, + Long txnId, int stmtId, boolean isSourceMm) { Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME); - Path tmpPath = x.getCtx().getExternalTmpPath(tgtPath); - Task copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, dataPath, tmpPath, x.getConf()); - LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath, - Utilities.getTableDesc(table), new TreeMap(), - replace); - Task loadTableTask = TaskFactory.get(new MoveWork(x.getInputs(), - x.getOutputs(), loadTableWork, null, false), x.getConf()); + Path destPath = !MetaStoreUtils.isInsertOnlyTable(table.getParameters()) ? x.getCtx().getExternalTmpPath(tgtPath) + : new Path(tgtPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId)); + Utilities.LOG14535.info("adding import work for table with source location: " + + dataPath + "; table: " + tgtPath + "; copy destination " + destPath + "; mm " + + txnId + " (src " + isSourceMm + ") for " + (table == null ? "a new table" : table.getTableName())); + + Task copyTask = null; + if (replicationSpec.isInReplicationScope()) { + if (isSourceMm || txnId != null) { + // TODO: ReplCopyTask is completely screwed. Need to support when it's not as screwed. 
+ throw new RuntimeException( + "Not supported right now because Replication is completely screwed"); + } + copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, dataPath, destPath, x.getConf()); + } else { + CopyWork cw = new CopyWork(dataPath, destPath, false); + cw.setSkipSourceMmDirs(isSourceMm); + copyTask = TaskFactory.get(cw, x.getConf()); + } + + LoadTableDesc loadTableWork = new LoadTableDesc(destPath, + Utilities.getTableDesc(table), new TreeMap(), replace, txnId); + loadTableWork.setTxnId(txnId); + loadTableWork.setStmtId(stmtId); + MoveWork mv = new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false); + Task loadTableTask = TaskFactory.get(mv, x.getConf()); copyTask.addDependentTask(loadTableTask); x.getTasks().add(copyTask); return loadTableTask; @@ -390,14 +428,16 @@ private static ImportTableDesc getBaseCreateTableDescFromTable(String dbName, } private static Task addSinglePartition(URI fromURI, FileSystem fs, ImportTableDesc tblDesc, - Table table, Warehouse wh, - AddPartitionDesc addPartitionDesc, ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x) + Table table, Warehouse wh, AddPartitionDesc addPartitionDesc, ReplicationSpec replicationSpec, + EximUtil.SemanticAnalyzerWrapperContext x, Long txnId, int stmtId, boolean isSourceMm, + Task commitTask) throws MetaException, IOException, HiveException { AddPartitionDesc.OnePartitionDesc partSpec = addPartitionDesc.getPartition(0); if (tblDesc.isExternal() && tblDesc.getLocation() == null) { x.getLOG().debug("Importing in-place: adding AddPart for partition " + partSpecToString(partSpec.getPartSpec())); // addPartitionDesc already has the right partition location + @SuppressWarnings("unchecked") Task addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc), x.getConf()); return addPartTask; @@ -408,21 +448,47 @@ private static ImportTableDesc getBaseCreateTableDescFromTable(String dbName, + partSpecToString(partSpec.getPartSpec()) + " with source location: " + srcLocation); Path tgtLocation = new Path(partSpec.getLocation()); - Path tmpPath = x.getCtx().getExternalTmpPath(tgtLocation); - Task copyTask = ReplCopyTask.getLoadCopyTask( - replicationSpec, new Path(srcLocation), tmpPath, x.getConf()); + Path destPath = !MetaStoreUtils.isInsertOnlyTable(table.getParameters()) ? x.getCtx().getExternalTmpPath(tgtLocation) + : new Path(tgtLocation, AcidUtils.deltaSubdir(txnId, txnId, stmtId)); + Path moveTaskSrc = !MetaStoreUtils.isInsertOnlyTable(table.getParameters()) ? destPath : tgtLocation; + Utilities.LOG14535.info("adding import work for partition with source location: " + + srcLocation + "; target: " + tgtLocation + "; copy dest " + destPath + "; mm " + + txnId + " (src " + isSourceMm + ") for " + partSpecToString(partSpec.getPartSpec())); + + + Task copyTask = null; + if (replicationSpec.isInReplicationScope()) { + if (isSourceMm || txnId != null) { + // TODO: ReplCopyTask is completely screwed. Need to support when it's not as screwed. 
+ throw new RuntimeException( + "Not supported right now because Replication is completely screwed"); + } + copyTask = ReplCopyTask.getLoadCopyTask( + replicationSpec, new Path(srcLocation), destPath, x.getConf()); + } else { + CopyWork cw = new CopyWork(new Path(srcLocation), destPath, false); + cw.setSkipSourceMmDirs(isSourceMm); + copyTask = TaskFactory.get(cw, x.getConf()); + } + Task addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc), x.getConf()); - LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath, - Utilities.getTableDesc(table), - partSpec.getPartSpec(), replicationSpec.isReplace()); + LoadTableDesc loadTableWork = new LoadTableDesc(moveTaskSrc, Utilities.getTableDesc(table), + partSpec.getPartSpec(), replicationSpec.isReplace(), txnId); + loadTableWork.setTxnId(txnId); + loadTableWork.setStmtId(stmtId); loadTableWork.setInheritTableSpecs(false); + // Do not commit the write ID from each task; need to commit once. + // TODO: we should just change the import to use a single MoveTask, like dynparts. + loadTableWork.setIntermediateInMmWrite(txnId != null); Task loadPartTask = TaskFactory.get(new MoveWork( - x.getInputs(), x.getOutputs(), loadTableWork, null, false), - x.getConf()); + x.getInputs(), x.getOutputs(), loadTableWork, null, false), x.getConf()); copyTask.addDependentTask(loadPartTask); addPartTask.addDependentTask(loadPartTask); x.getTasks().add(copyTask); + if (commitTask != null) { + loadPartTask.addDependentTask(commitTask); + } return addPartTask; } } @@ -589,13 +655,11 @@ public static void checkTable(Table table, ImportTableDesc tableDesc, Class replaced = HiveFileFormatUtils .getOutputFormatSubstitute(origin); if (replaced == null) { - throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE - .getMsg()); + throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); } importedofc = replaced.getCanonicalName(); } catch(Exception e) { - throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE - .getMsg()); + throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); } if ((!existingifc.equals(importedifc)) || (!existingofc.equals(importedofc))) { @@ -711,36 +775,36 @@ private static String checkParams(Map map1, * @param wh */ private static void createRegularImportTasks( - ImportTableDesc tblDesc, - List partitionDescs, - boolean isPartSpecSet, - ReplicationSpec replicationSpec, - Table table, URI fromURI, FileSystem fs, Warehouse wh, EximUtil.SemanticAnalyzerWrapperContext x) + ImportTableDesc tblDesc, List partitionDescs, boolean isPartSpecSet, + ReplicationSpec replicationSpec, Table table, URI fromURI, FileSystem fs, Warehouse wh, + EximUtil.SemanticAnalyzerWrapperContext x, Long txnId, int stmtId, boolean isSourceMm) throws HiveException, URISyntaxException, IOException, MetaException { - if (table != null){ + if (table != null) { if (table.isPartitioned()) { x.getLOG().debug("table partitioned"); + Task ict = createImportCommitTask( + table.getDbName(), table.getTableName(), txnId, stmtId, x.getConf(), + MetaStoreUtils.isInsertOnlyTable(table.getParameters())); for (AddPartitionDesc addPartitionDesc : partitionDescs) { Map partSpec = addPartitionDesc.getPartition(0).getPartSpec(); org.apache.hadoop.hive.ql.metadata.Partition ptn = null; if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) { x.getTasks().add(addSinglePartition( - fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x)); + fromURI, fs, tblDesc, table, wh, addPartitionDesc, 
replicationSpec, x, txnId, stmtId, isSourceMm, ict)); } else { throw new SemanticException( ErrorMsg.PARTITION_EXISTS.getMsg(partSpecToString(partSpec))); } } - } else { x.getLOG().debug("table non-partitioned"); // ensure if destination is not empty only for regular import Path tgtPath = new Path(table.getDataLocation().toString()); FileSystem tgtFs = FileSystem.get(tgtPath.toUri(), x.getConf()); checkTargetLocationEmpty(tgtFs, tgtPath, replicationSpec, x.getLOG()); - loadTable(fromURI, table, false, tgtPath, replicationSpec,x); + loadTable(fromURI, table, false, tgtPath, replicationSpec, x, txnId, stmtId, isSourceMm); } // Set this to read because we can't overwrite any existing partitions x.getOutputs().add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK)); @@ -756,9 +820,12 @@ private static void createRegularImportTasks( x.getOutputs().add(new WriteEntity(parentDb, WriteEntity.WriteType.DDL_SHARED)); if (isPartitioned(tblDesc)) { + Task ict = createImportCommitTask( + tblDesc.getDatabaseName(), tblDesc.getTableName(), txnId, stmtId, x.getConf(), + MetaStoreUtils.isInsertOnlyTable(tblDesc.getTblProps())); for (AddPartitionDesc addPartitionDesc : partitionDescs) { - t.addDependentTask( - addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x)); + t.addDependentTask(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, + replicationSpec, x, txnId, stmtId, isSourceMm, ict)); } } else { x.getLOG().debug("adding dependent CopyWork/MoveWork for table"); @@ -775,13 +842,27 @@ private static void createRegularImportTasks( } FileSystem tgtFs = FileSystem.get(tablePath.toUri(), x.getConf()); checkTargetLocationEmpty(tgtFs, tablePath, replicationSpec,x.getLOG()); - t.addDependentTask(loadTable(fromURI, table, false, tablePath, replicationSpec, x)); + if (isSourceMm) { // since the target table doesn't exist, it should inherit the source table's properties + Map tblproperties = table.getParameters(); + tblproperties.put("transactional", "true"); + tblproperties.put("transactional_properties", "insert_only"); + table.setParameters(tblproperties); + } + t.addDependentTask(loadTable(fromURI, table, false, tablePath, replicationSpec, x, txnId, stmtId, isSourceMm)); } } x.getTasks().add(t); } } + private static Task createImportCommitTask( + String dbName, String tblName, Long txnId, int stmtId, HiveConf conf, boolean isMmTable) { + @SuppressWarnings("unchecked") + Task ict = (!isMmTable) ?
null : TaskFactory.get( + new ImportCommitWork(dbName, tblName, txnId, stmtId), conf); + return ict; + } + /** * Create tasks for repl import */ @@ -790,10 +871,11 @@ private static void createReplImportTasks( List partitionDescs, ReplicationSpec replicationSpec, boolean waitOnPrecursor, Table table, URI fromURI, FileSystem fs, Warehouse wh, - EximUtil.SemanticAnalyzerWrapperContext x, + EximUtil.SemanticAnalyzerWrapperContext x, Long txnId, int stmtId, boolean isSourceMm, UpdatedMetaDataTracker updatedMetadata) throws HiveException, URISyntaxException, IOException, MetaException { + Task dr = null; WriteEntity.WriteType lockType = WriteEntity.WriteType.DDL_NO_LOCK; // Normally, on import, trying to create a table or a partition in a db that does not yet exist @@ -865,17 +947,20 @@ private static void createReplImportTasks( if (!replicationSpec.isMetadataOnly()) { if (isPartitioned(tblDesc)) { + Task ict = createImportCommitTask( + tblDesc.getDatabaseName(), tblDesc.getTableName(), txnId, stmtId, x.getConf(), + MetaStoreUtils.isInsertOnlyTable(tblDesc.getTblProps())); for (AddPartitionDesc addPartitionDesc : partitionDescs) { addPartitionDesc.setReplicationSpec(replicationSpec); t.addDependentTask( - addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x)); + addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, txnId, stmtId, isSourceMm, ict)); if (updatedMetadata != null) { updatedMetadata.addPartition(addPartitionDesc.getPartition(0).getPartSpec()); } } } else { x.getLOG().debug("adding dependent CopyWork/MoveWork for table"); - t.addDependentTask(loadTable(fromURI, table, true, new Path(tblDesc.getLocation()),replicationSpec, x)); + t.addDependentTask(loadTable(fromURI, table, true, new Path(tblDesc.getLocation()), replicationSpec, x, txnId, stmtId, isSourceMm)); } } // Simply create @@ -888,11 +973,13 @@ private static void createReplImportTasks( addPartitionDesc.setReplicationSpec(replicationSpec); Map partSpec = addPartitionDesc.getPartition(0).getPartSpec(); org.apache.hadoop.hive.ql.metadata.Partition ptn = null; - + Task ict = replicationSpec.isMetadataOnly() ? 
null : createImportCommitTask( + tblDesc.getDatabaseName(), tblDesc.getTableName(), txnId, stmtId, x.getConf(), + MetaStoreUtils.isInsertOnlyTable(tblDesc.getTblProps())); if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) { if (!replicationSpec.isMetadataOnly()){ x.getTasks().add(addSinglePartition( - fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x)); + fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, txnId, stmtId, isSourceMm, ict)); if (updatedMetadata != null) { updatedMetadata.addPartition(addPartitionDesc.getPartition(0).getPartSpec()); } @@ -903,7 +990,7 @@ private static void createReplImportTasks( if (replicationSpec.allowReplacementInto(ptn.getParameters())){ if (!replicationSpec.isMetadataOnly()){ x.getTasks().add(addSinglePartition( - fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x)); + fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, txnId, stmtId, isSourceMm, ict)); } else { x.getTasks().add(alterSinglePartition( fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, ptn, x)); @@ -928,7 +1015,8 @@ private static void createReplImportTasks( x.getLOG().debug("table non-partitioned"); if (!replicationSpec.isMetadataOnly()) { // repl-imports are replace-into unless the event is insert-into - loadTable(fromURI, table, replicationSpec.isReplace(), new Path(fromURI), replicationSpec, x); + loadTable(fromURI, table, replicationSpec.isReplace(), new Path(fromURI), + replicationSpec, x, txnId, stmtId, isSourceMm); } else { x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec)); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java index 653b657..f31775e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java @@ -84,7 +84,6 @@ public IndexUpdater(LoadTableDesc loadTableWork, Set inputs, } private void doIndexUpdate(List tblIndexes) throws HiveException { - Driver driver = new Driver(this.conf); for (Index idx : tblIndexes) { StringBuilder sb = new StringBuilder(); sb.append("ALTER INDEX "); @@ -93,9 +92,7 @@ private void doIndexUpdate(List tblIndexes) throws HiveException { sb.append(idx.getDbName()).append('.'); sb.append(idx.getOrigTableName()); sb.append(" REBUILD"); - driver.compile(sb.toString(), false); - tasks.addAll(driver.getPlan().getRootTasks()); - inputs.addAll(driver.getPlan().getInputs()); + compileRebuild(sb.toString()); } } @@ -108,8 +105,7 @@ private void doIndexUpdate(List tblIndexes, Map } } - private void doIndexUpdate(Index index, Map partSpec) throws - HiveException { + private void doIndexUpdate(Index index, Map partSpec) { StringBuilder ps = new StringBuilder(); boolean first = true; ps.append("("); @@ -133,14 +129,19 @@ private void doIndexUpdate(Index index, Map partSpec) throws sb.append(" PARTITION "); sb.append(ps.toString()); sb.append(" REBUILD"); + compileRebuild(sb.toString()); + } + + private void compileRebuild(String query) { Driver driver = new Driver(this.conf); - driver.compile(sb.toString(), false); + driver.compile(query, false); tasks.addAll(driver.getPlan().getRootTasks()); inputs.addAll(driver.getPlan().getInputs()); } - private boolean containsPartition(Index index, Map partSpec) - throws HiveException { + + private boolean containsPartition(Index index, + Map partSpec) throws HiveException { String[] qualified = 
Utilities.getDbTableName(index.getDbName(), index.getIndexTableName()); Table indexTable = hive.getTable(qualified[0], qualified[1]); List parts = hive.getPartitions(indexTable, partSpec); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index fa79700..5e70863 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -20,8 +20,6 @@ import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; - import java.io.IOException; import java.io.Serializable; import java.net.URI; @@ -38,6 +36,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -51,9 +50,11 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.mapred.InputFormat; import com.google.common.collect.Lists; @@ -215,12 +216,12 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { if(ts.tableHandle.isStoredAsSubDirectories()) { throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg()); } - List parts = ts.tableHandle.getPartitionKeys(); if ((parts != null && parts.size() > 0) && (ts.partSpec == null || ts.partSpec.size() == 0)) { throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg()); } + List bucketCols = ts.tableHandle.getBucketCols(); if (bucketCols != null && !bucketCols.isEmpty()) { String error = StrictChecks.checkBucketing(conf); @@ -228,7 +229,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { + " and use 'insert... select' to allow Hive to enforce bucketing. 
" + error); } - if(AcidUtils.isAcidTable(ts.tableHandle)) { + if(AcidUtils.isAcidTable(ts.tableHandle) && !MetaStoreUtils.isInsertOnlyTable(ts.tableHandle.getParameters())) { throw new SemanticException(ErrorMsg.LOAD_DATA_ON_ACID_TABLE, ts.tableHandle.getCompleteName()); } // make sure the arguments make sense @@ -274,10 +275,18 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { } } + Long txnId = null; + int stmtId = 0; + Table tbl = ts.tableHandle; + if (MetaStoreUtils.isInsertOnlyTable(tbl.getParameters())) { + txnId = SessionState.get().getTxnMgr().getCurrentTxnId(); + } LoadTableDesc loadTableWork; loadTableWork = new LoadTableDesc(new Path(fromURI), - Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite); + Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite, txnId); + loadTableWork.setTxnId(txnId); + loadTableWork.setStmtId(stmtId); if (preservePartitionSpecs){ // Note : preservePartitionSpecs=true implies inheritTableSpecs=false but // but preservePartitionSpecs=false(default) here is not sufficient enough diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index 9ab42f2..2ad3a40 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -343,14 +343,6 @@ public void setNameToSplitSample(HashMap nameToSplitSample) } /** - * @param loadTableWork - * the loadTableWork to set - */ - public void setLoadTableWork(List loadTableWork) { - this.loadTableWork = loadTableWork; - } - - /** * @return the loadFileWork */ public List getLoadFileWork() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java index b6d7ee8..9ac60b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork; @@ -234,7 +235,8 @@ private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext aggregationKey = aggregationKeyBuffer.toString(); // scan work - PartialScanWork scanWork = new PartialScanWork(inputPaths); + PartialScanWork scanWork = new PartialScanWork(inputPaths, + Utilities.getTableDesc(tableScan.getConf().getTableMetadata())); scanWork.setMapperCannotSpanPartns(true); scanWork.setAggKey(aggregationKey); scanWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir(), parseContext.getConf()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1c74779..a899706 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -253,6 +253,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { + public static final String DUMMY_DATABASE = "_dummy_database"; public static final String DUMMY_TABLE = "_dummy_table"; public static final String SUBQUERY_TAG_1 = "-subquery1"; @@ -275,7 +276,6 @@ private final Map 
joinContext; private final Map smbMapJoinContext; private final HashMap topToTable; - private final Map fsopToTable; private final List reduceSinkOperatorsAddedByEnforceBucketingSorting; private final HashMap> topToTableProps; private QB qb; @@ -376,7 +376,6 @@ public SemanticAnalyzer(QueryState queryState) throws SemanticException { smbMapJoinContext = new HashMap(); // Must be deterministic order map for consistent q-test output across Java versions topToTable = new LinkedHashMap(); - fsopToTable = new HashMap(); reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList(); topToTableProps = new HashMap>(); destTableId = 1; @@ -440,7 +439,6 @@ protected void reset(boolean clearCache) { opToPartToSkewedPruner.clear(); opToSamplePruner.clear(); nameToSplitSample.clear(); - fsopToTable.clear(); resultSchema = null; createVwDesc = null; viewsExpanded = null; @@ -2025,6 +2023,7 @@ private void getMetaData(QB qb, ReadEntity parentInput) throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias)); } } + if (tab.isView()) { if (qb.getParseInfo().isAnalyzeCommand()) { throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg()); @@ -6721,7 +6720,8 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb, nullOrder.append(sortOrder == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC ? 'a' : 'z'); } input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(), nullOrder.toString(), - maxReducers, (AcidUtils.isAcidTable(dest_tab) ? getAcidType(dest) : AcidUtils.Operation.NOT_ACID)); + maxReducers, (AcidUtils.isFullAcidTable(dest_tab) ? + getAcidType(table_desc.getOutputFileFormatClass(), dest) : AcidUtils.Operation.NOT_ACID)); reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator)input.getParentOperators().get(0)); ctx.setMultiFileSpray(multiFileSpray); ctx.setNumFiles(numFiles); @@ -6799,79 +6799,38 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) LoadTableDesc ltd = null; ListBucketingCtx lbCtx = null; Map partSpec = null; + boolean isMmTable = false, isMmCtas = false; + Long txnId = null; switch (dest_type.intValue()) { case QBMetaData.DEST_TABLE: { dest_tab = qbm.getDestTableForAlias(dest); - destTableIsAcid = AcidUtils.isAcidTable(dest_tab); + destTableIsAcid = AcidUtils.isFullAcidTable(dest_tab); destTableIsTemporary = dest_tab.isTemporary(); // Is the user trying to insert into a external tables - if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && - (dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE))) { - throw new SemanticException( - ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName())); - } + checkExternalTable(dest_tab); partSpec = qbm.getPartSpecForAlias(dest); dest_path = dest_tab.getPath(); - // If the query here is an INSERT_INTO and the target is an immutable table, - // verify that our destination is empty before proceeding - if (dest_tab.isImmutable() && - qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(),dest_tab.getTableName())){ - try { - FileSystem fs = dest_path.getFileSystem(conf); - if (! 
MetaStoreUtils.isDirEmpty(fs,dest_path)){ - LOG.warn("Attempted write into an immutable table : " - + dest_tab.getTableName() + " : " + dest_path); - throw new SemanticException( - ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName())); - } - } catch (IOException ioe) { - LOG.warn("Error while trying to determine if immutable table has any data : " - + dest_tab.getTableName() + " : " + dest_path); - throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage())); - } - } - - // check for partition - List parts = dest_tab.getPartitionKeys(); - if (parts != null && parts.size() > 0) { // table is partitioned - if (partSpec == null || partSpec.size() == 0) { // user did NOT specify partition - throw new SemanticException(generateErrorMessage( - qb.getParseInfo().getDestForClause(dest), - ErrorMsg.NEED_PARTITION_ERROR.getMsg())); - } - dpCtx = qbm.getDPCtx(dest); - if (dpCtx == null) { - dest_tab.validatePartColumnNames(partSpec, false); - dpCtx = new DynamicPartitionCtx(dest_tab, partSpec, - conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), - conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE)); - qbm.setDPCtx(dest, dpCtx); - } + checkImmutableTable(qb, dest_tab, dest_path, false); - if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // allow DP - throw new SemanticException(generateErrorMessage( - qb.getParseInfo().getDestForClause(dest), - ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg())); - } - if (dpCtx.getSPPath() != null) { - dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath()); - } - if ((dest_tab.getNumBuckets() > 0)) { - dpCtx.setNumBuckets(dest_tab.getNumBuckets()); - } + // Check for dynamic partitions. + dpCtx = checkDynPart(qb, qbm, dest_tab, partSpec, dest); + if (dpCtx != null && dpCtx.getSPPath() != null) { + dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath()); } boolean isNonNativeTable = dest_tab.isNonNative(); - if (isNonNativeTable) { + isMmTable = MetaStoreUtils.isInsertOnlyTable(dest_tab.getParameters()); + if (isNonNativeTable || isMmTable) { queryTmpdir = dest_path; } else { queryTmpdir = ctx.getTempDirForPath(dest_path, true); } + Utilities.LOG14535.info("create filesink w/DEST_TABLE specifying " + queryTmpdir + " from " + dest_path); if (dpCtx != null) { // set the root of the temporary path where dynamic partition columns will populate dpCtx.setRootPath(queryTmpdir); @@ -6898,7 +6857,15 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); checkAcidConstraints(qb, table_desc, dest_tab); } - ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp); + if (MetaStoreUtils.isInsertOnlyTable(table_desc.getProperties())) { + acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); + } + if (isMmTable) { + txnId = SessionState.get().getTxnMgr().getCurrentTxnId(); + } + boolean isReplace = !qb.getParseInfo().isInsertIntoTable( + dest_tab.getDbName(), dest_tab.getTableName()); + ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp, isReplace, txnId); // For Acid table, Insert Overwrite shouldn't replace the table content. 
We keep the old // deltas and base and leave them up to the cleaner to clean up ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), @@ -6915,45 +6882,8 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) createInsertDesc(dest_tab, overwrite); } - WriteEntity output = null; - - // Here only register the whole table for post-exec hook if no DP present - // in the case of DP, we will register WriteEntity in MoveTask when the - // list of dynamically created partitions are known. - if ((dpCtx == null || dpCtx.getNumDPCols() == 0)) { - output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest)); - if (!outputs.add(output)) { - throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES - .getMsg(dest_tab.getTableName())); - } - } - if ((dpCtx != null) && (dpCtx.getNumDPCols() >= 0)) { - // No static partition specified - if (dpCtx.getNumSPCols() == 0) { - output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest), false); - outputs.add(output); - output.setDynamicPartitionWrite(true); - } - // part of the partition specified - // Create a DummyPartition in this case. Since, the metastore does not store partial - // partitions currently, we need to store dummy partitions - else { - try { - String ppath = dpCtx.getSPPath(); - ppath = ppath.substring(0, ppath.length() - 1); - DummyPartition p = - new DummyPartition(dest_tab, dest_tab.getDbName() - + "@" + dest_tab.getTableName() + "@" + ppath, - partSpec); - output = new WriteEntity(p, getWriteType(dest), false); - output.setDynamicPartitionWrite(true); - outputs.add(output); - } catch (HiveException e) { - throw new SemanticException(e.getMessage(), e); - } - } - } - + WriteEntity output = generateTableWriteEntity( + dest, dest_tab, partSpec, ltd, dpCtx, isNonNativeTable); ctx.getLoadTableOutputMap().put(ltd, output); break; } @@ -6961,41 +6891,23 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) dest_part = qbm.getDestPartitionForAlias(dest); dest_tab = dest_part.getTable(); - destTableIsAcid = AcidUtils.isAcidTable(dest_tab); - if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && - dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE)) { - throw new SemanticException( - ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName())); - } + destTableIsAcid = AcidUtils.isFullAcidTable(dest_tab); + + checkExternalTable(dest_tab); Path tabPath = dest_tab.getPath(); Path partPath = dest_part.getDataLocation(); - // If the query here is an INSERT_INTO and the target is an immutable table, - // verify that our destination is empty before proceeding - if (dest_tab.isImmutable() && - qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(),dest_tab.getTableName())){ - try { - FileSystem fs = partPath.getFileSystem(conf); - if (! 
MetaStoreUtils.isDirEmpty(fs,partPath)){ - LOG.warn("Attempted write into an immutable table partition : " - + dest_tab.getTableName() + " : " + partPath); - throw new SemanticException( - ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName())); - } - } catch (IOException ioe) { - LOG.warn("Error while trying to determine if immutable table partition has any data : " - + dest_tab.getTableName() + " : " + partPath); - throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage())); - } - } + checkImmutableTable(qb, dest_tab, partPath, true); // if the table is in a different dfs than the partition, // replace the partition's dfs with the table's dfs. dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri() .getAuthority(), partPath.toUri().getPath()); - queryTmpdir = ctx.getTempDirForPath(dest_path, true); + isMmTable = MetaStoreUtils.isInsertOnlyTable(dest_tab.getParameters()); + queryTmpdir = isMmTable ? dest_path : ctx.getTempDirForPath(dest_path, true); + Utilities.LOG14535.info("create filesink w/DEST_PARTITION specifying " + queryTmpdir + " from " + dest_path); table_desc = Utilities.getTableDesc(dest_tab); // Add sorting/bucketing if needed @@ -7013,7 +6925,13 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); checkAcidConstraints(qb, table_desc, dest_tab); } - ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp); + if (MetaStoreUtils.isInsertOnlyTable(dest_part.getTable().getParameters())) { + acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest); + } + if (isMmTable) { + txnId = SessionState.get().getTxnMgr().getCurrentTxnId(); + } + ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp, txnId); // For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old // deltas and base and leave them up to the cleaner to clean up ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), @@ -7035,24 +6953,6 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) case QBMetaData.DEST_DFS_FILE: { dest_path = new Path(qbm.getDestFileForAlias(dest)); - if (isLocal) { - // for local directory - we always write to map-red intermediate - // store and then copy to local fs - queryTmpdir = ctx.getMRTmpPath(); - } else { - // otherwise write to the file system implied by the directory - // no copy is required. 
we may want to revisit this policy in future - - try { - Path qPath = FileUtils.makeQualified(dest_path, conf); - queryTmpdir = ctx.getTempDirForPath(qPath, true); - } catch (Exception e) { - throw new SemanticException("Error creating temporary folder on: " - + dest_path, e); - } - } - String cols = ""; - String colTypes = ""; ArrayList colInfos = inputRR.getColumnInfos(); // CTAS case: the file output format and serde are defined by the create @@ -7060,68 +6960,42 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) List field_schemas = null; CreateTableDesc tblDesc = qb.getTableDesc(); CreateViewDesc viewDesc = qb.getViewDesc(); + boolean isCtas = false; if (tblDesc != null) { field_schemas = new ArrayList(); destTableIsTemporary = tblDesc.isTemporary(); destTableIsMaterialization = tblDesc.isMaterialization(); + if (!destTableIsTemporary && MetaStoreUtils.isInsertOnlyTable(tblDesc.getTblProps(), true)) { + isMmTable = isMmCtas = true; + txnId = SessionState.get().getTxnMgr().getCurrentTxnId(); + tblDesc.setInitialMmWriteId(txnId); + } } else if (viewDesc != null) { field_schemas = new ArrayList(); destTableIsTemporary = false; } - boolean first = true; - for (ColumnInfo colInfo : colInfos) { - String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); - - if (nm[1] != null) { // non-null column alias - colInfo.setAlias(nm[1]); - } - - String colName = colInfo.getInternalName(); //default column name - if (field_schemas != null) { - FieldSchema col = new FieldSchema(); - if (!("".equals(nm[0])) && nm[1] != null) { - colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase(); // remove `` - } - colName = fixCtasColumnName(colName); - col.setName(colName); - String typeName = colInfo.getType().getTypeName(); - // CTAS should NOT create a VOID type - if (typeName.equals(serdeConstants.VOID_TYPE_NAME)) { - throw new SemanticException(ErrorMsg.CTAS_CREATES_VOID_TYPE - .getMsg(colName)); - } - col.setType(typeName); - field_schemas.add(col); - } - - if (!first) { - cols = cols.concat(","); - colTypes = colTypes.concat(":"); - } - - first = false; - cols = cols.concat(colName); - - // Replace VOID type with string when the output is a temp table or - // local files. - // A VOID type can be generated under the query: - // - // select NULL from tt; - // or - // insert overwrite local directory "abc" select NULL from tt; - // - // where there is no column type to which the NULL value should be - // converted. - // - String tName = colInfo.getType().getTypeName(); - if (tName.equals(serdeConstants.VOID_TYPE_NAME)) { - colTypes = colTypes.concat(serdeConstants.STRING_TYPE_NAME); - } else { - colTypes = colTypes.concat(tName); + if (isLocal) { + assert !isMmTable; + // for local directory - we always write to map-red intermediate + // store and then copy to local fs + queryTmpdir = ctx.getMRTmpPath(); + } else { + // otherwise write to the file system implied by the directory + // no copy is required. we may want to revisit this policy in future + try { + Path qPath = FileUtils.makeQualified(dest_path, conf); + queryTmpdir = isMmTable ? 
qPath : ctx.getTempDirForPath(qPath, true); + Utilities.LOG14535.info("Setting query directory " + queryTmpdir + " from " + dest_path + " (" + isMmTable + ")"); + } catch (Exception e) { + throw new SemanticException("Error creating temporary folder on: " + + dest_path, e); } } + ColsAndTypes ct = deriveFileSinkColTypes(inputRR, field_schemas); + String cols = ct.cols, colTypes = ct.colTypes; + // update the create table descriptor with the resulting schema. if (tblDesc != null) { tblDesc.setCols(new ArrayList(field_schemas)); @@ -7138,8 +7012,9 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) } boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE); - loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, dest_path, isDfsDir, cols, - colTypes)); + // Create LFD even for MM CTAS - it's a no-op move, but it still seems to be used for stats. + loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, + queryTmpdir, dest_path, isDfsDir, cols, colTypes, isMmCtas)); if (tblDesc == null) { if (viewDesc != null) { @@ -7226,23 +7101,136 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) genPartnCols(dest, input, qb, table_desc, dest_tab, rsCtx); } - FileSinkDesc fileSinkDesc = new FileSinkDesc( - queryTmpdir, - table_desc, - conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), - currentTableId, - rsCtx.isMultiFileSpray(), - canBeMerged, - rsCtx.getNumFiles(), - rsCtx.getTotalFiles(), - rsCtx.getPartnCols(), - dpCtx, - dest_path); + FileSinkDesc fileSinkDesc = createFileSinkDesc(dest, table_desc, dest_part, + dest_path, currentTableId, destTableIsAcid, destTableIsTemporary, + destTableIsMaterialization, queryTmpdir, rsCtx, dpCtx, lbCtx, fsRS, + canBeMerged, dest_tab, txnId, isMmCtas); + if (isMmCtas) { + // Add FSD so that the LoadTask compilation could fix up its path to avoid the move. 
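A pattern that repeats across the DEST_TABLE, DEST_PARTITION and CTAS branches above: insert-only (MM) and non-native destinations skip the staging directory and let the FileSinkOperator write to the final location, so the later move degenerates to a no-op. Condensed into one hypothetical helper (the scratchDir parameter stands in for ctx.getTempDirForPath):

    import org.apache.hadoop.fs.Path;

    // Condensed view of the queryTmpdir selection in genFileSinkPlan(); scratchDir
    // represents the temp directory the Context would otherwise hand out.
    final class QueryTmpDirSketch {
      static Path chooseQueryTmpDir(boolean isMmTable, boolean isNonNativeTable,
          Path destPath, Path scratchDir) {
        // MM tables append a delta directory at write time, and non-native tables are
        // handled by their storage handler, so neither needs a staging dir plus rename.
        return (isMmTable || isNonNativeTable) ? destPath : scratchDir;
      }
    }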
+ tableDesc.setWriter(fileSinkDesc); + } + + if (SessionState.get().isHiveServerQuery() && + null != table_desc && + table_desc.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && + HiveConf.getBoolVar(conf,HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) { + fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(true); + } else { + fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(false); + } + + Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( + fileSinkDesc, fsRS, input), inputRR); + + handleLineage(ltd, output); + + if (LOG.isDebugEnabled()) { + LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: " + + dest_path + " row schema: " + inputRR.toString()); + } + + FileSinkOperator fso = (FileSinkOperator) output; + fso.getConf().setTable(dest_tab); + // the following code is used to collect column stats when + // hive.stats.autogather=true + // and it is an insert overwrite or insert into table + if (dest_tab != null && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) + && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) + && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) { + if (dest_type.intValue() == QBMetaData.DEST_TABLE) { + genAutoColumnStatsGatheringPipeline(qb, table_desc, partSpec, input, qb.getParseInfo() + .isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())); + } else if (dest_type.intValue() == QBMetaData.DEST_PARTITION) { + genAutoColumnStatsGatheringPipeline(qb, table_desc, dest_part.getSpec(), input, qb + .getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())); + + } + } + return output; + } + + private ColsAndTypes deriveFileSinkColTypes( + RowResolver inputRR, List field_schemas) throws SemanticException { + ColsAndTypes result = new ColsAndTypes("", ""); + ArrayList colInfos = inputRR.getColumnInfos(); + boolean first = true; + for (ColumnInfo colInfo : colInfos) { + String[] nm = inputRR.reverseLookup(colInfo.getInternalName()); + + if (nm[1] != null) { // non-null column alias + colInfo.setAlias(nm[1]); + } + + String colName = colInfo.getInternalName(); //default column name + if (field_schemas != null) { + FieldSchema col = new FieldSchema(); + if (!("".equals(nm[0])) && nm[1] != null) { + colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase(); // remove `` + } + colName = fixCtasColumnName(colName); + col.setName(colName); + String typeName = colInfo.getType().getTypeName(); + // CTAS should NOT create a VOID type + if (typeName.equals(serdeConstants.VOID_TYPE_NAME)) { + throw new SemanticException(ErrorMsg.CTAS_CREATES_VOID_TYPE.getMsg(colName)); + } + col.setType(typeName); + field_schemas.add(col); + } + + if (!first) { + result.cols = result.cols.concat(","); + result.colTypes = result.colTypes.concat(":"); + } + + first = false; + result.cols = result.cols.concat(colName); + + // Replace VOID type with string when the output is a temp table or + // local files. + // A VOID type can be generated under the query: + // + // select NULL from tt; + // or + // insert overwrite local directory "abc" select NULL from tt; + // + // where there is no column type to which the NULL value should be + // converted. 
+ // + String tName = colInfo.getType().getTypeName(); + if (tName.equals(serdeConstants.VOID_TYPE_NAME)) { + result.colTypes = result.colTypes.concat(serdeConstants.STRING_TYPE_NAME); + } else { + result.colTypes = result.colTypes.concat(tName); + } + } + return result; + } + + private FileSinkDesc createFileSinkDesc(String dest, TableDesc table_desc, + Partition dest_part, Path dest_path, int currentTableId, + boolean destTableIsAcid, boolean destTableIsTemporary, + boolean destTableIsMaterialization, Path queryTmpdir, + SortBucketRSCtx rsCtx, DynamicPartitionCtx dpCtx, ListBucketingCtx lbCtx, + RowSchema fsRS, boolean canBeMerged, Table dest_tab, Long mmWriteId, boolean isMmCtas) throws SemanticException { + FileSinkDesc fileSinkDesc = new FileSinkDesc(queryTmpdir, table_desc, + conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT), currentTableId, rsCtx.isMultiFileSpray(), + canBeMerged, rsCtx.getNumFiles(), rsCtx.getTotalFiles(), rsCtx.getPartnCols(), dpCtx, + dest_path, mmWriteId, isMmCtas); boolean isHiveServerQuery = SessionState.get().isHiveServerQuery(); fileSinkDesc.setHiveServerQuery(isHiveServerQuery); // If this is an insert, update, or delete on an ACID table then mark that so the // FileSinkOperator knows how to properly write to it. + boolean isDestInsertOnly = (dest_part != null && dest_part.getTable() != null && + MetaStoreUtils.isInsertOnlyTable(dest_part.getTable().getParameters())) + || (table_desc != null && MetaStoreUtils.isInsertOnlyTable(table_desc.getProperties())); + + if (isDestInsertOnly) { + fileSinkDesc.setWriteType(Operation.INSERT); + acidFileSinks.add(fileSinkDesc); + } + if (destTableIsAcid) { AcidUtils.Operation wt = updating(dest) ? AcidUtils.Operation.UPDATE : (deleting(dest) ? AcidUtils.Operation.DELETE : AcidUtils.Operation.INSERT); @@ -7288,19 +7276,11 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) } else if (dpCtx != null) { fileSinkDesc.setStaticSpec(dpCtx.getSPPath()); } + return fileSinkDesc; + } - if (isHiveServerQuery && - null != table_desc && - table_desc.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && - HiveConf.getBoolVar(conf,HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) { - fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(true); - } else { - fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(false); - } - - Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - fileSinkDesc, fsRS, input), inputRR); - + private void handleLineage(LoadTableDesc ltd, Operator output) + throws SemanticException { if (ltd != null && SessionState.get() != null) { SessionState.get().getLineageState() .mapDirToOp(ltd.getSourcePath(), output); @@ -7318,33 +7298,114 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) SessionState.get().getLineageState() .mapDirToOp(tlocation, output); } + } - if (LOG.isDebugEnabled()) { - LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: " - + dest_path + " row schema: " + inputRR.toString()); + private WriteEntity generateTableWriteEntity(String dest, Table dest_tab, + Map partSpec, LoadTableDesc ltd, + DynamicPartitionCtx dpCtx, boolean isNonNativeTable) + throws SemanticException { + WriteEntity output = null; + + // Here only register the whole table for post-exec hook if no DP present + // in the case of DP, we will register WriteEntity in MoveTask when the + // list of dynamically created partitions are known. 
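The body of generateTableWriteEntity, which follows, reduces to a three-way decision on the dynamic-partition context; a compact restatement (the enum is hypothetical, introduced only to name the three outcomes):

    // Hypothetical summary of the WriteEntity registration cases below.
    final class WriteEntityKindSketch {
      enum Kind { WHOLE_TABLE, TABLE_DYNAMIC_PARTITION, DUMMY_PARTITION }

      static Kind classify(boolean hasDpCtx, int numDpCols, int numSpCols) {
        if (!hasDpCtx || numDpCols == 0) {
          return Kind.WHOLE_TABLE;               // no dynamic partitions: register the table itself
        }
        return (numSpCols == 0)
            ? Kind.TABLE_DYNAMIC_PARTITION       // fully dynamic spec: table entity with the DP flag set
            : Kind.DUMMY_PARTITION;              // partial static prefix: register a DummyPartition
      }
    }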
+ if ((dpCtx == null || dpCtx.getNumDPCols() == 0)) { + output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest)); + if (!outputs.add(output)) { + throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES + .getMsg(dest_tab.getTableName())); + } } - FileSinkOperator fso = (FileSinkOperator) output; - fso.getConf().setTable(dest_tab); - fsopToTable.put(fso, dest_tab); - // the following code is used to collect column stats when - // hive.stats.autogather=true - // and it is an insert overwrite or insert into table - if (dest_tab != null && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) - && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) - && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) { - if (dest_type.intValue() == QBMetaData.DEST_TABLE) { - genAutoColumnStatsGatheringPipeline(qb, table_desc, partSpec, input, qb.getParseInfo() - .isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())); - } else if (dest_type.intValue() == QBMetaData.DEST_PARTITION) { - genAutoColumnStatsGatheringPipeline(qb, table_desc, dest_part.getSpec(), input, qb - .getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName())); - + if ((dpCtx != null) && (dpCtx.getNumDPCols() >= 0)) { + // No static partition specified + if (dpCtx.getNumSPCols() == 0) { + output = new WriteEntity(dest_tab, determineWriteType(ltd, isNonNativeTable, dest), false); + outputs.add(output); + output.setDynamicPartitionWrite(true); + } + // part of the partition specified + // Create a DummyPartition in this case. Since, the metastore does not store partial + // partitions currently, we need to store dummy partitions + else { + try { + String ppath = dpCtx.getSPPath(); + ppath = ppath.substring(0, ppath.length() - 1); + DummyPartition p = + new DummyPartition(dest_tab, dest_tab.getDbName() + + "@" + dest_tab.getTableName() + "@" + ppath, + partSpec); + output = new WriteEntity(p, getWriteType(dest), false); + output.setDynamicPartitionWrite(true); + outputs.add(output); + } catch (HiveException e) { + throw new SemanticException(e.getMessage(), e); + } } } return output; } + private void checkExternalTable(Table dest_tab) throws SemanticException { + if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_INSERT_INTO_EXTERNAL_TABLES)) && + (dest_tab.getTableType().equals(TableType.EXTERNAL_TABLE))) { + throw new SemanticException( + ErrorMsg.INSERT_EXTERNAL_TABLE.getMsg(dest_tab.getTableName())); + } + } + + private void checkImmutableTable(QB qb, Table dest_tab, Path dest_path, boolean isPart) + throws SemanticException { + // If the query here is an INSERT_INTO and the target is an immutable table, + // verify that our destination is empty before proceeding + if (!dest_tab.isImmutable() || !qb.getParseInfo().isInsertIntoTable( + dest_tab.getDbName(), dest_tab.getTableName())) { + return; + } + try { + FileSystem fs = dest_path.getFileSystem(conf); + if (! MetaStoreUtils.isDirEmpty(fs,dest_path)){ + LOG.warn("Attempted write into an immutable table : " + + dest_tab.getTableName() + " : " + dest_path); + throw new SemanticException( + ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(dest_tab.getTableName())); + } + } catch (IOException ioe) { + LOG.warn("Error while trying to determine if immutable table " + + (isPart ? 
"partition " : "") + "has any data : " + dest_tab.getTableName() + + " : " + dest_path); + throw new SemanticException(ErrorMsg.INSERT_INTO_IMMUTABLE_TABLE.getMsg(ioe.getMessage())); + } + } + + private DynamicPartitionCtx checkDynPart(QB qb, QBMetaData qbm, Table dest_tab, + Map partSpec, String dest) throws SemanticException { + List parts = dest_tab.getPartitionKeys(); + if (parts == null || parts.isEmpty()) return null; // table is not partitioned + if (partSpec == null || partSpec.size() == 0) { // user did NOT specify partition + throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), + ErrorMsg.NEED_PARTITION_ERROR.getMsg())); + } + DynamicPartitionCtx dpCtx = qbm.getDPCtx(dest); + if (dpCtx == null) { + dest_tab.validatePartColumnNames(partSpec, false); + dpCtx = new DynamicPartitionCtx(dest_tab, partSpec, + conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), + conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE)); + qbm.setDPCtx(dest, dpCtx); + } + + if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)) { // allow DP + throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), + ErrorMsg.DYNAMIC_PARTITION_DISABLED.getMsg())); + } + if ((dest_tab.getNumBuckets() > 0)) { + dpCtx.setNumBuckets(dest_tab.getNumBuckets()); + } + return dpCtx; + } + + private void createInsertDesc(Table table, boolean overwrite) { Task[] tasks = new Task[this.rootTasks.size()]; tasks = this.rootTasks.toArray(tasks); @@ -7398,9 +7459,6 @@ These props are now enabled elsewhere (see commit diffs). It would be better in if (table.getSortCols() != null && table.getSortCols().size() > 0) { throw new SemanticException(ErrorMsg.ACID_NO_SORTED_BUCKETS, table.getTableName()); } - - - } /** @@ -10535,7 +10593,7 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String if (partitions != null) { for (Partition partn : partitions) { // inputs.add(new ReadEntity(partn)); // is this needed at all? 
- LOG.info("XXX: adding part: "+partn); + LOG.info("XXX: adding part: "+partn); outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK)); } } @@ -11928,7 +11986,7 @@ public void validate() throws SemanticException { if (p != null) { tbl = p.getTable(); } - if (tbl != null && AcidUtils.isAcidTable(tbl)) { + if (tbl != null && (AcidUtils.isFullAcidTable(tbl) || MetaStoreUtils.isInsertOnlyTable(tbl.getParameters()))) { acidInQuery = true; checkAcidTxnManager(tbl); } @@ -11991,7 +12049,7 @@ public void validate() throws SemanticException { tbl = writeEntity.getTable(); } - if (tbl != null && AcidUtils.isAcidTable(tbl)) { + if (tbl != null && (AcidUtils.isFullAcidTable(tbl) || MetaStoreUtils.isInsertOnlyTable(tbl.getParameters()))) { acidInQuery = true; checkAcidTxnManager(tbl); } @@ -12342,7 +12400,7 @@ ASTNode analyzeCreateTable( } } - if(location != null && location.length() != 0) { + if (location != null && location.length() != 0) { Path locPath = new Path(location); FileSystem curFs = null; FileStatus locStats = null; @@ -12351,7 +12409,7 @@ ASTNode analyzeCreateTable( if(curFs != null) { locStats = curFs.getFileStatus(locPath); } - if(locStats != null && locStats.isDir()) { + if (locStats != null && locStats.isDir()) { FileStatus[] lStats = curFs.listStatus(locPath); if(lStats != null && lStats.length != 0) { // Don't throw an exception if the target location only contains the staging-dirs @@ -12373,7 +12431,6 @@ ASTNode analyzeCreateTable( } tblProps = addDefaultProperties(tblProps); - tableDesc = new CreateTableDesc(qualifiedTabName[0], dbDotTab, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, @@ -13704,4 +13761,12 @@ public void setLoadFileWork(List loadFileWork) { this.loadFileWork = loadFileWork; } + private static final class ColsAndTypes { + public ColsAndTypes(String cols, String colTypes) { + this.cols = cols; + this.colTypes = colTypes; + } + public String cols; + public String colTypes; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 08a8f00..356ab6f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -20,16 +20,13 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashSet; -import java.util.LinkedList; import java.util.List; -import java.util.Queue; import java.util.Set; -import java.util.Stack; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; @@ -42,7 +39,6 @@ import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -54,7 +50,6 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; -import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; import 
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; @@ -62,6 +57,7 @@ import org.apache.hadoop.hive.ql.plan.CreateViewDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; import org.apache.hadoop.hive.ql.plan.FetchWork; +import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MoveWork; @@ -232,45 +228,15 @@ public void compile(final ParseContext pCtx, final List> leafTasks = new LinkedHashSet>(); - getLeafTasks(rootTasks, leafTasks); - if (isCStats) { - genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, leafTasks, outerQueryLimit, 0); - } else { - for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx - .getColumnStatsAutoGatherContexts()) { - if (!columnStatsAutoGatherContext.isInsertInto()) { - genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), - columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, 0); - } else { - int numBitVector; - try { - numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); - } catch (Exception e) { - throw new SemanticException(e.getMessage()); - } - genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), - columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, numBitVector); - } - } - } + createColumnStatsTasks(pCtx, rootTasks, loadFileWork, isCStats, outerQueryLimit); } decideExecMode(rootTasks, ctx, globalLimitCtx); @@ -364,6 +309,81 @@ public void compile(final ParseContext pCtx, final List> rootTasks, + List loadFileWork, boolean isCStats, int outerQueryLimit) + throws SemanticException { + Set> leafTasks = new LinkedHashSet>(); + getLeafTasks(rootTasks, leafTasks); + if (isCStats) { + genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, leafTasks, outerQueryLimit, 0); + } else { + for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx + .getColumnStatsAutoGatherContexts()) { + if (!columnStatsAutoGatherContext.isInsertInto()) { + genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), + columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, 0); + } else { + int numBitVector; + try { + numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); + } catch (Exception e) { + throw new SemanticException(e.getMessage()); + } + genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), + columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, numBitVector); + } + } + } + } + + private Path getDefaultCtasLocation(final ParseContext pCtx) throws SemanticException { + try { + String protoName = null; + if (pCtx.getQueryProperties().isCTAS()) { + protoName = pCtx.getCreateTable().getTableName(); + } else if (pCtx.getQueryProperties().isMaterializedView()) { + protoName = pCtx.getCreateViewDesc().getViewName(); + } + String[] names = Utilities.getDbTableName(protoName); + if (!db.databaseExists(names[0])) { + throw new SemanticException("ERROR: The database " + names[0] + " does not exist."); + } + Warehouse wh = new Warehouse(conf); + return wh.getDefaultTablePath(db.getDatabase(names[0]), names[1]); + } catch (HiveException e) { + throw new SemanticException(e); + } catch (MetaException e) { + throw new SemanticException(e); + } + } + private void patchUpAfterCTASorMaterializedView(final List> 
rootTasks, final HashSet outputs, Task createTask) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java index 9753f9e..025f222 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java @@ -207,7 +207,7 @@ public void setError(String error, ASTNode errorSrcNode) { if (LOG.isDebugEnabled()) { // Logger the callstack from which the error has been set. LOG.debug("Setting error: [" + error + "] from " - + ((errorSrcNode == null) ? "null" : errorSrcNode.toStringTree()), new Exception()); + + ((errorSrcNode == null) ? "null" : errorSrcNode.toStringTree())/*, new Exception()*/); } this.error = error; this.errorSrcNode = errorSrcNode; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java index 4078d2a..24c8baf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java @@ -406,6 +406,7 @@ public static Path createMoveTask(Task currTask, boolean HiveConf hconf, DependencyCollectionTask dependencyTask) { Path dest = null; + FileSinkDesc fileSinkDesc = fsOp.getConf(); if (chDir) { dest = fsOp.getConf().getFinalDirName(); @@ -416,7 +417,6 @@ public static Path createMoveTask(Task currTask, boolean Path tmpDir = baseCtx.getExternalTmpPath(dest); - FileSinkDesc fileSinkDesc = fsOp.getConf(); // Change all the linked file sink descriptors if (fileSinkDesc.getLinkedFileSinkDesc() != null) { for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) { @@ -430,7 +430,7 @@ public static Path createMoveTask(Task currTask, boolean Task mvTask = null; if (!chDir) { - mvTask = GenMapRedUtils.findMoveTask(mvTasks, fsOp); + mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fileSinkDesc.getFinalDirName(), false); } // Set the move task to be dependent on the current task diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java index a2876e1..1c220b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork; import org.apache.hadoop.hive.ql.lib.Node; @@ -184,7 +185,8 @@ private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext aggregationKey = aggregationKeyBuffer.toString(); // scan work - PartialScanWork scanWork = new PartialScanWork(inputPaths); + PartialScanWork scanWork = new PartialScanWork(inputPaths, + Utilities.getTableDesc(tableScan.getConf().getTableMetadata())); scanWork.setMapperCannotSpanPartns(true); scanWork.setAggKey(aggregationKey); scanWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir(), parseContext.getConf()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/BucketMapJoinContext.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/BucketMapJoinContext.java index 4587c32..96f722c 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/BucketMapJoinContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/BucketMapJoinContext.java @@ -39,6 +39,7 @@ private static final long serialVersionUID = 1L; + // TODO# this is completely broken, esp. w/load into bucketed tables (should perhaps be forbidden for MM tables) // table alias (small) --> input file name (big) --> target file names (small) private Map>> aliasBucketFileNameMapping; private String mapJoinBigTableAlias; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java index b454a2a..a0e024c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -32,6 +33,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.Utilities; /** * Conditional task resolution interface. This is invoked at run time to get the @@ -75,14 +77,6 @@ public String getDir() { } /** - * @param dir - * the dir to set - */ - public void setDir(String dir) { - this.dir = dir; - } - - /** * @return the listTasks */ public List> getListTasks() { @@ -120,8 +114,7 @@ public void setLbCtx(ListBucketingCtx lbCtx) { } } - public List> getTasks(HiveConf conf, - Object objCtx) { + public List> getTasks(HiveConf conf, Object objCtx) { ConditionalResolverMergeFilesCtx ctx = (ConditionalResolverMergeFilesCtx) objCtx; String dirName = ctx.getDir(); @@ -178,6 +171,8 @@ public void setLbCtx(ListBucketingCtx lbCtx) { if(lbLevel == 0) { // static partition without list bucketing long totalSz = getMergeSize(inpFs, dirPath, avgConditionSize); + Utilities.LOG14535.info("merge resolve simple case - totalSz " + totalSz + " from " + dirPath); + if (totalSz >= 0) { // add the merge job setupMapRedWork(conf, work, trgtSize, totalSz); resTsks.add(mrTask); @@ -191,6 +186,7 @@ public void setLbCtx(ListBucketingCtx lbCtx) { } } } else { + Utilities.LOG14535.info("Resolver returning movetask for " + dirPath); resTsks.add(mvTask); } } catch (IOException e) { @@ -233,6 +229,7 @@ private void generateActualTasks(HiveConf conf, List mrTask, Task mrAndMvTask, Path dirPath, FileSystem inpFs, ConditionalResolverMergeFilesCtx ctx, MapWork work, int dpLbLevel) throws IOException { + Utilities.LOG14535.info("generateActualTasks for " + dirPath); DynamicPartitionCtx dpCtx = ctx.getDPCtx(); // get list of dynamic partitions FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(dirPath, dpLbLevel, inpFs); @@ -243,6 +240,7 @@ private void generateActualTasks(HiveConf conf, List 0) { + // Note: this path should be specific to concatenate; never executed in a select query. 
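ConditionalResolverMergeFiles, touched above, picks between the merge job and a plain move at run time based on how fragmented the staged output is; schematically, the decision looks like this (the recursion into subdirectories and the list-bucketing cases are omitted):

    import java.io.IOException;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Schematic merge-vs-move decision: merge only when there are several files whose
    // average size falls below the configured threshold (hive.merge.smallfiles.avgsize).
    final class MergeDecisionSketch {
      static boolean shouldMerge(FileSystem fs, Path dir, long avgSizeThreshold)
          throws IOException {
        long totalSize = 0;
        int numFiles = 0;
        for (FileStatus stat : fs.listStatus(dir)) {
          if (stat.isDirectory()) {
            continue;                 // the real resolver recurses; flattened here for brevity
          }
          totalSize += stat.getLen();
          numFiles++;
        }
        return numFiles > 1 && (totalSize / numFiles) < avgSizeThreshold;
      }
    }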
// modify the existing move task as it is already in the candidate running tasks // running the MoveTask and MR task in parallel may @@ -326,10 +332,13 @@ private void generateActualTasks(HiveConf conf, List fullPartSpec = new LinkedHashMap<>(dpCtx.getPartSpec()); - Warehouse.makeSpecFromName(fullPartSpec, status[i].getPath()); - PartitionDesc pDesc = new PartitionDesc(tblDesc, fullPartSpec); - return pDesc; + LinkedHashMap fullPartSpec = new LinkedHashMap<>( dpCtx.getPartSpec()); + // Require all the directories to be present with some values. + if (!Warehouse.makeSpecFromName(fullPartSpec, status[i].getPath(), + new HashSet<>(dpCtx.getPartSpec().keySet()))) { + return null; + } + return new PartitionDesc(tblDesc, fullPartSpec); } private void setupMapRedWork(HiveConf conf, MapWork mWork, long targetSize, long totalSize) { @@ -337,6 +346,7 @@ private void setupMapRedWork(HiveConf conf, MapWork mWork, long targetSize, long mWork.setMinSplitSize(targetSize); mWork.setMinSplitSizePerNode(targetSize); mWork.setMinSplitSizePerRack(targetSize); + mWork.setIsMergeFromResolver(true); } private static class AverageSize { @@ -365,6 +375,7 @@ private AverageSize getAverageSize(FileSystem inpFs, Path dirPath) { long totalSz = 0; int numFiles = 0; for (FileStatus fStat : fStats) { + Utilities.LOG14535.info("Resolver looking at " + fStat.getPath()); if (fStat.isDir()) { AverageSize avgSzDir = getAverageSize(inpFs, fStat.getPath()); if (avgSzDir.getTotalSize() < 0) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CopyWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CopyWork.java index 9a4e782..c08911f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CopyWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CopyWork.java @@ -30,9 +30,10 @@ @Explain(displayName = "Copy", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public class CopyWork implements Serializable { private static final long serialVersionUID = 1L; - private Path fromPath; - private Path toPath; + private Path[] fromPath; + private Path[] toPath; private boolean errorOnSrcEmpty; + private boolean isSkipMmDirs = false; public CopyWork() { } @@ -42,18 +43,45 @@ public CopyWork(final Path fromPath, final Path toPath) { } public CopyWork(final Path fromPath, final Path toPath, boolean errorOnSrcEmpty) { + this(new Path[] { fromPath }, new Path[] { toPath }); + this.setErrorOnSrcEmpty(errorOnSrcEmpty); + } + + public CopyWork(final Path[] fromPath, final Path[] toPath) { + if (fromPath.length != toPath.length) { + throw new RuntimeException( + "Cannot copy " + fromPath.length + " paths into " + toPath.length + " paths"); + } this.fromPath = fromPath; this.toPath = toPath; - this.setErrorOnSrcEmpty(errorOnSrcEmpty); } - + + // Keep backward compat in explain for single-file copy tasks. @Explain(displayName = "source", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) - public Path getFromPath() { - return fromPath; + public Path getFromPathExplain() { + return (fromPath == null || fromPath.length > 1) ? null : fromPath[0]; } @Explain(displayName = "destination", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) - public Path getToPath() { + public Path getToPathExplain() { + return (toPath == null || toPath.length > 1) ? null : toPath[0]; + } + + @Explain(displayName = "sources", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + public Path[] getFromPathsExplain() { + return (fromPath != null && fromPath.length > 1) ? 
fromPath : null; + } + + @Explain(displayName = "destinations", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) + public Path[] getToPathsExplain() { + return (toPath != null && toPath.length > 1) ? toPath : null; + } + + public Path[] getFromPaths() { + return fromPath; + } + + public Path[] getToPaths() { return toPath; } @@ -65,4 +93,14 @@ public boolean isErrorOnSrcEmpty() { return errorOnSrcEmpty; } + /** Whether the copy should ignore MM directories in the source, and copy their content to + * destination directly, rather than copying the directories themselves. */ + public void setSkipSourceMmDirs(boolean isMm) { + this.isSkipMmDirs = isMm; + } + + public boolean doSkipSourceMmDirs() { + return isSkipMmDirs ; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java index 4320421..0db6d21 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java @@ -101,6 +101,10 @@ List foreignKeys; List uniqueConstraints; List notNullConstraints; + private Long initialMmWriteId; // Initial MM write ID for CTAS and import. + // The FSOP configuration for the FSOP that is going to write initial data during ctas. + // This is not needed beyond compilation, so it is transient. + private transient FileSinkDesc writer; public CreateTableDesc() { } @@ -859,5 +863,23 @@ public Table toTable(HiveConf conf) throws HiveException { return tbl; } + public void setInitialMmWriteId(Long mmWriteId) { + this.initialMmWriteId = mmWriteId; + } + + public Long getInitialMmWriteId() { + return initialMmWriteId; + } + + + public FileSinkDesc getAndUnsetWriter() { + FileSinkDesc fsd = writer; + writer = null; + return fsd; + } + + public void setWriter(FileSinkDesc writer) { + this.writer = writer; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileMergeDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileMergeDesc.java index 7ec1bdd..80f7c16 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileMergeDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileMergeDesc.java @@ -28,6 +28,9 @@ private int listBucketingDepth; private boolean hasDynamicPartitions; private boolean isListBucketingAlterTableConcatenate; + private Long txnId; + private int stmtId; + private boolean isMmTable; public FileMergeDesc(DynamicPartitionCtx dynPartCtx, Path outputDir) { this.dpCtx = dynPartCtx; @@ -73,4 +76,28 @@ public boolean isListBucketingAlterTableConcatenate() { public void setListBucketingAlterTableConcatenate(boolean isListBucketingAlterTableConcatenate) { this.isListBucketingAlterTableConcatenate = isListBucketingAlterTableConcatenate; } + + public Long getTxnId() { + return txnId; + } + + public void setTxnId(Long txnId) { + this.txnId = txnId; + } + + public int getStmtId() { + return stmtId; + } + + public void setStmtId(int stmtId) { + this.stmtId = stmtId; + } + + public boolean getIsMmTable() { + return isMmTable; + } + + public void setIsMmTable(boolean isMmTable) { + this.isMmTable = isMmTable; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index a3df166..4732f0a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -23,6 +23,7 @@ import java.util.Objects; import org.apache.hadoop.fs.Path; +import 
org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -82,7 +83,6 @@ // the sub-queries write to sub-directories of a common directory. So, the file sink // descriptors for subq1 and subq2 are linked. private boolean linkedFileSink = false; - private Path parentDir; transient private List linkedFileSinkDesc; private boolean statsReliable; @@ -97,6 +97,9 @@ private transient Table table; private Path destPath; private boolean isHiveServerQuery; + private Long mmWriteId; + private boolean isMerge; + private boolean isMmCtas; /** * Whether is a HiveServer query, and the destination table is @@ -115,7 +118,8 @@ public FileSinkDesc() { public FileSinkDesc(final Path dirName, final TableDesc tableInfo, final boolean compressed, final int destTableId, final boolean multiFileSpray, final boolean canBeMerged, final int numFiles, final int totalFiles, - final ArrayList partitionCols, final DynamicPartitionCtx dpCtx, Path destPath) { + final ArrayList partitionCols, final DynamicPartitionCtx dpCtx, Path destPath, + Long mmWriteId, boolean isMmCtas) { this.dirName = dirName; this.tableInfo = tableInfo; @@ -129,6 +133,8 @@ public FileSinkDesc(final Path dirName, final TableDesc tableInfo, this.dpCtx = dpCtx; this.dpSortState = DPSortState.NONE; this.destPath = destPath; + this.mmWriteId = mmWriteId; + this.isMmCtas = isMmCtas; } public FileSinkDesc(final Path dirName, final TableDesc tableInfo, @@ -150,20 +156,20 @@ public FileSinkDesc(final Path dirName, final TableDesc tableInfo, public Object clone() throws CloneNotSupportedException { FileSinkDesc ret = new FileSinkDesc(dirName, tableInfo, compressed, destTableId, multiFileSpray, canBeMerged, numFiles, totalFiles, - partitionCols, dpCtx, destPath); + partitionCols, dpCtx, destPath, mmWriteId, isMmCtas); ret.setCompressCodec(compressCodec); ret.setCompressType(compressType); ret.setGatherStats(gatherStats); ret.setStaticSpec(staticSpec); ret.setStatsAggPrefix(statsKeyPref); ret.setLinkedFileSink(linkedFileSink); - ret.setParentDir(parentDir); ret.setLinkedFileSinkDesc(linkedFileSinkDesc); ret.setStatsReliable(statsReliable); ret.setDpSortState(dpSortState); ret.setWriteType(writeType); ret.setTransactionId(txnId); ret.setStatsTmpDir(statsTmpDir); + ret.setIsMerge(isMerge); return ret; } @@ -193,7 +199,17 @@ public void setDirName(final Path dirName) { } public Path getFinalDirName() { - return linkedFileSink ? parentDir : dirName; + return linkedFileSink ? dirName.getParent() : dirName; + } + + /** getFinalDirName that takes into account MM, but not DP, LB or buckets. 
*/ + public Path getMergeInputDirName() { + Path root = getFinalDirName(); + if (isMmTable()) { + return new Path(root, AcidUtils.deltaSubdir(txnId, txnId, 0)); + } else { + return root; + } } @Explain(displayName = "table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) @@ -265,6 +281,14 @@ public void setTemporary(boolean temporary) { this.temporary = temporary; } + public boolean isMmTable() { + if (getTable() != null) { + return MetaStoreUtils.isInsertOnlyTable(table.getParameters()); + } else { // Dynamic Partition Insert case + return MetaStoreUtils.isInsertOnlyTable(getTableInfo().getProperties()); + } + } + public boolean isMaterialization() { return materialization; } @@ -393,11 +417,7 @@ public void setLinkedFileSink(boolean linkedFileSink) { } public Path getParentDir() { - return parentDir; - } - - public void setParentDir(Path parentDir) { - this.parentDir = parentDir; + return dirName.getParent(); } public boolean isStatsReliable() { @@ -497,6 +517,22 @@ public void setStatsTmpDir(String statsCollectionTempDir) { this.statsTmpDir = statsCollectionTempDir; } + public void setMmWriteId(Long mmWriteId) { + this.mmWriteId = mmWriteId; + } + + public void setIsMerge(boolean b) { + this.isMerge = b; + } + + public boolean isMerge() { + return isMerge; + } + + public boolean isMmCtas() { + return isMmCtas; + } + public class FileSinkOperatorExplainVectorization extends OperatorExplainVectorization { public FileSinkOperatorExplainVectorization(VectorDesc vectorDesc) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java index 03202fb..6fad710 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java @@ -22,6 +22,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.PTFUtils; +import org.apache.hadoop.hive.ql.exec.Utilities; /** * LoadFileDesc. @@ -35,6 +36,7 @@ private String columns; private String columnTypes; private String destinationCreateTable; + private boolean isMmCtas; public LoadFileDesc() { } @@ -47,12 +49,13 @@ public LoadFileDesc(final LoadFileDesc o) { this.columns = o.columns; this.columnTypes = o.columnTypes; this.destinationCreateTable = o.destinationCreateTable; + this.isMmCtas = o.isMmCtas; } public LoadFileDesc(final CreateTableDesc createTableDesc, final CreateViewDesc createViewDesc, final Path sourcePath, final Path targetDir, final boolean isDfsDir, - final String columns, final String columnTypes) { - this(sourcePath, targetDir, isDfsDir, columns, columnTypes); + final String columns, final String columnTypes, boolean isMmCtas) { + this(sourcePath, targetDir, isDfsDir, columns, columnTypes, isMmCtas); if (createTableDesc != null && createTableDesc.getDatabaseName() != null && createTableDesc.getTableName() != null) { destinationCreateTable = (createTableDesc.getTableName().contains(".") ? 
"" : createTableDesc @@ -65,14 +68,15 @@ public LoadFileDesc(final CreateTableDesc createTableDesc, final CreateViewDesc } } - public LoadFileDesc(final Path sourcePath, final Path targetDir, - final boolean isDfsDir, final String columns, final String columnTypes) { - + public LoadFileDesc(final Path sourcePath, final Path targetDir, final boolean isDfsDir, + final String columns, final String columnTypes, boolean isMmCtas) { super(sourcePath); + Utilities.LOG14535.info("creating LFD from " + sourcePath + " to " + targetDir); this.targetDir = targetDir; this.isDfsDir = isDfsDir; this.columns = columns; this.columnTypes = columnTypes; + this.isMmCtas = isMmCtas; } @Explain(displayName = "destination") @@ -129,4 +133,8 @@ public void setColumnTypes(String columnTypes) { public String getDestinationCreateTable(){ return destinationCreateTable; } + + public boolean isMmCtas() { + return isMmCtas; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java index 2b01712..5e19729 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadMultiFilesDesc.java @@ -36,15 +36,23 @@ private String columns; private String columnTypes; private transient List srcDirs; + private transient List targetPrefixes; public LoadMultiFilesDesc() { } public LoadMultiFilesDesc(final List sourceDirs, final List targetDir, final boolean isDfsDir, final String columns, final String columnTypes) { + this(sourceDirs, targetDir, null, isDfsDir, columns, columnTypes); + } + + public LoadMultiFilesDesc(final List sourceDirs, final List targetDir, + List targetPrefixes, final boolean isDfsDir, final String columns, + final String columnTypes) { this.srcDirs = sourceDirs; this.targetDirs = targetDir; + this.targetPrefixes = targetPrefixes; this.isDfsDir = isDfsDir; this.columns = columns; this.columnTypes = columnTypes; @@ -106,4 +114,8 @@ public String getColumnTypes() { public void setColumnTypes(String columnTypes) { this.columnTypes = columnTypes; } + + public List getTargetPrefixes() { + return targetPrefixes; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java index aa77850..3201dc9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java @@ -23,6 +23,8 @@ import java.util.Map; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -41,10 +43,13 @@ // Need to remember whether this is an acid compliant operation, and if so whether it is an // insert, update, or delete. 
private AcidUtils.Operation writeType; + private Long txnId; + private int stmtId; // TODO: the below seems like they should just be combined into partitionDesc private org.apache.hadoop.hive.ql.plan.TableDesc table; private Map partitionSpec; // NOTE: this partitionSpec has to be ordered map + private boolean commitMmWriteId = true; public LoadTableDesc(final LoadTableDesc o) { super(o.getSourcePath()); @@ -62,9 +67,11 @@ public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, final Map partitionSpec, final boolean replace, - final AcidUtils.Operation writeType) { + final AcidUtils.Operation writeType, Long txnId) { super(sourcePath); - init(table, partitionSpec, replace, writeType); + Utilities.LOG14535.info("creating part LTD from " + sourcePath + " to " + + ((table.getProperties() == null) ? "null" : table.getTableName())); + init(table, partitionSpec, replace, writeType, txnId); } /** @@ -77,15 +84,16 @@ public LoadTableDesc(final Path sourcePath, public LoadTableDesc(final Path sourcePath, final TableDesc table, final Map partitionSpec, - final boolean replace) { - this(sourcePath, table, partitionSpec, replace, AcidUtils.Operation.NOT_ACID); + final boolean replace, + final Long txnId) { + this(sourcePath, table, partitionSpec, replace, AcidUtils.Operation.NOT_ACID, txnId); } public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, final Map partitionSpec, - final AcidUtils.Operation writeType) { - this(sourcePath, table, partitionSpec, true, writeType); + final AcidUtils.Operation writeType, Long txnId) { + this(sourcePath, table, partitionSpec, true, writeType, txnId); } /** @@ -96,20 +104,22 @@ public LoadTableDesc(final Path sourcePath, */ public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, - final Map partitionSpec) { - this(sourcePath, table, partitionSpec, true, AcidUtils.Operation.NOT_ACID); + final Map partitionSpec, Long txnId) { + this(sourcePath, table, partitionSpec, true, AcidUtils.Operation.NOT_ACID, txnId); } public LoadTableDesc(final Path sourcePath, final org.apache.hadoop.hive.ql.plan.TableDesc table, final DynamicPartitionCtx dpCtx, - final AcidUtils.Operation writeType) { + final AcidUtils.Operation writeType, + boolean isReplace, Long txnId) { super(sourcePath); + Utilities.LOG14535.info("creating LTD from " + sourcePath + " to " + table.getTableName()/*, new Exception()*/); this.dpCtx = dpCtx; if (dpCtx != null && dpCtx.getPartSpec() != null && partitionSpec == null) { - init(table, dpCtx.getPartSpec(), true, writeType); + init(table, dpCtx.getPartSpec(), isReplace, writeType, txnId); } else { - init(table, new LinkedHashMap(), true, writeType); + init(table, new LinkedHashMap(), isReplace, writeType, txnId); } } @@ -117,11 +127,12 @@ private void init( final org.apache.hadoop.hive.ql.plan.TableDesc table, final Map partitionSpec, final boolean replace, - AcidUtils.Operation writeType) { + AcidUtils.Operation writeType, Long txnId) { this.table = table; this.partitionSpec = partitionSpec; this.replace = replace; this.writeType = writeType; + this.txnId = txnId; } @Explain(displayName = "table", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) @@ -147,6 +158,15 @@ public boolean getReplace() { return replace; } + @Explain(displayName = "micromanaged table") + public Boolean isMmTableExplain() { + return isMmTable() ? 
true : null; + } + + public boolean isMmTable() { + return MetaStoreUtils.isInsertOnlyTable(table.getProperties()); + } + public void setReplace(boolean replace) { this.replace = replace; } @@ -184,4 +204,28 @@ public void setLbCtx(ListBucketingCtx lbCtx) { public AcidUtils.Operation getWriteType() { return writeType; } + + public Long getTxnId() { + return txnId; + } + + public void setTxnId(Long txnId) { + this.txnId = txnId; + } + + public int getStmtId() { + return stmtId; + } + + public void setStmtId(int stmtId) { + this.stmtId = stmtId; + } + + public void setIntermediateInMmWrite(boolean b) { + this.commitMmWriteId = !b; + } + + public boolean isCommitMmWrite() { + return commitMmWriteId; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 2e63260..0011d11 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -58,6 +58,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.mapred.JobConf; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Interner; /** @@ -157,6 +158,8 @@ /** Whether LLAP IO will be used for inputs. */ private String llapIoDesc; + private boolean isMergeFromResolver; + public MapWork() {} public MapWork(String name) { @@ -374,6 +377,7 @@ public void setNumMapTasks(Integer numMapTasks) { } @SuppressWarnings("nls") + @VisibleForTesting public void addMapWork(Path path, String alias, Operator work, PartitionDesc pd) { StringInternUtils.internUriStringsInPath(path); @@ -722,6 +726,14 @@ public VectorizedRowBatch getVectorizedRowBatch() { return vectorizedRowBatch; } + public void setIsMergeFromResolver(boolean b) { + this.isMergeFromResolver = b; + } + + public boolean isMergeFromResolver() { + return this.isMergeFromResolver; + } + /* * Whether the HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT variable * (hive.vectorized.use.vectorized.input.format) was true when the Vectorizer class evaluated diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java index 8ce211f..50adc42 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java @@ -23,6 +23,7 @@ import java.util.List; import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -55,11 +56,12 @@ * List of inserted partitions */ protected List movedParts; + private boolean isNoop; public MoveWork() { } - public MoveWork(HashSet inputs, HashSet outputs) { + private MoveWork(HashSet inputs, HashSet outputs) { this.inputs = inputs; this.outputs = outputs; } @@ -68,6 +70,8 @@ public MoveWork(HashSet inputs, HashSet outputs, final LoadTableDesc loadTableWork, final LoadFileDesc loadFileWork, boolean checkFileFormat, boolean srcLocal) { this(inputs, outputs); + Utilities.LOG14535.info("Creating MoveWork " + System.identityHashCode(this) + + " with " + loadTableWork + "; " + loadFileWork); this.loadTableWork = loadTableWork; this.loadFileWork = loadFileWork; this.checkFileFormat = checkFileFormat; @@ -77,10 +81,7 @@ public MoveWork(HashSet inputs, HashSet outputs, public MoveWork(HashSet inputs, HashSet outputs, final 
LoadTableDesc loadTableWork, final LoadFileDesc loadFileWork, boolean checkFileFormat) { - this(inputs, outputs); - this.loadTableWork = loadTableWork; - this.loadFileWork = loadFileWork; - this.checkFileFormat = checkFileFormat; + this(inputs, outputs, loadTableWork, loadFileWork, checkFileFormat, false); } public MoveWork(final MoveWork o) { @@ -152,4 +153,12 @@ public void setSrcLocal(boolean srcLocal) { this.srcLocal = srcLocal; } + // TODO# temporary test flag + public void setNoop(boolean b) { + this.isNoop = true; + } + + public boolean isNoop() { + return this.isNoop; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 157a697..bf8ee75 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -75,6 +75,8 @@ public void setBaseFileName(String baseFileName) { public PartitionDesc() { } + private final static org.slf4j.Logger LOG = org.slf4j.LoggerFactory.getLogger(PartitionDesc.class); + public PartitionDesc(final TableDesc table, final LinkedHashMap partSpec) { this.tableDesc = table; setPartSpec(partSpec); @@ -389,4 +391,13 @@ public void setVectorPartitionDesc(VectorPartitionDesc vectorPartitionDesc) { public VectorPartitionDesc getVectorPartitionDesc() { return vectorPartitionDesc; } + + @Override + public String toString() { + return "PartitionDesc [tableDesc=" + tableDesc + ", partSpec=" + partSpec + + ", inputFileFormatClass=" + inputFileFormatClass + + ", outputFileFormatClass=" + outputFileFormatClass + ", properties=" + + properties + ", baseFileName=" + baseFileName + + ", vectorPartitionDesc=" + vectorPartitionDesc + "]"; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index d82973c..52b8707 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -447,20 +447,6 @@ public static TableDesc getTableDesc(CreateViewDesc crtViewDesc, String cols, St } /** - * Generate the table descriptor of MetadataTypedColumnsetSerDe with the - * separatorCode. MetaDataTypedColumnsetSerDe is used because LazySimpleSerDe - * does not support a table with a single column "col" with type - * "array". - */ - public static TableDesc getDefaultTableDesc(String separatorCode) { - return new TableDesc( - TextInputFormat.class, IgnoreKeyTextOutputFormat.class, Utilities - .makeProperties( - org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT,separatorCode, - serdeConstants.SERIALIZATION_LIB,MetadataTypedColumnsetSerDe.class.getName())); - } - - /** * Generate the table descriptor for reduce key. 
*/ public static TableDesc getReduceKeyTableDesc(List fieldSchemas, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java index 8b7339d..249aaff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java @@ -162,8 +162,7 @@ public String getSerdeClassName() { @Explain(displayName = "name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getTableName() { - return properties - .getProperty(hive_metastoreConstants.META_TABLE_NAME); + return properties.getProperty(hive_metastoreConstants.META_TABLE_NAME); } @Explain(displayName = "input format") @@ -234,4 +233,11 @@ public boolean equals(Object o) { jobProperties.equals(target.jobProperties)); return ret; } + + @Override + public String toString() { + return "TableDesc [inputFileFormatClass=" + inputFileFormatClass + + ", outputFileFormatClass=" + outputFileFormatClass + ", properties=" + + properties + ", jobProperties=" + jobProperties + "]"; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index a88d061..ca20afb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -132,7 +132,7 @@ public TableScanDesc(final String alias, List vcs, Table tblMetad this.alias = alias; this.virtualCols = vcs; this.tableMetadata = tblMetadata; - isAcidTable = AcidUtils.isAcidTable(this.tableMetadata); + isAcidTable = AcidUtils.isFullAcidTable(this.tableMetadata); if (isAcidTable) { acidOperationalProperties = AcidUtils.getAcidOperationalProperties(this.tableMetadata); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java index 07df15a..03444c0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.stats.StatsAggregator; import org.apache.hadoop.hive.ql.stats.StatsCollectionContext; import org.slf4j.Logger; @@ -49,7 +50,7 @@ public boolean connect(StatsCollectionContext scc) { List statsDirs = scc.getStatsTmpDirs(); assert statsDirs.size() == 1 : "Found multiple stats dirs: " + statsDirs; Path statsDir = new Path(statsDirs.get(0)); - LOG.debug("About to read stats from : " + statsDir); + Utilities.LOG14535.info("About to read stats from : " + statsDir); statsMap = new HashMap>(); try { @@ -69,13 +70,13 @@ public boolean accept(Path file) { } finally { SerializationUtilities.releaseKryo(kryo); } - LOG.info("Read stats : " +statsMap); + Utilities.LOG14535.info("Read stats : " +statsMap); statsList.add(statsMap); in.close(); } return true; } catch (IOException e) { - LOG.error("Failed to read stats from filesystem ", e); + Utilities.LOG14535.error("Failed to read stats from filesystem ", e); return false; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java index 5b4f1fb..a92465f 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.stats.StatsCollectionContext; import org.apache.hadoop.hive.ql.stats.StatsPublisher; import org.slf4j.Logger; @@ -76,7 +77,7 @@ public boolean connect(StatsCollectionContext context) { @Override public boolean publishStat(String partKV, Map stats) { - LOG.debug("Putting in map : " + partKV + "\t" + stats); + Utilities.LOG14535.info("Putting in map : " + partKV + "\t" + stats); // we need to do new hashmap, since stats object is reused across calls. Map cpy = new HashMap(stats); Map statMap = statsMap.get(partKV); @@ -105,7 +106,7 @@ public boolean closeConnection(StatsCollectionContext context) { statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX + conf.getInt("mapred.task.partition", 0)); } - LOG.debug("About to create stats file for this task : " + statsFile); + Utilities.LOG14535.info("About to create stats file for this task : " + statsFile); Output output = new Output(statsFile.getFileSystem(conf).create(statsFile,true)); LOG.debug("Created file : " + statsFile); LOG.debug("Writing stats in it : " + statsMap); @@ -118,7 +119,7 @@ public boolean closeConnection(StatsCollectionContext context) { output.close(); return true; } catch (IOException e) { - LOG.error("Failed to persist stats on filesystem",e); + Utilities.LOG14535.error("Failed to persist stats on filesystem",e); return false; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 04ef7fc..2f8c33c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.common.ValidCompactorTxnList; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.CompactionType; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; @@ -71,6 +72,7 @@ import org.apache.hadoop.mapred.TaskAttemptContext; import org.apache.hadoop.mapred.lib.NullOutputFormat; import org.apache.hadoop.util.StringUtils; +import org.apache.hive.common.util.Ref; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -205,6 +207,16 @@ void run(HiveConf conf, String jobName, Table t, StorageDescriptor sd, if(conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST) && conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODEFAILCOMPACTION)) { throw new RuntimeException(HiveConf.ConfVars.HIVETESTMODEFAILCOMPACTION.name() + "=true"); } + + // For MM tables we don't need to launch MR jobs as there is no compaction needed. + // We just need to delete the directories for aborted transactions. + if (MetaStoreUtils.isInsertOnlyTable(t.getParameters())) { + LOG.debug("Going to delete directories for aborted transactions for MM table " + + t.getDbName() + "." 
+ t.getTableName()); + removeFiles(conf, sd.getLocation(), txns, t); + return; + } + JobConf job = createBaseJobConf(conf, jobName, t, sd, txns, ci); // Figure out and encode what files we need to read. We do this here (rather than in @@ -352,6 +364,30 @@ private void setColumnTypes(JobConf job, List cols) { HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, HiveInputFormat.class.getName()); } + // Remove the directories for aborted transactions only + private void removeFiles(HiveConf conf, String location, ValidTxnList txnList, Table t) + throws IOException { + AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(location), conf, txnList, + Ref.from(false), false, t.getParameters()); + // For MM table, we only want to delete delta dirs for aborted txns. + List abortedDirs = dir.getAbortedDirectories(); + List filesToDelete = new ArrayList<>(abortedDirs.size()); + for (FileStatus stat : abortedDirs) { + filesToDelete.add(stat.getPath()); + } + if (filesToDelete.size() < 1) { + LOG.warn("Hmm, nothing to delete in the worker for directory " + location + + ", that hardly seems right."); + return; + } + LOG.info("About to remove " + filesToDelete.size() + " aborted directories from " + location); + FileSystem fs = filesToDelete.get(0).getFileSystem(conf); + for (Path dead : filesToDelete) { + LOG.debug("Going to delete path " + dead.toString()); + fs.delete(dead, true); + } + } + public JobConf getMrJob() { return mrJob; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorThread.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorThread.java index 4d6e24e..92d9f28 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorThread.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorThread.java @@ -65,7 +65,6 @@ public void setHiveConf(HiveConf conf) { @Override public void setThreadId(int threadId) { this.threadId = threadId; - } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java index af4a1da..c52bd3e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.CompactionRequest; import org.apache.hadoop.hive.metastore.api.CompactionResponse; import org.apache.hadoop.hive.metastore.api.CompactionType; @@ -251,6 +252,11 @@ public CompactionType run() throws Exception { private CompactionType determineCompactionType(CompactionInfo ci, ValidTxnList txns, StorageDescriptor sd, Map tblproperties) throws IOException, InterruptedException { + + if (MetaStoreUtils.isInsertOnlyTable(tblproperties)) { + return CompactionType.MINOR; + } + boolean noBase = false; Path location = new Path(sd.getLocation()); FileSystem fs = location.getFileSystem(conf); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index 4f1c7d8..fbe71c9 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -430,7 +430,6 @@ private static void pause(int timeMillis) { } } - @Test public void exchangePartition() throws Exception { 
runStatementOnDriver("create database ex1"); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 21b4a2c..bd6e6a0 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -90,7 +90,8 @@ NONACIDORCTBL("nonAcidOrcTbl"), NONACIDPART("nonAcidPart", "p"), NONACIDPART2("nonAcidPart2", "p2"), - ACIDNESTEDPART("acidNestedPart", "p,q"); + ACIDNESTEDPART("acidNestedPart", "p,q"), + MMTBL("mmTbl"); private final String name; private final String partitionColumns; @@ -152,6 +153,7 @@ protected void setUpWithTableProperties(String tableProperties) throws Exception runStatementOnDriver("create table " + Table.ACIDNESTEDPART + "(a int, b int) partitioned by (p int, q int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES (" + tableProperties + ")"); + runStatementOnDriver("create table " + Table.MMTBL + "(a int, b int) TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')"); } protected void dropTables() throws Exception { @@ -757,7 +759,7 @@ public void testNonAcidToAcidConversion3() throws Exception { FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); Arrays.sort(buckets); if (numDelta == 1) { - Assert.assertEquals("delta_0000022_0000022_0000", status[i].getPath().getName()); + Assert.assertEquals("delta_0000024_0000024_0000", status[i].getPath().getName()); Assert.assertEquals(BUCKET_COUNT - 1, buckets.length); Assert.assertEquals("bucket_00001", buckets[0].getPath().getName()); } else if (numDelta == 2) { @@ -843,7 +845,7 @@ public void testNonAcidToAcidConversion3() throws Exception { status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); Assert.assertEquals(1, status.length); - Assert.assertEquals("base_0000023", status[0].getPath().getName()); + Assert.assertEquals("base_0000025", status[0].getPath().getName()); FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); Arrays.sort(buckets); Assert.assertEquals(1, buckets.length); @@ -1990,6 +1992,64 @@ public void testInsertOverwrite2() throws Exception { } /** + * Test compaction for Micro-managed table + * 1. Regular compaction shouldn't impact any valid subdirectories of MM tables + * 2. Compactions will only remove subdirectories for aborted transactions of MM tables, if any + * @throws Exception + */ + @Test + public void testMmTableCompaction() throws Exception { + // 1. Insert some rows into MM table + runStatementOnDriver("insert into " + Table.MMTBL + "(a,b) values(1,2)"); + runStatementOnDriver("insert into " + Table.MMTBL + "(a,b) values(3,4)"); + // There should be 2 delta directories + verifyDirAndResult(2); + + // 2. Perform a MINOR compaction. Since nothing was aborted, subdirs should stay. + runStatementOnDriver("alter table "+ Table.MMTBL + " compact 'MINOR'"); + runWorker(hiveConf); + verifyDirAndResult(2); + + // 3. Let a transaction be aborted + hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, true); + runStatementOnDriver("insert into " + Table.MMTBL + "(a,b) values(5,6)"); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, false); + // There should be 3 delta directories. The new one is the aborted one. + verifyDirAndResult(3); + + // 4. Perform a MINOR compaction again. 
This time it will remove the subdir for aborted transaction. + runStatementOnDriver("alter table "+ Table.MMTBL + " compact 'MINOR'"); + runWorker(hiveConf); + // The worker should remove the subdir for aborted transaction + verifyDirAndResult(2); + + // 5. Run Cleaner. Shouldn't impact anything. + runCleaner(hiveConf); + verifyDirAndResult(2); + } + + private void verifyDirAndResult(int expectedDeltas) throws Exception { + FileSystem fs = FileSystem.get(hiveConf); + // Verify the content of subdirs + FileStatus[] status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" + + (Table.MMTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER); + int sawDeltaTimes = 0; + for (int i = 0; i < status.length; i++) { + Assert.assertTrue(status[i].getPath().getName().matches("delta_.*")); + sawDeltaTimes++; + FileStatus[] files = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER); + Assert.assertEquals(1, files.length); + Assert.assertTrue(files[0].getPath().getName().equals("000000_0")); + } + Assert.assertEquals(expectedDeltas, sawDeltaTimes); + + // Verify query result + int [][] resultData = new int[][] {{1,2}, {3,4}}; + List rs = runStatementOnDriver("select a,b from " + Table.MMTBL); + Assert.assertEquals(stringifyValues(resultData), rs); + } + + /** * takes raw data and turns it into a string as if from Driver.getResults() * sorts rows in dictionary order */ diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java index b4898e2..61eab3d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java @@ -140,7 +140,7 @@ db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, src, true, true); db.createTable(src, cols, null, TextInputFormat.class, HiveIgnoreKeyTextOutputFormat.class); - db.loadTable(hadoopDataFile[i], src, false, true, false, false, false); + db.loadTable(hadoopDataFile[i], src, false, true, false, false, false, null, 0, false); i++; } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java index a8d7c9c..4938e2f 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java @@ -285,7 +285,8 @@ private FileSinkOperator getFileSink(AcidUtils.Operation writeType, partColMap.put(PARTCOL_NAME, null); DynamicPartitionCtx dpCtx = new DynamicPartitionCtx(null, partColMap, "Sunday", 100); //todo: does this need the finalDestination? 
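// [Reviewer illustration -- not part of the patch] With the widened FileSinkDesc constructor,
// call sites that build the descriptor directly (as this test does just below) now pass the MM
// write id and the MM-CTAS flag explicitly. A minimal sketch of the two call shapes, assuming the
// argument order introduced in this patch; partition columns, DP context and destination path are
// left null purely for brevity.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

final class FileSinkDescCallSketch {
  static FileSinkDesc plainSink(Path dir, TableDesc tbl) {
    // Non-MM write: no write id, not a CTAS into an insert-only table.
    return new FileSinkDesc(dir, tbl, false, 1, false, false, 1, 1, null, null, null, null, false);
  }

  static FileSinkDesc mmSink(Path dir, TableDesc tbl, long mmWriteId) {
    // Insert-only (MM) write: the write id ties this sink's output to a delta_... directory.
    return new FileSinkDesc(dir, tbl, false, 1, false, false, 1, 1, null, null, null, mmWriteId, false);
  }
}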
- desc = new FileSinkDesc(basePath, tableDesc, false, 1, false, false, 1, 1, partCols, dpCtx, null); + desc = new FileSinkDesc(basePath, tableDesc, false, 1, false, + false, 1, 1, partCols, dpCtx, null, null, false); } else { desc = new FileSinkDesc(basePath, tableDesc, false); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java index 06e4f98..76618ff 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java @@ -189,7 +189,7 @@ public void testOriginalDeltas() throws Exception { new MockFile("mock:/tbl/part1/delta_050_100/bucket_0", 0, new byte[0]), new MockFile("mock:/tbl/part1/delta_101_101/bucket_0", 0, new byte[0])); AcidUtils.Directory dir = - AcidUtils.getAcidState(new TestInputOutputFormat.MockPath(fs, + AcidUtils.getAcidState(new MockPath(fs, "mock:/tbl/part1"), conf, new ValidReadTxnList("100:" + Long.MAX_VALUE + ":")); assertEquals(null, dir.getBaseDirectory()); List obsolete = dir.getObsolete(); @@ -231,7 +231,7 @@ public void testBaseDeltas() throws Exception { new MockFile("mock:/tbl/part1/delta_050_105/bucket_0", 0, new byte[0]), new MockFile("mock:/tbl/part1/delta_90_120/bucket_0", 0, new byte[0])); AcidUtils.Directory dir = - AcidUtils.getAcidState(new TestInputOutputFormat.MockPath(fs, + AcidUtils.getAcidState(new MockPath(fs, "mock:/tbl/part1"), conf, new ValidReadTxnList("100:" + Long.MAX_VALUE + ":")); assertEquals("mock:/tbl/part1/base_49", dir.getBaseDirectory().toString()); List obsolete = dir.getObsolete(); @@ -527,7 +527,7 @@ public void testBaseWithDeleteDeltas() throws Exception { new MockFile("mock:/tbl/part1/delete_delta_050_105/bucket_0", 0, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_110_110/bucket_0", 0, new byte[0])); AcidUtils.Directory dir = - AcidUtils.getAcidState(new TestInputOutputFormat.MockPath(fs, + AcidUtils.getAcidState(new MockPath(fs, "mock:/tbl/part1"), conf, new ValidReadTxnList("100:" + Long.MAX_VALUE + ":")); assertEquals("mock:/tbl/part1/base_49", dir.getBaseDirectory().toString()); List obsolete = dir.getObsolete(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java index dae85a6..72910d0 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java @@ -46,19 +46,19 @@ public void testGetPartitionDescFromPathRecursively() throws IOException { // first group PartitionDesc ret = null; - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("file:///tbl/par1/part2/part3"), IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("file:///tbl/par1/part2/part3 not found.", partDesc_3, ret); - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("/tbl/par1/part2/part3"), IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("/tbl/par1/part2/part3 not found.", partDesc_3, ret); boolean exception = false; try { - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part3"), IOPrepareCache.get().allocatePartitionDescMap()); } catch (IOException e) { @@ -69,17 +69,17 
@@ public void testGetPartitionDescFromPathRecursively() throws IOException { exception = false; // second group - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("file:///tbl/par1/part2/part4"), IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("file:///tbl/par1/part2/part4 not found.", partDesc_4, ret); - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("/tbl/par1/part2/part4"), IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("/tbl/par1/part2/part4 not found.", partDesc_4, ret); - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part4"), IOPrepareCache.get().allocatePartitionDescMap()); @@ -87,24 +87,24 @@ public void testGetPartitionDescFromPathRecursively() throws IOException { ret); // third group - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("file:///tbl/par1/part2/part5"), IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("file:///tbl/par1/part2/part5 not found.", partDesc_5, ret); - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("/tbl/par1/part2/part5"), IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("/tbl/par1/part2/part5 not found.", partDesc_5, ret); - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part5"), IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("hdfs:///tbl/par1/part2/part5 not found", partDesc_5, ret); // fourth group try { - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("file:///tbl/par1/part2/part6"), IOPrepareCache.get().allocatePartitionDescMap()); } catch (IOException e) { @@ -114,12 +114,12 @@ public void testGetPartitionDescFromPathRecursively() throws IOException { exception); exception = false; - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("/tbl/par1/part2/part6"), IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("/tbl/par1/part2/part6 not found.", partDesc_6, ret); - ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part6"), IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("hdfs:///tbl/par1/part2/part6 not found.", partDesc_6, ret); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 4c30732..ccd7d8e 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -46,15 +46,7 @@ import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import 
org.apache.hadoop.fs.FSInputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -2619,6 +2611,7 @@ public void testDoAs() throws Exception { conf.setClass("fs.mock.impl", MockFileSystem.class, FileSystem.class); String badUser = UserGroupInformation.getCurrentUser().getShortUserName() + "-foo"; MockFileSystem.setBlockedUgi(badUser); + // TODO: could we instead get FS from path here and add normal files for every UGI? MockFileSystem.clearGlobalFiles(); OrcInputFormat.Context.resetThreadPool(); // We need the size above to take effect. try { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java index 91eb033..0580959 100755 --- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java @@ -326,8 +326,8 @@ private void validateTable(Table tbl, String tableName) throws MetaException { tbl.setCreateTime(ft.getTTable().getCreateTime()); tbl.getParameters().put(hive_metastoreConstants.DDL_TIME, ft.getParameters().get(hive_metastoreConstants.DDL_TIME)); - assertTrue("Tables doesn't match: " + tableName, ft.getTTable() - .equals(tbl.getTTable())); + assertTrue("Tables doesn't match: " + tableName + " (" + ft.getTTable() + + "; " + tbl.getTTable() + ")", ft.getTTable().equals(tbl.getTTable())); assertEquals("SerializationLib is not set correctly", tbl .getSerializationLib(), ft.getSerializationLib()); assertEquals("Serde is not set correctly", tbl.getDeserializer() diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java index e7ce234..91c4b44 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java @@ -85,17 +85,20 @@ public void testMovePathsThatCannotBeMerged() { GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, condOutputPath, mockWork)); reset(mockWork); - when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condInputPath, condOutputPath, false, "", "")); + when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( + condInputPath, condOutputPath, false, "", "", false)); assertFalse("Merging paths is not allowed when both conditional output path is not equals to MoveWork input path.", GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, condOutputPath, mockWork)); reset(mockWork); - when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condOutputPath, new Path("unused"), false, "", "")); + when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( + condOutputPath, new Path("unused"), false, "", "", false)); assertFalse("Merging paths is not allowed when conditional input path is not a BlobStore path.", GenMapRedUtils.shouldMergeMovePaths(hiveConf, new Path("hdfs://hdfs-path"), condOutputPath, mockWork)); reset(mockWork); - when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condOutputPath, new Path("hdfs://hdfs-path"), false, "", "")); + 
when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( + condOutputPath, new Path("hdfs://hdfs-path"), false, "", "", false)); assertFalse("Merging paths is not allowed when MoveWork output path is not a BlobStore path.", GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, condOutputPath, mockWork)); } @@ -107,7 +110,8 @@ public void testMovePathsThatCanBeMerged() { final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003"); final MoveWork mockWork = mock(MoveWork.class); - when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condOutputPath, targetMoveWorkPath, false, "", "")); + when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( + condOutputPath, targetMoveWorkPath, false, "", "", false)); assertTrue("Merging BlobStore paths should be allowed.", GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, condOutputPath, mockWork)); @@ -131,7 +135,8 @@ public void testMergePathValidMoveWorkReturnsNewMoveWork() { MoveWork newWork; // test using loadFileWork - when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condOutputPath, targetMoveWorkPath, false, "", "")); + when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( + condOutputPath, targetMoveWorkPath, false, "", "", false)); newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork); assertNotNull(newWork); assertNotEquals(newWork, mockWork); @@ -141,7 +146,8 @@ public void testMergePathValidMoveWorkReturnsNewMoveWork() { // test using loadTableWork TableDesc tableDesc = new TableDesc(); reset(mockWork); - when(mockWork.getLoadTableWork()).thenReturn(new LoadTableDesc(condOutputPath, tableDesc, null)); + when(mockWork.getLoadTableWork()).thenReturn(new LoadTableDesc( + condOutputPath, tableDesc, null, null)); newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork); assertNotNull(newWork); assertNotEquals(newWork, mockWork); @@ -276,7 +282,7 @@ private FileSinkOperator createFileSinkOperator(Path finalDirName) { private Task createMoveTask(Path source, Path destination) { Task moveTask = mock(MoveTask.class); MoveWork moveWork = new MoveWork(); - moveWork.setLoadFileWork(new LoadFileDesc(source, destination, true, null, null)); + moveWork.setLoadFileWork(new LoadFileDesc(source, destination, true, null, null, false)); when(moveTask.getWork()).thenReturn(moveWork); diff --git a/ql/src/test/queries/clientnegative/mm_bucket_convert.q b/ql/src/test/queries/clientnegative/mm_bucket_convert.q new file mode 100644 index 0000000..2ded047 --- /dev/null +++ b/ql/src/test/queries/clientnegative/mm_bucket_convert.q @@ -0,0 +1,18 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + +drop table bucket0_mm; +drop table bucket1_mm; +create table bucket0_mm(key int, id int) clustered by (key) into 2 buckets + tblproperties("transactional"="true", "transactional_properties"="insert_only"); +create table bucket1_mm(key int, id int) clustered by (key) into 2 buckets + tblproperties("transactional"="true", "transactional_properties"="insert_only"); + +set hive.strict.checks.bucketing=false; +alter table bucket0_mm unset tblproperties('transactional_properties', 'transactional'); +set hive.strict.checks.bucketing=true; +alter table bucket1_mm unset tblproperties('transactional_properties', 'transactional'); + + diff --git a/ql/src/test/queries/clientnegative/mm_concatenate.q b/ql/src/test/queries/clientnegative/mm_concatenate.q new file mode 100644 index 0000000..4b13c60 --- 
/dev/null +++ b/ql/src/test/queries/clientnegative/mm_concatenate.q @@ -0,0 +1,8 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +create table concat_mm (id int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only"); + +insert into table concat_mm select key from src limit 10; + +alter table concat_mm concatenate; diff --git a/ql/src/test/queries/clientnegative/mm_truncate_cols.q b/ql/src/test/queries/clientnegative/mm_truncate_cols.q new file mode 100644 index 0000000..3335ed8 --- /dev/null +++ b/ql/src/test/queries/clientnegative/mm_truncate_cols.q @@ -0,0 +1,3 @@ +CREATE TABLE mm_table(key int, value string) stored as rcfile tblproperties ("transactional"="true", "transactional_properties"="insert_only"); + +TRUNCATE TABLE mm_table COLUMNS (value); diff --git a/ql/src/test/queries/clientpositive/mm_all.q b/ql/src/test/queries/clientpositive/mm_all.q new file mode 100644 index 0000000..e23260f --- /dev/null +++ b/ql/src/test/queries/clientpositive/mm_all.q @@ -0,0 +1,325 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set tez.grouping.min-size=1; +set tez.grouping.max-size=2; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + + +-- Force multiple writers when reading +drop table intermediate; +create table intermediate(key int) partitioned by (p int) stored as orc; +insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2; +insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2; +insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2; + + +drop table part_mm; +create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +explain insert into table part_mm partition(key_mm=455) select key from intermediate; +insert into table part_mm partition(key_mm=455) select key from intermediate; +insert into table part_mm partition(key_mm=456) select key from intermediate; +insert into table part_mm partition(key_mm=455) select key from intermediate; +select * from part_mm order by key, key_mm; + +-- TODO: doesn't work truncate table part_mm partition(key_mm=455); +select * from part_mm order by key, key_mm; +truncate table part_mm; +select * from part_mm order by key, key_mm; +drop table part_mm; + +drop table simple_mm; +create table simple_mm(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +insert into table simple_mm select key from intermediate; +select * from simple_mm order by key; +insert into table simple_mm select key from intermediate; +select * from simple_mm order by key; +truncate table simple_mm; +select * from simple_mm; +drop table simple_mm; + + +-- simple DP (no bucketing) +drop table dp_mm; + +set hive.exec.dynamic.partition.mode=nonstrict; + +set hive.merge.mapredfiles=false; +set hive.merge.sparkfiles=false; +set hive.merge.tezfiles=false; + +create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc + tblproperties ("transactional"="true", "transactional_properties"="insert_only"); + +insert into table dp_mm partition (key1='123', key2) select key, 
key from intermediate; + +select * from dp_mm order by key; + +drop table dp_mm; + + +-- union + +create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +insert into table union_mm +select temps.p from ( +select key as p from intermediate +union all +select key + 1 as p from intermediate ) temps; + +select * from union_mm order by id; + +insert into table union_mm +select p from +( +select key + 1 as p from intermediate +union all +select key from intermediate +) tab group by p +union all +select key + 2 as p from intermediate; + +select * from union_mm order by id; + +insert into table union_mm +SELECT p FROM +( + SELECT key + 1 as p FROM intermediate + UNION ALL + SELECT key as p FROM ( + SELECT distinct key FROM ( + SELECT key FROM ( + SELECT key + 2 as key FROM intermediate + UNION ALL + SELECT key FROM intermediate + )t1 + group by key)t2 + )t3 +)t4 +group by p; + + +select * from union_mm order by id; +drop table union_mm; + + +create table partunion_mm(id int) partitioned by (key int) tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +insert into table partunion_mm partition(key) +select temps.* from ( +select key as p, key from intermediate +union all +select key + 1 as p, key + 1 from intermediate ) temps; + +select * from partunion_mm order by id; +drop table partunion_mm; + + + +create table skew_mm(k1 int, k2 int, k4 int) skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) + stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only"); + +insert into table skew_mm +select key, key, key from intermediate; + +select * from skew_mm order by k2, k1, k4; +drop table skew_mm; + + +create table skew_dp_union_mm(k1 int, k2 int, k4 int) partitioned by (k3 int) +skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only"); + +insert into table skew_dp_union_mm partition (k3) +select key as i, key as j, key as k, key as l from intermediate +union all +select key +1 as i, key +2 as j, key +3 as k, key +4 as l from intermediate; + + +select * from skew_dp_union_mm order by k2, k1, k4; +drop table skew_dp_union_mm; + + + +set hive.merge.orcfile.stripe.level=true; +set hive.merge.tezfiles=true; +set hive.merge.mapfiles=true; +set hive.merge.mapredfiles=true; + + +create table merge0_mm (id int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only"); + +insert into table merge0_mm select key from intermediate; +select * from merge0_mm; + +set tez.grouping.split-count=1; +insert into table merge0_mm select key from intermediate; +set tez.grouping.split-count=0; +select * from merge0_mm; + +drop table merge0_mm; + + +create table merge2_mm (id int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); + +insert into table merge2_mm select key from intermediate; +select * from merge2_mm; + +set tez.grouping.split-count=1; +insert into table merge2_mm select key from intermediate; +set tez.grouping.split-count=0; +select * from merge2_mm; + +drop table merge2_mm; + + +create table merge1_mm (id int) partitioned by (key int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only"); + +insert into table merge1_mm partition (key) select key, key from intermediate; +select * from merge1_mm order by id, key; + +set tez.grouping.split-count=1; +insert into table merge1_mm partition (key) 
select key, key from intermediate; +set tez.grouping.split-count=0; +select * from merge1_mm order by id, key; + +drop table merge1_mm; + +set hive.merge.tezfiles=false; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- TODO: need to include merge+union+DP, but it's broken for now + + +drop table ctas0_mm; +create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate; +select * from ctas0_mm; +drop table ctas0_mm; + +drop table ctas1_mm; +create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as + select * from intermediate union all select * from intermediate; +select * from ctas1_mm; +drop table ctas1_mm; + + +drop table multi0_1_mm; +drop table multi0_2_mm; +create table multi0_1_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); +create table multi0_2_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); + +--from intermediate +--insert overwrite table multi0_1_mm select key, p +--insert overwrite table multi0_2_mm select p, key; +insert into table multi0_1_mm select key, p from intermediate; +insert into table multi0_2_mm select p, key from intermediate; + +select * from multi0_1_mm order by key, key2; +select * from multi0_2_mm order by key, key2; + +set hive.merge.mapredfiles=true; +set hive.merge.sparkfiles=true; +set hive.merge.tezfiles=true; + +--from intermediate +--insert into table multi0_1_mm select p, key +--insert overwrite table multi0_2_mm select key, p; +insert into table multi0_1_mm select p, key from intermediate; +insert into table multi0_2_mm select key, p from intermediate; +select * from multi0_1_mm order by key, key2; +select * from multi0_2_mm order by key, key2; + +set hive.merge.mapredfiles=false; +set hive.merge.sparkfiles=false; +set hive.merge.tezfiles=false; + +drop table multi0_1_mm; +drop table multi0_2_mm; + + +drop table multi1_mm; +create table multi1_mm (key int, key2 int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); +from intermediate +insert into table multi1_mm partition(p=1) select p, key +insert into table multi1_mm partition(p=2) select key, p; +select * from multi1_mm order by key, key2, p; +--from intermediate +--insert into table multi1_mm partition(p=2) select p, key +--insert overwrite table multi1_mm partition(p=1) select key, p; +insert into table multi1_mm partition(p=2) select p, key from intermediate; +insert into table multi1_mm partition(p=1) select key, p from intermediate; +select * from multi1_mm order by key, key2, p; + +from intermediate +insert into table multi1_mm partition(p) select p, key, p +insert into table multi1_mm partition(p=1) select key, p; +select key, key2, p from multi1_mm order by key, key2, p; + +from intermediate +insert into table multi1_mm partition(p) select p, key, 1 +insert into table multi1_mm partition(p=1) select key, p; +select key, key2, p from multi1_mm order by key, key2, p; +drop table multi1_mm; + + + + +set datanucleus.cache.collections=false; +set hive.stats.autogather=true; + +drop table stats_mm; +create table stats_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); +--insert overwrite table stats_mm select key from intermediate; +insert into table stats_mm select key from intermediate; +desc formatted stats_mm; + +insert into table stats_mm select key 
from intermediate; +desc formatted stats_mm; +drop table stats_mm; + +drop table stats2_mm; +create table stats2_mm tblproperties("transactional"="true", "transactional_properties"="insert_only") as select array(key, value) from src; +desc formatted stats2_mm; +drop table stats2_mm; + + +set hive.optimize.skewjoin=true; +set hive.skewjoin.key=2; +set hive.optimize.metadataonly=false; + +CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT into TABLE skewjoin_mm SELECT src1.key, src2.value; +select count(distinct key) from skewjoin_mm; +drop table skewjoin_mm; + +set hive.optimize.skewjoin=false; + +set hive.optimize.index.filter=true; +set hive.auto.convert.join=false; +CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +INSERT INTO parquet1_mm VALUES(1), (2); +CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +INSERT INTO parquet2_mm VALUES(1, 'value1'); +INSERT INTO parquet2_mm VALUES(1, 'value2'); +select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm + JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id + JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id +where t1.value = 'value1' and t2.value = 'value2'; +drop table parquet1_mm; +drop table parquet2_mm; + +set hive.auto.convert.join=true; + + +DROP TABLE IF EXISTS temp1; +CREATE TEMPORARY TABLE temp1 (a int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only"); +INSERT INTO temp1 SELECT key FROM intermediate; +DESC EXTENDED temp1; +SELECT * FROM temp1; + + +drop table intermediate; + + + diff --git a/ql/src/test/queries/clientpositive/mm_buckets.q b/ql/src/test/queries/clientpositive/mm_buckets.q new file mode 100644 index 0000000..d5a047a --- /dev/null +++ b/ql/src/test/queries/clientpositive/mm_buckets.q @@ -0,0 +1,66 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set tez.grouping.min-size=1; +set tez.grouping.max-size=2; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + + +-- Bucketing tests are slow and some tablesample ones don't work w/o MM + +-- Force multiple writers when reading +drop table intermediate; +create table intermediate(key int) partitioned by (p int) stored as orc; +insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2; +insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2; +insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2; + + + +drop table bucket0_mm; +create table bucket0_mm(key int, id int) +clustered by (key) into 2 buckets +tblproperties("transactional"="true", "transactional_properties"="insert_only"); +insert into table bucket0_mm select key, key from intermediate; +select * from bucket0_mm order by key, id; +select * from bucket0_mm tablesample (bucket 1 out of 2) s; +select * from bucket0_mm tablesample (bucket 2 out of 2) s; +insert into table bucket0_mm select key, key from intermediate; +select * from bucket0_mm order by key, id; +select * from bucket0_mm tablesample (bucket 1 out of 
2) s; +select * from bucket0_mm tablesample (bucket 2 out of 2) s; +drop table bucket0_mm; + + +drop table bucket1_mm; +create table bucket1_mm(key int, id int) partitioned by (key2 int) +clustered by (key) sorted by (key) into 2 buckets +tblproperties("transactional"="true", "transactional_properties"="insert_only"); +insert into table bucket1_mm partition (key2) +select key + 1, key, key - 1 from intermediate +union all +select key - 1, key, key + 1 from intermediate; +select * from bucket1_mm order by key, id; +select * from bucket1_mm tablesample (bucket 1 out of 2) s order by key, id; +select * from bucket1_mm tablesample (bucket 2 out of 2) s order by key, id; +drop table bucket1_mm; + + + +drop table bucket2_mm; +create table bucket2_mm(key int, id int) +clustered by (key) into 10 buckets +tblproperties("transactional"="true", "transactional_properties"="insert_only"); +insert into table bucket2_mm select key, key from intermediate where key == 0; +select * from bucket2_mm order by key, id; +select * from bucket2_mm tablesample (bucket 1 out of 10) s order by key, id; +select * from bucket2_mm tablesample (bucket 4 out of 10) s order by key, id; +insert into table bucket2_mm select key, key from intermediate where key in (0, 103); +select * from bucket2_mm; +select * from bucket2_mm tablesample (bucket 1 out of 10) s order by key, id; +select * from bucket2_mm tablesample (bucket 4 out of 10) s order by key, id; +drop table bucket2_mm; + +drop table intermediate; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/mm_conversions.q b/ql/src/test/queries/clientpositive/mm_conversions.q new file mode 100644 index 0000000..62faeac --- /dev/null +++ b/ql/src/test/queries/clientpositive/mm_conversions.q @@ -0,0 +1,86 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set tez.grouping.min-size=1; +set tez.grouping.max-size=2; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +-- Force multiple writers when reading +drop table intermediate; +create table intermediate(key int) partitioned by (p int) stored as orc; +insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 1; +insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 1; +insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 1; + +drop table simple_from_mm1; +create table simple_from_mm1(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +insert into table simple_from_mm1 select key from intermediate; +insert into table simple_from_mm1 select key from intermediate; +select * from simple_from_mm1 s1 order by key; +alter table simple_from_mm1 unset tblproperties('transactional_properties', 'transactional'); +select * from simple_from_mm1 s2 order by key; +insert into table simple_from_mm1 select key from intermediate; +select * from simple_from_mm1 s3 order by key; +drop table simple_from_mm1; + +drop table simple_from_mm2; +create table simple_from_mm2(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +insert into table simple_from_mm2 select key from intermediate; +insert into table simple_from_mm2 select key from intermediate; +select * from 
simple_from_mm2 s1 order by key; +alter table simple_from_mm2 set tblproperties("transactional"="false", 'transactional_properties'='false'); +select * from simple_from_mm2 s2 order by key; +insert into table simple_from_mm2 select key from intermediate; +select * from simple_from_mm2 s3 order by key; +drop table simple_from_mm2; + +drop table simple_to_mm; +create table simple_to_mm(key int) stored as orc; +insert into table simple_to_mm select key from intermediate; +select * from simple_to_mm s1 order by key; +alter table simple_to_mm set tblproperties("transactional"="true", "transactional_properties"="insert_only"); +select * from simple_to_mm s2 order by key; +insert into table simple_to_mm select key from intermediate; +insert into table simple_to_mm select key from intermediate; +select * from simple_to_mm s3 order by key; +drop table simple_to_mm; + +drop table part_from_mm1; +create table part_from_mm1(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +insert into table part_from_mm1 partition(key_mm='455') select key from intermediate; +insert into table part_from_mm1 partition(key_mm='455') select key from intermediate; +insert into table part_from_mm1 partition(key_mm='456') select key from intermediate; +select * from part_from_mm1 s1 order by key, key_mm; +alter table part_from_mm1 unset tblproperties('transactional_properties', 'transactional'); +select * from part_from_mm1 s2 order by key, key_mm; +insert into table part_from_mm1 partition(key_mm='456') select key from intermediate; +insert into table part_from_mm1 partition(key_mm='457') select key from intermediate; +select * from part_from_mm1 s3 order by key, key_mm; +drop table part_from_mm1; + +drop table part_from_mm2; +create table part_from_mm2(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +insert into table part_from_mm2 partition(key_mm='456') select key from intermediate;--fails here +insert into table part_from_mm2 partition(key_mm='455') select key from intermediate; +select * from part_from_mm2 s1 order by key, key_mm; +alter table part_from_mm2 set tblproperties("transactional"="false", 'transactional_properties'='false'); +select * from part_from_mm2 s2 order by key, key_mm; +insert into table part_from_mm2 partition(key_mm='457') select key from intermediate; +select * from part_from_mm2 s3 order by key, key_mm; +drop table part_from_mm2; + +drop table part_to_mm; +create table part_to_mm(key int) partitioned by (key_mm int) stored as orc; +insert into table part_to_mm partition(key_mm='455') select key from intermediate; +insert into table part_to_mm partition(key_mm='456') select key from intermediate; +select * from part_to_mm s1 order by key, key_mm; +alter table part_to_mm set tblproperties("transactional"="true", "transactional_properties"="insert_only"); +select * from part_to_mm s2 order by key, key_mm; +insert into table part_to_mm partition(key_mm='456') select key from intermediate; +insert into table part_to_mm partition(key_mm='457') select key from intermediate; +select * from part_to_mm s3 order by key, key_mm; +drop table part_to_mm; + +drop table intermediate; diff --git a/ql/src/test/queries/clientpositive/mm_exchangepartition.q b/ql/src/test/queries/clientpositive/mm_exchangepartition.q new file mode 100644 index 0000000..0c04136 --- /dev/null +++ b/ql/src/test/queries/clientpositive/mm_exchangepartition.q @@ -0,0 +1,68 
@@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +drop database if exists ex1; +drop database if exists ex2; + +create database ex1; +create database ex2; + +CREATE TABLE ex1.exchange_part_test1 (f1 string) PARTITIONED BY (ds STRING) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only"); +CREATE TABLE ex2.exchange_part_test2 (f1 string) PARTITIONED BY (ds STRING) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only"); +SHOW PARTITIONS ex1.exchange_part_test1; +SHOW PARTITIONS ex2.exchange_part_test2; + +ALTER TABLE ex2.exchange_part_test2 ADD PARTITION (ds='2013-04-05'); +SHOW PARTITIONS ex1.exchange_part_test1; +SHOW PARTITIONS ex2.exchange_part_test2; + +ALTER TABLE ex1.exchange_part_test1 EXCHANGE PARTITION (ds='2013-04-05') WITH TABLE ex2.exchange_part_test2; +SHOW PARTITIONS ex1.exchange_part_test1; +SHOW PARTITIONS ex2.exchange_part_test2; + + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; +DROP TABLE IF EXISTS t4; +DROP TABLE IF EXISTS t5; +DROP TABLE IF EXISTS t6; + +CREATE TABLE t1 (a int) PARTITIONED BY (d1 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only"); +CREATE TABLE t2 (a int) PARTITIONED BY (d1 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only"); +CREATE TABLE t3 (a int) PARTITIONED BY (d1 int, d2 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only"); +CREATE TABLE t4 (a int) PARTITIONED BY (d1 int, d2 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only"); +CREATE TABLE t5 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only"); +CREATE TABLE t6 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only"); +set hive.mapred.mode=nonstrict; + +INSERT INTO TABLE t1 PARTITION (d1 = 1) SELECT key FROM src where key = 100 limit 1; +INSERT INTO TABLE t3 PARTITION (d1 = 1, d2 = 1) SELECT key FROM src where key = 100 limit 1; +INSERT INTO TABLE t5 PARTITION (d1 = 1, d2 = 1, d3=1) SELECT key FROM src where key = 100 limit 1; + +SELECT * FROM t1; + +SELECT * FROM t3; + +SELECT * FROM t5; + +ALTER TABLE t2 EXCHANGE PARTITION (d1 = 1) WITH TABLE t1; +SELECT * FROM t1; +SELECT * FROM t2; + +ALTER TABLE t4 EXCHANGE PARTITION (d1 = 1, d2 = 1) WITH TABLE t3; +SELECT * FROM t3; +SELECT * FROM t4; + +ALTER TABLE t6 EXCHANGE PARTITION (d1 = 1, d2 = 1, d3 = 1) WITH TABLE t5; +SELECT * FROM t5; +SELECT * FROM t6; + +DROP DATABASE ex1 CASCADE; +DROP DATABASE ex2 CASCADE; +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; +DROP TABLE t4; +DROP TABLE t5; +DROP TABLE t6; diff --git a/ql/src/test/queries/clientpositive/mm_exim.q b/ql/src/test/queries/clientpositive/mm_exim.q new file mode 100644 index 0000000..8a03f4d --- /dev/null +++ b/ql/src/test/queries/clientpositive/mm_exim.q @@ -0,0 +1,98 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set tez.grouping.min-size=1; +set tez.grouping.max-size=2; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + + +drop table intermediate; +create table intermediate(key int) partitioned by (p int) stored as orc; +insert into table intermediate partition(p='455') select distinct key from src 
where key >= 0 order by key desc limit 2; +insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2; +insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2; + +drop table intermediate_nonpart; +drop table intermmediate_part; +drop table intermmediate_nonpart; +create table intermediate_nonpart(key int, p int); +insert into intermediate_nonpart select * from intermediate; +create table intermmediate_nonpart(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); +insert into intermmediate_nonpart select * from intermediate; +create table intermmediate(key int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); +insert into table intermmediate partition(p) select key, p from intermediate; + +set hive.exim.test.mode=true; + +export table intermediate_nonpart to 'ql/test/data/exports/intermediate_nonpart'; +export table intermmediate_nonpart to 'ql/test/data/exports/intermmediate_nonpart'; +export table intermediate to 'ql/test/data/exports/intermediate_part'; +export table intermmediate to 'ql/test/data/exports/intermmediate_part'; + +drop table intermediate_nonpart; +drop table intermmediate_part; +drop table intermmediate_nonpart; + +-- non-MM export to MM table, with and without partitions + +drop table import0_mm; +create table import0_mm(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); +import table import0_mm from 'ql/test/data/exports/intermediate_nonpart'; +select * from import0_mm order by key, p; +drop table import0_mm; + + + +drop table import1_mm; +create table import1_mm(key int) partitioned by (p int) + stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only"); +import table import1_mm from 'ql/test/data/exports/intermediate_part'; +select * from import1_mm order by key, p; +drop table import1_mm; + + +-- MM export into new MM table, non-part and part + +drop table import2_mm; +import table import2_mm from 'ql/test/data/exports/intermmediate_nonpart'; +desc import2_mm; +select * from import2_mm order by key, p; +drop table import2_mm; + +drop table import3_mm; +import table import3_mm from 'ql/test/data/exports/intermmediate_part'; +desc import3_mm; +select * from import3_mm order by key, p; +drop table import3_mm; + +-- MM export into existing MM table, non-part and partial part + +drop table import4_mm; +create table import4_mm(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); +import table import4_mm from 'ql/test/data/exports/intermmediate_nonpart'; +select * from import4_mm order by key, p; +drop table import4_mm; + +drop table import5_mm; +create table import5_mm(key int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only"); +import table import5_mm partition(p=455) from 'ql/test/data/exports/intermmediate_part'; +select * from import5_mm order by key, p; +drop table import5_mm; + +-- MM export into existing non-MM table, non-part and part + +drop table import6_mm; +create table import6_mm(key int, p int); +import table import6_mm from 'ql/test/data/exports/intermmediate_nonpart'; +select * from import6_mm order by key, p; +drop table import6_mm; + +drop table import7_mm; +create table import7_mm(key int) partitioned by (p int); +import table import7_mm from 
'ql/test/data/exports/intermmediate_part'; +select * from import7_mm order by key, p; +drop table import7_mm; + +set hive.exim.test.mode=false; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/mm_loaddata.q b/ql/src/test/queries/clientpositive/mm_loaddata.q new file mode 100644 index 0000000..7e5787f --- /dev/null +++ b/ql/src/test/queries/clientpositive/mm_loaddata.q @@ -0,0 +1,53 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set tez.grouping.min-size=1; +set tez.grouping.max-size=2; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + + +drop table load0_mm; +create table load0_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only"); +load data local inpath '../../data/files/kv1.txt' into table load0_mm; +select count(1) from load0_mm; +load data local inpath '../../data/files/kv2.txt' into table load0_mm; +select count(1) from load0_mm; +load data local inpath '../../data/files/kv2.txt' overwrite into table load0_mm; +select count(1) from load0_mm; +drop table load0_mm; + + +drop table intermediate2; +create table intermediate2 (key string, value string) stored as textfile +location 'file:${system:test.tmp.dir}/intermediate2'; +load data local inpath '../../data/files/kv1.txt' into table intermediate2; +load data local inpath '../../data/files/kv2.txt' into table intermediate2; +load data local inpath '../../data/files/kv3.txt' into table intermediate2; + +drop table load1_mm; +create table load1_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only"); +load data inpath 'file:${system:test.tmp.dir}/intermediate2/kv2.txt' into table load1_mm; +load data inpath 'file:${system:test.tmp.dir}/intermediate2/kv1.txt' into table load1_mm; +select count(1) from load1_mm; +load data local inpath '../../data/files/kv1.txt' into table intermediate2; +load data local inpath '../../data/files/kv2.txt' into table intermediate2; +load data local inpath '../../data/files/kv3.txt' into table intermediate2; +load data inpath 'file:${system:test.tmp.dir}/intermediate2/kv*.txt' overwrite into table load1_mm; +select count(1) from load1_mm; +load data local inpath '../../data/files/kv2.txt' into table intermediate2; +load data inpath 'file:${system:test.tmp.dir}/intermediate2/kv2.txt' overwrite into table load1_mm; +select count(1) from load1_mm; +drop table load1_mm; + +drop table load2_mm; +create table load2_mm (key string, value string) + partitioned by (k int, l int) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only"); +load data local inpath '../../data/files/kv1.txt' into table intermediate2; +load data local inpath '../../data/files/kv2.txt' into table intermediate2; +load data local inpath '../../data/files/kv3.txt' into table intermediate2; +load data inpath 'file:${system:test.tmp.dir}/intermediate2/kv*.txt' into table load2_mm partition(k=5, l=5); +select count(1) from load2_mm; +drop table load2_mm; +drop table intermediate2; \ No newline at end of file diff --git a/ql/src/test/results/clientnegative/mm_bucket_convert.q.out b/ql/src/test/results/clientnegative/mm_bucket_convert.q.out new file mode 100644 index 0000000..b732d3e --- /dev/null +++ b/ql/src/test/results/clientnegative/mm_bucket_convert.q.out @@ -0,0 +1,41 
@@ +PREHOOK: query: drop table bucket0_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucket0_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table bucket1_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucket1_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table bucket0_mm(key int, id int) clustered by (key) into 2 buckets + tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket0_mm +POSTHOOK: query: create table bucket0_mm(key int, id int) clustered by (key) into 2 buckets + tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket0_mm +PREHOOK: query: create table bucket1_mm(key int, id int) clustered by (key) into 2 buckets + tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket1_mm +POSTHOOK: query: create table bucket1_mm(key int, id int) clustered by (key) into 2 buckets + tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket1_mm +PREHOOK: query: alter table bucket0_mm unset tblproperties('transactional_properties', 'transactional') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@bucket0_mm +PREHOOK: Output: default@bucket0_mm +POSTHOOK: query: alter table bucket0_mm unset tblproperties('transactional_properties', 'transactional') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@bucket0_mm +POSTHOOK: Output: default@bucket0_mm +PREHOOK: query: alter table bucket1_mm unset tblproperties('transactional_properties', 'transactional') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@bucket1_mm +PREHOOK: Output: default@bucket1_mm +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Converting bucketed tables from MM is not supported by default; copying files from multiple MM directories may potentially break the buckets. You can set hive.strict.checks.bucketing to false for this query if you want to force the conversion. 
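(For reference, the error above names hive.strict.checks.bucketing as the escape hatch. A minimal HiveQL sketch of that path is shown below; it simply mirrors what mm_bucket_convert.q does for bucket0_mm earlier in this patch, so nothing here is new behavior.)

-- Relaxing the strict bucketing check lets the MM-to-non-MM conversion of a bucketed table proceed.
set hive.strict.checks.bucketing=false;
alter table bucket0_mm unset tblproperties('transactional_properties', 'transactional');
-- Restoring the default makes the same conversion fail again, as the bucket1_mm case above demonstrates.
set hive.strict.checks.bucketing=true;
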
diff --git a/ql/src/test/results/clientnegative/mm_concatenate.q.out b/ql/src/test/results/clientnegative/mm_concatenate.q.out new file mode 100644 index 0000000..5c004b9 --- /dev/null +++ b/ql/src/test/results/clientnegative/mm_concatenate.q.out @@ -0,0 +1,18 @@ +PREHOOK: query: create table concat_mm (id int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@concat_mm +POSTHOOK: query: create table concat_mm (id int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@concat_mm +PREHOOK: query: insert into table concat_mm select key from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@concat_mm +POSTHOOK: query: insert into table concat_mm select key from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@concat_mm +POSTHOOK: Lineage: concat_mm.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: Merge is not supported for MM tables diff --git a/ql/src/test/results/clientnegative/mm_truncate_cols.q.out b/ql/src/test/results/clientnegative/mm_truncate_cols.q.out new file mode 100644 index 0000000..62dd222 --- /dev/null +++ b/ql/src/test/results/clientnegative/mm_truncate_cols.q.out @@ -0,0 +1,9 @@ +PREHOOK: query: CREATE TABLE mm_table(key int, value string) stored as rcfile tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@mm_table +POSTHOOK: query: CREATE TABLE mm_table(key int, value string) stored as rcfile tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mm_table +FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: Truncating MM table columns not presently supported diff --git a/ql/src/test/results/clientpositive/llap/mm_all.q.out b/ql/src/test/results/clientpositive/llap/mm_all.q.out new file mode 100644 index 0000000..f184ba6 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/mm_all.q.out @@ -0,0 +1,1435 @@ +PREHOOK: query: drop table intermediate +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermediate +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@intermediate +POSTHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermediate +PREHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=455 +POSTHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=455 +POSTHOOK: Lineage: intermediate PARTITION(p=455).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=456 +POSTHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=456 +POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=457 +POSTHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=457 +POSTHOOK: Lineage: intermediate PARTITION(p=457).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: drop table part_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table part_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_mm +POSTHOOK: query: create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_mm +PREHOOK: query: explain insert into table part_mm partition(key_mm=455) select key from intermediate +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table part_mm partition(key_mm=455) select key from intermediate +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: intermediate + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.part_mm + Write Type: INSERT + Execution mode: llap + LLAP IO: all inputs + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + key_mm 455 + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.part_mm + micromanaged table: true + + Stage: Stage-3 + Stats-Aggr 
Operator + +PREHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_mm partition(key_mm=456) select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_mm@key_mm=456 +POSTHOOK: query: insert into table part_mm partition(key_mm=456) select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_mm@key_mm=456 +POSTHOOK: Lineage: part_mm PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_mm order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_mm +PREHOOK: Input: default@part_mm@key_mm=455 +PREHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_mm order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_mm +POSTHOOK: Input: default@part_mm@key_mm=455 +POSTHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +0 455 +0 455 +0 456 +10 455 +10 455 +10 456 +97 455 +97 455 +97 456 +98 455 +98 455 +98 456 +100 455 +100 455 +100 456 +103 455 +103 455 +103 456 +PREHOOK: query: select * from part_mm order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_mm +PREHOOK: Input: default@part_mm@key_mm=455 +PREHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_mm order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_mm +POSTHOOK: Input: default@part_mm@key_mm=455 +POSTHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +0 
455 +0 455 +0 456 +10 455 +10 455 +10 456 +97 455 +97 455 +97 456 +98 455 +98 455 +98 456 +100 455 +100 455 +100 456 +103 455 +103 455 +103 456 +PREHOOK: query: truncate table part_mm +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@part_mm@key_mm=455 +PREHOOK: Output: default@part_mm@key_mm=456 +POSTHOOK: query: truncate table part_mm +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: Output: default@part_mm@key_mm=456 +PREHOOK: query: select * from part_mm order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_mm +PREHOOK: Input: default@part_mm@key_mm=455 +PREHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_mm order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_mm +POSTHOOK: Input: default@part_mm@key_mm=455 +POSTHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +PREHOOK: query: drop table part_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_mm +PREHOOK: Output: default@part_mm +POSTHOOK: query: drop table part_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_mm +POSTHOOK: Output: default@part_mm +PREHOOK: query: drop table simple_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table simple_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table simple_mm(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@simple_mm +POSTHOOK: query: create table simple_mm(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@simple_mm +PREHOOK: query: insert into table simple_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_mm +POSTHOOK: query: insert into table simple_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_mm +POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_mm order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_mm order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_mm +#### A masked pattern was here #### +0 +10 +97 +98 +100 +103 +PREHOOK: query: insert into table simple_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_mm +POSTHOOK: query: insert into table simple_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_mm +POSTHOOK: Lineage: simple_mm.key SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_mm order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_mm order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_mm +#### A masked pattern was here #### +0 +0 +10 +10 +97 +97 +98 +98 +100 +100 +103 +103 +PREHOOK: query: truncate table simple_mm +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@simple_mm +POSTHOOK: query: truncate table simple_mm +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@simple_mm +PREHOOK: query: select * from simple_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_mm +#### A masked pattern was here #### +PREHOOK: query: drop table simple_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@simple_mm +PREHOOK: Output: default@simple_mm +POSTHOOK: query: drop table simple_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@simple_mm +POSTHOOK: Output: default@simple_mm +PREHOOK: query: drop table dp_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table dp_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc + tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dp_mm +POSTHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc + tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dp_mm +PREHOOK: query: insert into table dp_mm partition (key1='123', key2) select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@dp_mm@key1=123 +POSTHOOK: query: insert into table dp_mm partition (key1='123', key2) select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@dp_mm@key1=123/key2=0 +POSTHOOK: Output: default@dp_mm@key1=123/key2=10 +POSTHOOK: Output: default@dp_mm@key1=123/key2=100 +POSTHOOK: Output: default@dp_mm@key1=123/key2=103 +POSTHOOK: Output: default@dp_mm@key1=123/key2=97 +POSTHOOK: Output: default@dp_mm@key1=123/key2=98 +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=0).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=100).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=103).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=10).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=97).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm 
PARTITION(key1=123,key2=98).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from dp_mm order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@dp_mm +PREHOOK: Input: default@dp_mm@key1=123/key2=0 +PREHOOK: Input: default@dp_mm@key1=123/key2=10 +PREHOOK: Input: default@dp_mm@key1=123/key2=100 +PREHOOK: Input: default@dp_mm@key1=123/key2=103 +PREHOOK: Input: default@dp_mm@key1=123/key2=97 +PREHOOK: Input: default@dp_mm@key1=123/key2=98 +#### A masked pattern was here #### +POSTHOOK: query: select * from dp_mm order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dp_mm +POSTHOOK: Input: default@dp_mm@key1=123/key2=0 +POSTHOOK: Input: default@dp_mm@key1=123/key2=10 +POSTHOOK: Input: default@dp_mm@key1=123/key2=100 +POSTHOOK: Input: default@dp_mm@key1=123/key2=103 +POSTHOOK: Input: default@dp_mm@key1=123/key2=97 +POSTHOOK: Input: default@dp_mm@key1=123/key2=98 +#### A masked pattern was here #### +0 123 0 +10 123 10 +97 123 97 +98 123 98 +100 123 100 +103 123 103 +PREHOOK: query: drop table dp_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dp_mm +PREHOOK: Output: default@dp_mm +POSTHOOK: query: drop table dp_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dp_mm +POSTHOOK: Output: default@dp_mm +PREHOOK: query: create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@union_mm +POSTHOOK: query: create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_mm +PREHOOK: query: insert into table union_mm +select temps.p from ( +select key as p from intermediate +union all +select key + 1 as p from intermediate ) temps +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@union_mm +POSTHOOK: query: insert into table union_mm +select temps.p from ( +select key as p from intermediate +union all +select key + 1 as p from intermediate ) temps +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@union_mm +POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from union_mm order by id +PREHOOK: type: QUERY +PREHOOK: Input: default@union_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from union_mm order by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_mm +#### A masked pattern was here #### +0 +1 +10 +11 +97 +98 +98 +99 +100 +101 +103 +104 +PREHOOK: query: insert into table union_mm +select p from +( +select key + 1 as p from intermediate +union all +select key from intermediate +) tab group by p +union all +select key + 2 as p from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@union_mm +POSTHOOK: query: insert into table union_mm +select p from +( +select key + 1 as p from intermediate +union all 
+select key from intermediate +) tab group by p +union all +select key + 2 as p from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@union_mm +POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from union_mm order by id +PREHOOK: type: QUERY +PREHOOK: Input: default@union_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from union_mm order by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_mm +#### A masked pattern was here #### +0 +1 +2 +10 +11 +12 +97 +98 +99 +99 +100 +100 +101 +102 +103 +104 +105 +PREHOOK: query: insert into table union_mm +SELECT p FROM +( + SELECT key + 1 as p FROM intermediate + UNION ALL + SELECT key as p FROM ( + SELECT distinct key FROM ( + SELECT key FROM ( + SELECT key + 2 as key FROM intermediate + UNION ALL + SELECT key FROM intermediate + )t1 + group by key)t2 + )t3 +)t4 +group by p +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@union_mm +POSTHOOK: query: insert into table union_mm +SELECT p FROM +( + SELECT key + 1 as p FROM intermediate + UNION ALL + SELECT key as p FROM ( + SELECT distinct key FROM ( + SELECT key FROM ( + SELECT key + 2 as key FROM intermediate + UNION ALL + SELECT key FROM intermediate + )t1 + group by key)t2 + )t3 +)t4 +group by p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@union_mm +POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from union_mm order by id +PREHOOK: type: QUERY +PREHOOK: Input: default@union_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from union_mm order by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_mm +#### A masked pattern was here #### +0 +0 +1 +1 +2 +2 +10 +10 +11 +11 +12 +12 +97 +97 +98 +98 +99 +99 +99 +100 +100 +100 +101 +101 +102 +102 +103 +103 +104 +104 +105 +105 +PREHOOK: query: drop table union_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@union_mm +PREHOOK: Output: default@union_mm +POSTHOOK: query: drop table union_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@union_mm +POSTHOOK: Output: default@union_mm +PREHOOK: query: create table partunion_mm(id int) partitioned by (key int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partunion_mm +POSTHOOK: query: create table partunion_mm(id int) partitioned by (key int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partunion_mm +PREHOOK: query: insert into table partunion_mm partition(key) +select temps.* from ( +select key as p, key from intermediate +union all +select key + 1 as p, key + 1 from intermediate ) temps +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 
+PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@partunion_mm +POSTHOOK: query: insert into table partunion_mm partition(key) +select temps.* from ( +select key as p, key from intermediate +union all +select key + 1 as p, key + 1 from intermediate ) temps +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +PREHOOK: query: select * from partunion_mm order by id +PREHOOK: type: QUERY +PREHOOK: Input: default@partunion_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from partunion_mm order by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partunion_mm +#### A masked pattern was here #### +PREHOOK: query: drop table partunion_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partunion_mm +PREHOOK: Output: default@partunion_mm +POSTHOOK: query: drop table partunion_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partunion_mm +POSTHOOK: Output: default@partunion_mm +PREHOOK: query: create table skew_mm(k1 int, k2 int, k4 int) skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) + stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@skew_mm +POSTHOOK: query: create table skew_mm(k1 int, k2 int, k4 int) skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) + stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@skew_mm +PREHOOK: query: insert into table skew_mm +select key, key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@skew_mm +POSTHOOK: query: insert into table skew_mm +select key, key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@skew_mm +POSTHOOK: Lineage: skew_mm.k1 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_mm.k2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_mm.k4 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from skew_mm order by k2, k1, k4 +PREHOOK: type: QUERY +PREHOOK: Input: default@skew_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from skew_mm order by k2, k1, k4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skew_mm +#### A masked pattern was here #### +0 0 0 +10 10 10 +97 97 97 +98 98 98 +100 100 100 +103 103 103 +PREHOOK: query: drop table skew_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@skew_mm +PREHOOK: Output: default@skew_mm +POSTHOOK: query: drop table skew_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@skew_mm +POSTHOOK: Output: default@skew_mm +PREHOOK: query: create table skew_dp_union_mm(k1 int, k2 int, k4 int) partitioned by (k3 int) +skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) stored as directories tblproperties ("transactional"="true", 
"transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@skew_dp_union_mm +POSTHOOK: query: create table skew_dp_union_mm(k1 int, k2 int, k4 int) partitioned by (k3 int) +skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@skew_dp_union_mm +PREHOOK: query: insert into table skew_dp_union_mm partition (k3) +select key as i, key as j, key as k, key as l from intermediate +union all +select key +1 as i, key +2 as j, key +3 as k, key +4 as l from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@skew_dp_union_mm +POSTHOOK: query: insert into table skew_dp_union_mm partition (k3) +select key as i, key as j, key as k, key as l from intermediate +union all +select key +1 as i, key +2 as j, key +3 as k, key +4 as l from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +PREHOOK: query: select * from skew_dp_union_mm order by k2, k1, k4 +PREHOOK: type: QUERY +PREHOOK: Input: default@skew_dp_union_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from skew_dp_union_mm order by k2, k1, k4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skew_dp_union_mm +#### A masked pattern was here #### +PREHOOK: query: drop table skew_dp_union_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@skew_dp_union_mm +PREHOOK: Output: default@skew_dp_union_mm +POSTHOOK: query: drop table skew_dp_union_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@skew_dp_union_mm +POSTHOOK: Output: default@skew_dp_union_mm +PREHOOK: query: create table merge0_mm (id int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@merge0_mm +POSTHOOK: query: create table merge0_mm (id int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@merge0_mm +PREHOOK: query: insert into table merge0_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge0_mm +POSTHOOK: query: insert into table merge0_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge0_mm +POSTHOOK: Lineage: merge0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from merge0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@merge0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from merge0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge0_mm +#### A masked pattern was here #### +98 +97 +100 +103 +0 +10 
+PREHOOK: query: insert into table merge0_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge0_mm +POSTHOOK: query: insert into table merge0_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge0_mm +POSTHOOK: Lineage: merge0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from merge0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@merge0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from merge0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge0_mm +#### A masked pattern was here #### +98 +97 +100 +103 +0 +10 +98 +97 +100 +103 +0 +10 +PREHOOK: query: drop table merge0_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@merge0_mm +PREHOOK: Output: default@merge0_mm +POSTHOOK: query: drop table merge0_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@merge0_mm +POSTHOOK: Output: default@merge0_mm +PREHOOK: query: create table merge2_mm (id int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@merge2_mm +POSTHOOK: query: create table merge2_mm (id int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@merge2_mm +PREHOOK: query: insert into table merge2_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge2_mm +POSTHOOK: query: insert into table merge2_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge2_mm +POSTHOOK: Lineage: merge2_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from merge2_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@merge2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from merge2_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge2_mm +#### A masked pattern was here #### +98 +97 +100 +103 +0 +10 +PREHOOK: query: insert into table merge2_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge2_mm +POSTHOOK: query: insert into table merge2_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge2_mm +POSTHOOK: Lineage: merge2_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: 
query: select * from merge2_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@merge2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from merge2_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge2_mm +#### A masked pattern was here #### +98 +97 +100 +103 +0 +10 +98 +97 +100 +103 +0 +10 +PREHOOK: query: drop table merge2_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@merge2_mm +PREHOOK: Output: default@merge2_mm +POSTHOOK: query: drop table merge2_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@merge2_mm +POSTHOOK: Output: default@merge2_mm +PREHOOK: query: create table merge1_mm (id int) partitioned by (key int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@merge1_mm +POSTHOOK: query: create table merge1_mm (id int) partitioned by (key int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@merge1_mm +PREHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge1_mm +POSTHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge1_mm@key=0 +POSTHOOK: Output: default@merge1_mm@key=10 +POSTHOOK: Output: default@merge1_mm@key=100 +POSTHOOK: Output: default@merge1_mm@key=103 +POSTHOOK: Output: default@merge1_mm@key=97 +POSTHOOK: Output: default@merge1_mm@key=98 +POSTHOOK: Lineage: merge1_mm PARTITION(key=0).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=100).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=103).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=10).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=97).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=98).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from merge1_mm order by id, key +PREHOOK: type: QUERY +PREHOOK: Input: default@merge1_mm +PREHOOK: Input: default@merge1_mm@key=0 +PREHOOK: Input: default@merge1_mm@key=10 +PREHOOK: Input: default@merge1_mm@key=100 +PREHOOK: Input: default@merge1_mm@key=103 +PREHOOK: Input: default@merge1_mm@key=97 +PREHOOK: Input: default@merge1_mm@key=98 +#### A masked pattern was here #### +POSTHOOK: query: select * from merge1_mm order by id, key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge1_mm +POSTHOOK: Input: default@merge1_mm@key=0 +POSTHOOK: Input: default@merge1_mm@key=10 +POSTHOOK: Input: default@merge1_mm@key=100 +POSTHOOK: Input: default@merge1_mm@key=103 +POSTHOOK: Input: 
default@merge1_mm@key=97 +POSTHOOK: Input: default@merge1_mm@key=98 +#### A masked pattern was here #### +0 0 +10 10 +97 97 +98 98 +100 100 +103 103 +PREHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge1_mm +POSTHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge1_mm@key=0 +POSTHOOK: Output: default@merge1_mm@key=10 +POSTHOOK: Output: default@merge1_mm@key=100 +POSTHOOK: Output: default@merge1_mm@key=103 +POSTHOOK: Output: default@merge1_mm@key=97 +POSTHOOK: Output: default@merge1_mm@key=98 +POSTHOOK: Lineage: merge1_mm PARTITION(key=0).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=100).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=103).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=10).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=97).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=98).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from merge1_mm order by id, key +PREHOOK: type: QUERY +PREHOOK: Input: default@merge1_mm +PREHOOK: Input: default@merge1_mm@key=0 +PREHOOK: Input: default@merge1_mm@key=10 +PREHOOK: Input: default@merge1_mm@key=100 +PREHOOK: Input: default@merge1_mm@key=103 +PREHOOK: Input: default@merge1_mm@key=97 +PREHOOK: Input: default@merge1_mm@key=98 +#### A masked pattern was here #### +POSTHOOK: query: select * from merge1_mm order by id, key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge1_mm +POSTHOOK: Input: default@merge1_mm@key=0 +POSTHOOK: Input: default@merge1_mm@key=10 +POSTHOOK: Input: default@merge1_mm@key=100 +POSTHOOK: Input: default@merge1_mm@key=103 +POSTHOOK: Input: default@merge1_mm@key=97 +POSTHOOK: Input: default@merge1_mm@key=98 +#### A masked pattern was here #### +0 0 +0 0 +10 10 +10 10 +97 97 +97 97 +98 98 +98 98 +100 100 +100 100 +103 103 +103 103 +PREHOOK: query: drop table merge1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@merge1_mm +PREHOOK: Output: default@merge1_mm +POSTHOOK: query: drop table merge1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@merge1_mm +POSTHOOK: Output: default@merge1_mm +PREHOOK: query: drop table load0_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table load0_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table load0_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@load0_mm +POSTHOOK: query: create table load0_mm (key string, value string) stored as textfile tblproperties("transactional"="true", 
"transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@load0_mm +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_mm +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_mm +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_mm +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_mm +PREHOOK: query: select count(1) from load0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_mm +#### A masked pattern was here #### +500 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_mm +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_mm +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_mm +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_mm +PREHOOK: query: select count(1) from load0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_mm +#### A masked pattern was here #### +1000 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' overwrite into table load0_mm +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_mm +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' overwrite into table load0_mm +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_mm +PREHOOK: query: select count(1) from load0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_mm +#### A masked pattern was here #### +500 +PREHOOK: query: drop table load0_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@load0_mm +PREHOOK: Output: default@load0_mm +POSTHOOK: query: drop table load0_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@load0_mm +POSTHOOK: Output: default@load0_mm +PREHOOK: query: drop table intermediate2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermediate2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table intermediate2 (key string, value string) stored as textfile +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: create table intermediate2 (key string, value string) stored as textfile +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +PREHOOK: type: 
LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: drop table load1_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table load1_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table load1_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@load1_mm +POSTHOOK: query: create table load1_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@load1_mm +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load1_mm +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load1_mm +PREHOOK: query: select count(1) from load1_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load1_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load1_mm +#### A masked pattern was here #### +1000 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load1_mm +PREHOOK: 
query: select count(1) from load1_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load1_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load1_mm +#### A masked pattern was here #### +1050 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load1_mm +PREHOOK: query: select count(1) from load1_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load1_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load1_mm +#### A masked pattern was here #### +500 +PREHOOK: query: drop table load1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@load1_mm +PREHOOK: Output: default@load1_mm +POSTHOOK: query: drop table load1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@load1_mm +POSTHOOK: Output: default@load1_mm +PREHOOK: query: drop table load2_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table load2_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table load2_mm (key string, value string) + partitioned by (k int, l int) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@load2_mm +POSTHOOK: query: create table load2_mm (key string, value string) + partitioned by (k int, l int) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@load2_mm +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load2_mm +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked 
pattern was here #### +POSTHOOK: Output: default@load2_mm +POSTHOOK: Output: default@load2_mm@k=5/l=5 +PREHOOK: query: select count(1) from load2_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load2_mm +PREHOOK: Input: default@load2_mm@k=5/l=5 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load2_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load2_mm +POSTHOOK: Input: default@load2_mm@k=5/l=5 +#### A masked pattern was here #### +1025 +PREHOOK: query: drop table load2_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@load2_mm +PREHOOK: Output: default@load2_mm +POSTHOOK: query: drop table load2_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@load2_mm +POSTHOOK: Output: default@load2_mm +PREHOOK: query: drop table intermediate2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@intermediate2 +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: drop table intermediate2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@intermediate2 +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: drop table multi0_1_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table multi0_1_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table multi0_2_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table multi0_2_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table multi0_1_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@multi0_1_mm +POSTHOOK: query: create table multi0_1_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@multi0_1_mm +PREHOOK: query: create table multi0_2_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@multi0_2_mm +POSTHOOK: query: create table multi0_2_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@multi0_2_mm +PREHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet1_mm +POSTHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet1_mm +PREHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2) +PREHOOK: type: QUERY +PREHOOK: Output: default@parquet1_mm +POSTHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@parquet1_mm +POSTHOOK: Lineage: parquet1_mm.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet2_mm +POSTHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties 
("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet2_mm +PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1') +PREHOOK: type: QUERY +PREHOOK: Output: default@parquet2_mm +POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@parquet2_mm +POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2') +PREHOOK: type: QUERY +PREHOOK: Output: default@parquet2_mm +POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@parquet2_mm +POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm + JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id + JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id +where t1.value = 'value1' and t2.value = 'value2' +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet1_mm +PREHOOK: Input: default@parquet2_mm +#### A masked pattern was here #### +POSTHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm + JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id + JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id +where t1.value = 'value1' and t2.value = 'value2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet1_mm +POSTHOOK: Input: default@parquet2_mm +#### A masked pattern was here #### +1 value1 value2 +PREHOOK: query: drop table parquet1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet1_mm +PREHOOK: Output: default@parquet1_mm +POSTHOOK: query: drop table parquet1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet1_mm +POSTHOOK: Output: default@parquet1_mm +PREHOOK: query: drop table parquet2_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet2_mm +PREHOOK: Output: default@parquet2_mm +POSTHOOK: query: drop table parquet2_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet2_mm +POSTHOOK: Output: default@parquet2_mm +PREHOOK: query: drop table intermediate +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@intermediate +PREHOOK: Output: default@intermediate +POSTHOOK: query: drop table intermediate +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@intermediate +POSTHOOK: Output: default@intermediate diff --git a/ql/src/test/results/clientpositive/llap/mm_conversions.q.out b/ql/src/test/results/clientpositive/llap/mm_conversions.q.out new file mode 100644 index 0000000..d95a70e --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/mm_conversions.q.out @@ -0,0 +1,853 @@ +PREHOOK: query: drop table intermediate +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermediate +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@intermediate +POSTHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc 
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermediate +PREHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=455 +POSTHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=455 +POSTHOOK: Lineage: intermediate PARTITION(p=455).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=456 +POSTHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=456 +POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=457 +POSTHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=457 +POSTHOOK: Lineage: intermediate PARTITION(p=457).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: drop table simple_from_mm1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table simple_from_mm1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table simple_from_mm1(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: create table simple_from_mm1(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@simple_from_mm1 +PREHOOK: query: insert into table simple_from_mm1 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: insert into table simple_from_mm1 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm1 +POSTHOOK: Lineage: simple_from_mm1.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table simple_from_mm1 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: 
default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: insert into table simple_from_mm1 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm1 +POSTHOOK: Lineage: simple_from_mm1.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_from_mm1 s1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_from_mm1 s1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +0 +0 +98 +98 +100 +100 +PREHOOK: query: alter table simple_from_mm1 unset tblproperties('transactional_properties', 'transactional') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@simple_from_mm1 +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: alter table simple_from_mm1 unset tblproperties('transactional_properties', 'transactional') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@simple_from_mm1 +POSTHOOK: Output: default@simple_from_mm1 +PREHOOK: query: select * from simple_from_mm1 s2 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_from_mm1 s2 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +0 +0 +98 +98 +100 +100 +PREHOOK: query: insert into table simple_from_mm1 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: insert into table simple_from_mm1 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm1 +POSTHOOK: Lineage: simple_from_mm1.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_from_mm1 s3 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_from_mm1 s3 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +0 +0 +0 +98 +98 +98 +100 +100 +100 +PREHOOK: query: drop table simple_from_mm1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@simple_from_mm1 +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: drop table simple_from_mm1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@simple_from_mm1 +POSTHOOK: Output: default@simple_from_mm1 +PREHOOK: query: drop table simple_from_mm2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table simple_from_mm2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table simple_from_mm2(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@simple_from_mm2 +POSTHOOK: query: create table simple_from_mm2(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@simple_from_mm2 +PREHOOK: query: insert into table simple_from_mm2 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm2 +POSTHOOK: query: insert into table simple_from_mm2 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm2 +POSTHOOK: Lineage: simple_from_mm2.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table simple_from_mm2 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm2 +POSTHOOK: query: insert into table simple_from_mm2 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm2 +POSTHOOK: Lineage: simple_from_mm2.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_from_mm2 s1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_from_mm2 s1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +0 +0 +98 +98 +100 +100 +PREHOOK: query: alter table simple_from_mm2 set tblproperties("transactional"="false", 'transactional_properties'='false') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@simple_from_mm2 +PREHOOK: Output: default@simple_from_mm2 +POSTHOOK: query: alter table simple_from_mm2 set tblproperties("transactional"="false", 'transactional_properties'='false') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@simple_from_mm2 +POSTHOOK: Output: default@simple_from_mm2 +PREHOOK: query: select * from simple_from_mm2 s2 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_from_mm2 s2 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +0 +0 +98 +98 +100 +100 +PREHOOK: query: insert into table simple_from_mm2 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm2 +POSTHOOK: query: insert into table simple_from_mm2 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 
+POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm2 +POSTHOOK: Lineage: simple_from_mm2.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_from_mm2 s3 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_from_mm2 s3 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +0 +0 +0 +98 +98 +98 +100 +100 +100 +PREHOOK: query: drop table simple_from_mm2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@simple_from_mm2 +PREHOOK: Output: default@simple_from_mm2 +POSTHOOK: query: drop table simple_from_mm2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@simple_from_mm2 +POSTHOOK: Output: default@simple_from_mm2 +PREHOOK: query: drop table simple_to_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table simple_to_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table simple_to_mm(key int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@simple_to_mm +POSTHOOK: query: create table simple_to_mm(key int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@simple_to_mm +PREHOOK: query: insert into table simple_to_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_to_mm +POSTHOOK: query: insert into table simple_to_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_to_mm +POSTHOOK: Lineage: simple_to_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_to_mm s1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_to_mm s1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +0 +98 +100 +PREHOOK: query: alter table simple_to_mm set tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@simple_to_mm +PREHOOK: Output: default@simple_to_mm +POSTHOOK: query: alter table simple_to_mm set tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@simple_to_mm +POSTHOOK: Output: default@simple_to_mm +PREHOOK: query: select * from simple_to_mm s2 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_to_mm s2 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +0 +98 +100 +PREHOOK: query: insert into table simple_to_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: 
default@simple_to_mm +POSTHOOK: query: insert into table simple_to_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_to_mm +POSTHOOK: Lineage: simple_to_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table simple_to_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_to_mm +POSTHOOK: query: insert into table simple_to_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_to_mm +POSTHOOK: Lineage: simple_to_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_to_mm s3 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_to_mm s3 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +0 +0 +0 +98 +98 +98 +100 +100 +100 +PREHOOK: query: drop table simple_to_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@simple_to_mm +PREHOOK: Output: default@simple_to_mm +POSTHOOK: query: drop table simple_to_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@simple_to_mm +POSTHOOK: Output: default@simple_to_mm +PREHOOK: query: drop table part_from_mm1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table part_from_mm1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table part_from_mm1(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_from_mm1 +POSTHOOK: query: create table part_from_mm1(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_from_mm1 +PREHOOK: query: insert into table part_from_mm1 partition(key_mm='455') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm1@key_mm=455 +POSTHOOK: query: insert into table part_from_mm1 partition(key_mm='455') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm1@key_mm=455 +POSTHOOK: Lineage: part_from_mm1 PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_from_mm1 partition(key_mm='455') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate 
+PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm1@key_mm=455 +POSTHOOK: query: insert into table part_from_mm1 partition(key_mm='455') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm1@key_mm=455 +POSTHOOK: Lineage: part_from_mm1 PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_from_mm1 partition(key_mm='456') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm1@key_mm=456 +POSTHOOK: query: insert into table part_from_mm1 partition(key_mm='456') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm1@key_mm=456 +POSTHOOK: Lineage: part_from_mm1 PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_from_mm1 s1 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm1 +PREHOOK: Input: default@part_from_mm1@key_mm=455 +PREHOOK: Input: default@part_from_mm1@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_from_mm1 s1 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm1 +POSTHOOK: Input: default@part_from_mm1@key_mm=455 +POSTHOOK: Input: default@part_from_mm1@key_mm=456 +#### A masked pattern was here #### +0 455 +0 455 +0 456 +98 455 +98 455 +98 456 +100 455 +100 455 +100 456 +PREHOOK: query: alter table part_from_mm1 unset tblproperties('transactional_properties', 'transactional') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@part_from_mm1 +PREHOOK: Output: default@part_from_mm1 +POSTHOOK: query: alter table part_from_mm1 unset tblproperties('transactional_properties', 'transactional') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@part_from_mm1 +POSTHOOK: Output: default@part_from_mm1 +PREHOOK: query: select * from part_from_mm1 s2 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm1 +PREHOOK: Input: default@part_from_mm1@key_mm=455 +PREHOOK: Input: default@part_from_mm1@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_from_mm1 s2 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm1 +POSTHOOK: Input: default@part_from_mm1@key_mm=455 +POSTHOOK: Input: default@part_from_mm1@key_mm=456 +#### A masked pattern was here #### +0 455 +0 455 +0 456 +98 455 +98 455 +98 456 +100 455 +100 455 +100 456 +PREHOOK: query: insert into table part_from_mm1 partition(key_mm='456') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm1@key_mm=456 +POSTHOOK: 
query: insert into table part_from_mm1 partition(key_mm='456') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm1@key_mm=456 +POSTHOOK: Lineage: part_from_mm1 PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_from_mm1 partition(key_mm='457') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm1@key_mm=457 +POSTHOOK: query: insert into table part_from_mm1 partition(key_mm='457') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm1@key_mm=457 +POSTHOOK: Lineage: part_from_mm1 PARTITION(key_mm=457).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_from_mm1 s3 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm1 +PREHOOK: Input: default@part_from_mm1@key_mm=455 +PREHOOK: Input: default@part_from_mm1@key_mm=456 +PREHOOK: Input: default@part_from_mm1@key_mm=457 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_from_mm1 s3 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm1 +POSTHOOK: Input: default@part_from_mm1@key_mm=455 +POSTHOOK: Input: default@part_from_mm1@key_mm=456 +POSTHOOK: Input: default@part_from_mm1@key_mm=457 +#### A masked pattern was here #### +0 455 +0 455 +0 456 +0 456 +0 457 +98 455 +98 455 +98 456 +98 456 +98 457 +100 455 +100 455 +100 456 +100 456 +100 457 +PREHOOK: query: drop table part_from_mm1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_from_mm1 +PREHOOK: Output: default@part_from_mm1 +POSTHOOK: query: drop table part_from_mm1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_from_mm1 +POSTHOOK: Output: default@part_from_mm1 +PREHOOK: query: drop table part_from_mm2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table part_from_mm2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table part_from_mm2(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_from_mm2 +POSTHOOK: query: create table part_from_mm2(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_from_mm2 +PREHOOK: query: insert into table part_from_mm2 partition(key_mm='456') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm2@key_mm=456 +POSTHOOK: query: insert into table part_from_mm2 partition(key_mm='456') select key from intermediate +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm2@key_mm=456 +POSTHOOK: Lineage: part_from_mm2 PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: --fails here +insert into table part_from_mm2 partition(key_mm='455') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm2@key_mm=455 +POSTHOOK: query: --fails here +insert into table part_from_mm2 partition(key_mm='455') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm2@key_mm=455 +POSTHOOK: Lineage: part_from_mm2 PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_from_mm2 s1 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm2 +PREHOOK: Input: default@part_from_mm2@key_mm=455 +PREHOOK: Input: default@part_from_mm2@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_from_mm2 s1 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm2 +POSTHOOK: Input: default@part_from_mm2@key_mm=455 +POSTHOOK: Input: default@part_from_mm2@key_mm=456 +#### A masked pattern was here #### +0 455 +0 456 +98 455 +98 456 +100 455 +100 456 +PREHOOK: query: alter table part_from_mm2 set tblproperties("transactional"="false", 'transactional_properties'='false') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@part_from_mm2 +PREHOOK: Output: default@part_from_mm2 +POSTHOOK: query: alter table part_from_mm2 set tblproperties("transactional"="false", 'transactional_properties'='false') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@part_from_mm2 +POSTHOOK: Output: default@part_from_mm2 +PREHOOK: query: select * from part_from_mm2 s2 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm2 +PREHOOK: Input: default@part_from_mm2@key_mm=455 +PREHOOK: Input: default@part_from_mm2@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_from_mm2 s2 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm2 +POSTHOOK: Input: default@part_from_mm2@key_mm=455 +POSTHOOK: Input: default@part_from_mm2@key_mm=456 +#### A masked pattern was here #### +0 455 +0 456 +98 455 +98 456 +100 455 +100 456 +PREHOOK: query: insert into table part_from_mm2 partition(key_mm='457') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm2@key_mm=457 +POSTHOOK: query: insert into table part_from_mm2 partition(key_mm='457') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: 
Output: default@part_from_mm2@key_mm=457 +POSTHOOK: Lineage: part_from_mm2 PARTITION(key_mm=457).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_from_mm2 s3 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm2 +PREHOOK: Input: default@part_from_mm2@key_mm=455 +PREHOOK: Input: default@part_from_mm2@key_mm=456 +PREHOOK: Input: default@part_from_mm2@key_mm=457 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_from_mm2 s3 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm2 +POSTHOOK: Input: default@part_from_mm2@key_mm=455 +POSTHOOK: Input: default@part_from_mm2@key_mm=456 +POSTHOOK: Input: default@part_from_mm2@key_mm=457 +#### A masked pattern was here #### +0 455 +0 456 +0 457 +98 455 +98 456 +98 457 +100 455 +100 456 +100 457 +PREHOOK: query: drop table part_from_mm2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_from_mm2 +PREHOOK: Output: default@part_from_mm2 +POSTHOOK: query: drop table part_from_mm2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_from_mm2 +POSTHOOK: Output: default@part_from_mm2 +PREHOOK: query: drop table part_to_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table part_to_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table part_to_mm(key int) partitioned by (key_mm int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_to_mm +POSTHOOK: query: create table part_to_mm(key int) partitioned by (key_mm int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_to_mm +PREHOOK: query: insert into table part_to_mm partition(key_mm='455') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_to_mm@key_mm=455 +POSTHOOK: query: insert into table part_to_mm partition(key_mm='455') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_to_mm@key_mm=455 +POSTHOOK: Lineage: part_to_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_to_mm partition(key_mm='456') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_to_mm@key_mm=456 +POSTHOOK: query: insert into table part_to_mm partition(key_mm='456') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_to_mm@key_mm=456 +POSTHOOK: Lineage: part_to_mm PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_to_mm s1 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_to_mm +PREHOOK: Input: default@part_to_mm@key_mm=455 +PREHOOK: Input: 
default@part_to_mm@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_to_mm s1 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_to_mm +POSTHOOK: Input: default@part_to_mm@key_mm=455 +POSTHOOK: Input: default@part_to_mm@key_mm=456 +#### A masked pattern was here #### +0 455 +0 456 +98 455 +98 456 +100 455 +100 456 +PREHOOK: query: alter table part_to_mm set tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@part_to_mm +PREHOOK: Output: default@part_to_mm +POSTHOOK: query: alter table part_to_mm set tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@part_to_mm +POSTHOOK: Output: default@part_to_mm +PREHOOK: query: select * from part_to_mm s2 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_to_mm +PREHOOK: Input: default@part_to_mm@key_mm=455 +PREHOOK: Input: default@part_to_mm@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_to_mm s2 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_to_mm +POSTHOOK: Input: default@part_to_mm@key_mm=455 +POSTHOOK: Input: default@part_to_mm@key_mm=456 +#### A masked pattern was here #### +0 455 +0 456 +98 455 +98 456 +100 455 +100 456 +PREHOOK: query: insert into table part_to_mm partition(key_mm='456') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_to_mm@key_mm=456 +POSTHOOK: query: insert into table part_to_mm partition(key_mm='456') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_to_mm@key_mm=456 +POSTHOOK: Lineage: part_to_mm PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_to_mm partition(key_mm='457') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_to_mm@key_mm=457 +POSTHOOK: query: insert into table part_to_mm partition(key_mm='457') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_to_mm@key_mm=457 +POSTHOOK: Lineage: part_to_mm PARTITION(key_mm=457).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_to_mm s3 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_to_mm +PREHOOK: Input: default@part_to_mm@key_mm=455 +PREHOOK: Input: default@part_to_mm@key_mm=456 +PREHOOK: Input: default@part_to_mm@key_mm=457 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_to_mm s3 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_to_mm +POSTHOOK: Input: default@part_to_mm@key_mm=455 +POSTHOOK: 
Input: default@part_to_mm@key_mm=456 +POSTHOOK: Input: default@part_to_mm@key_mm=457 +#### A masked pattern was here #### +0 455 +0 456 +0 456 +0 457 +98 455 +98 456 +98 456 +98 457 +100 455 +100 456 +100 456 +100 457 +PREHOOK: query: drop table part_to_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_to_mm +PREHOOK: Output: default@part_to_mm +POSTHOOK: query: drop table part_to_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_to_mm +POSTHOOK: Output: default@part_to_mm +PREHOOK: query: drop table intermediate +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@intermediate +PREHOOK: Output: default@intermediate +POSTHOOK: query: drop table intermediate +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@intermediate +POSTHOOK: Output: default@intermediate diff --git a/ql/src/test/results/clientpositive/mm_all.q.out b/ql/src/test/results/clientpositive/mm_all.q.out new file mode 100644 index 0000000..ea60414 --- /dev/null +++ b/ql/src/test/results/clientpositive/mm_all.q.out @@ -0,0 +1,2147 @@ +PREHOOK: query: drop table intermediate +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermediate +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@intermediate +POSTHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermediate +PREHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=455 +POSTHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=455 +POSTHOOK: Lineage: intermediate PARTITION(p=455).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=456 +POSTHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=456 +POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=457 +POSTHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=457 +POSTHOOK: Lineage: intermediate PARTITION(p=457).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: drop table part_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table part_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table part_mm(key int) partitioned 
by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_mm +POSTHOOK: query: create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_mm +PREHOOK: query: explain insert into table part_mm partition(key_mm=455) select key from intermediate +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table part_mm partition(key_mm=455) select key from intermediate +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: intermediate + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.part_mm + Write Type: INSERT + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + key_mm 455 + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.part_mm + micromanaged table: true + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_mm partition(key_mm=456) 
select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_mm@key_mm=456 +POSTHOOK: query: insert into table part_mm partition(key_mm=456) select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_mm@key_mm=456 +POSTHOOK: Lineage: part_mm PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_mm order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_mm +PREHOOK: Input: default@part_mm@key_mm=455 +PREHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_mm order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_mm +POSTHOOK: Input: default@part_mm@key_mm=455 +POSTHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +0 455 +0 455 +0 456 +10 455 +10 455 +10 456 +97 455 +97 455 +97 456 +98 455 +98 455 +98 456 +100 455 +100 455 +100 456 +103 455 +103 455 +103 456 +PREHOOK: query: select * from part_mm order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_mm +PREHOOK: Input: default@part_mm@key_mm=455 +PREHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_mm order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_mm +POSTHOOK: Input: default@part_mm@key_mm=455 +POSTHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +0 455 +0 455 +0 456 +10 455 +10 455 +10 456 +97 455 +97 455 +97 456 +98 455 +98 455 +98 456 +100 455 +100 455 +100 456 +103 455 +103 455 +103 456 +PREHOOK: query: truncate table part_mm +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@part_mm@key_mm=455 +PREHOOK: Output: default@part_mm@key_mm=456 +POSTHOOK: query: truncate table part_mm +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: Output: default@part_mm@key_mm=456 +PREHOOK: query: select * from part_mm order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_mm +PREHOOK: Input: default@part_mm@key_mm=455 +PREHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_mm order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_mm +POSTHOOK: Input: default@part_mm@key_mm=455 +POSTHOOK: 
Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +PREHOOK: query: drop table part_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_mm +PREHOOK: Output: default@part_mm +POSTHOOK: query: drop table part_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_mm +POSTHOOK: Output: default@part_mm +PREHOOK: query: drop table simple_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table simple_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table simple_mm(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@simple_mm +POSTHOOK: query: create table simple_mm(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@simple_mm +PREHOOK: query: insert into table simple_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_mm +POSTHOOK: query: insert into table simple_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_mm +POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_mm order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_mm order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_mm +#### A masked pattern was here #### +0 +10 +97 +98 +100 +103 +PREHOOK: query: insert into table simple_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_mm +POSTHOOK: query: insert into table simple_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_mm +POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_mm order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_mm order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_mm +#### A masked pattern was here #### +0 +0 +10 +10 +97 +97 +98 +98 +100 +100 +103 +103 +PREHOOK: query: truncate table simple_mm +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: default@simple_mm +POSTHOOK: query: truncate table simple_mm +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: default@simple_mm +PREHOOK: query: select * from simple_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_mm +#### A masked 
pattern was here #### +PREHOOK: query: drop table simple_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@simple_mm +PREHOOK: Output: default@simple_mm +POSTHOOK: query: drop table simple_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@simple_mm +POSTHOOK: Output: default@simple_mm +PREHOOK: query: drop table dp_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table dp_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc + tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dp_mm +POSTHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc + tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dp_mm +PREHOOK: query: insert into table dp_mm partition (key1='123', key2) select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@dp_mm@key1=123 +POSTHOOK: query: insert into table dp_mm partition (key1='123', key2) select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@dp_mm@key1=123/key2=0 +POSTHOOK: Output: default@dp_mm@key1=123/key2=10 +POSTHOOK: Output: default@dp_mm@key1=123/key2=100 +POSTHOOK: Output: default@dp_mm@key1=123/key2=103 +POSTHOOK: Output: default@dp_mm@key1=123/key2=97 +POSTHOOK: Output: default@dp_mm@key1=123/key2=98 +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=0).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=100).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=103).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=10).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=97).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=98).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from dp_mm order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@dp_mm +PREHOOK: Input: default@dp_mm@key1=123/key2=0 +PREHOOK: Input: default@dp_mm@key1=123/key2=10 +PREHOOK: Input: default@dp_mm@key1=123/key2=100 +PREHOOK: Input: default@dp_mm@key1=123/key2=103 +PREHOOK: Input: default@dp_mm@key1=123/key2=97 +PREHOOK: Input: default@dp_mm@key1=123/key2=98 +#### A masked pattern was here #### +POSTHOOK: query: select * from dp_mm order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dp_mm +POSTHOOK: Input: default@dp_mm@key1=123/key2=0 +POSTHOOK: Input: default@dp_mm@key1=123/key2=10 +POSTHOOK: Input: default@dp_mm@key1=123/key2=100 +POSTHOOK: Input: default@dp_mm@key1=123/key2=103 +POSTHOOK: Input: default@dp_mm@key1=123/key2=97 
+POSTHOOK: Input: default@dp_mm@key1=123/key2=98 +#### A masked pattern was here #### +0 123 0 +10 123 10 +97 123 97 +98 123 98 +100 123 100 +103 123 103 +PREHOOK: query: drop table dp_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dp_mm +PREHOOK: Output: default@dp_mm +POSTHOOK: query: drop table dp_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dp_mm +POSTHOOK: Output: default@dp_mm +PREHOOK: query: create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@union_mm +POSTHOOK: query: create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_mm +PREHOOK: query: insert into table union_mm +select temps.p from ( +select key as p from intermediate +union all +select key + 1 as p from intermediate ) temps +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@union_mm +POSTHOOK: query: insert into table union_mm +select temps.p from ( +select key as p from intermediate +union all +select key + 1 as p from intermediate ) temps +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@union_mm +POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from union_mm order by id +PREHOOK: type: QUERY +PREHOOK: Input: default@union_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from union_mm order by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_mm +#### A masked pattern was here #### +0 +1 +10 +11 +97 +98 +98 +99 +100 +101 +103 +104 +PREHOOK: query: insert into table union_mm +select p from +( +select key + 1 as p from intermediate +union all +select key from intermediate +) tab group by p +union all +select key + 2 as p from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@union_mm +POSTHOOK: query: insert into table union_mm +select p from +( +select key + 1 as p from intermediate +union all +select key from intermediate +) tab group by p +union all +select key + 2 as p from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@union_mm +POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from union_mm order by id +PREHOOK: type: QUERY +PREHOOK: Input: default@union_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from union_mm order by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_mm +#### A masked pattern was here #### +0 +0 +1 +1 +2 +10 +10 +11 +11 +12 +97 +97 +98 +98 +98 +99 +99 +99 +100 +100 +100 +101 +101 +102 +103 +103 +104 +104 +105 +PREHOOK: query: insert into table 
union_mm +SELECT p FROM +( + SELECT key + 1 as p FROM intermediate + UNION ALL + SELECT key as p FROM ( + SELECT distinct key FROM ( + SELECT key FROM ( + SELECT key + 2 as key FROM intermediate + UNION ALL + SELECT key FROM intermediate + )t1 + group by key)t2 + )t3 +)t4 +group by p +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@union_mm +POSTHOOK: query: insert into table union_mm +SELECT p FROM +( + SELECT key + 1 as p FROM intermediate + UNION ALL + SELECT key as p FROM ( + SELECT distinct key FROM ( + SELECT key FROM ( + SELECT key + 2 as key FROM intermediate + UNION ALL + SELECT key FROM intermediate + )t1 + group by key)t2 + )t3 +)t4 +group by p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@union_mm +POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from union_mm order by id +PREHOOK: type: QUERY +PREHOOK: Input: default@union_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from union_mm order by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_mm +#### A masked pattern was here #### +0 +0 +0 +1 +1 +1 +2 +2 +10 +10 +10 +11 +11 +11 +12 +12 +97 +97 +97 +98 +98 +98 +98 +99 +99 +99 +99 +100 +100 +100 +100 +101 +101 +101 +102 +102 +103 +103 +103 +104 +104 +104 +105 +105 +PREHOOK: query: drop table union_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@union_mm +PREHOOK: Output: default@union_mm +POSTHOOK: query: drop table union_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@union_mm +POSTHOOK: Output: default@union_mm +PREHOOK: query: create table partunion_mm(id int) partitioned by (key int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partunion_mm +POSTHOOK: query: create table partunion_mm(id int) partitioned by (key int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partunion_mm +PREHOOK: query: insert into table partunion_mm partition(key) +select temps.* from ( +select key as p, key from intermediate +union all +select key + 1 as p, key + 1 from intermediate ) temps +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@partunion_mm +POSTHOOK: query: insert into table partunion_mm partition(key) +select temps.* from ( +select key as p, key from intermediate +union all +select key + 1 as p, key + 1 from intermediate ) temps +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@partunion_mm@key=0 +POSTHOOK: Output: default@partunion_mm@key=1 +POSTHOOK: Output: default@partunion_mm@key=10 +POSTHOOK: Output: default@partunion_mm@key=100 +POSTHOOK: Output: default@partunion_mm@key=101 +POSTHOOK: Output: default@partunion_mm@key=103 
+POSTHOOK: Output: default@partunion_mm@key=104 +POSTHOOK: Output: default@partunion_mm@key=11 +POSTHOOK: Output: default@partunion_mm@key=97 +POSTHOOK: Output: default@partunion_mm@key=98 +POSTHOOK: Output: default@partunion_mm@key=99 +POSTHOOK: Lineage: partunion_mm PARTITION(key=0).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=100).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=101).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=103).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=104).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=10).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=11).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=1).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=97).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=98).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=99).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from partunion_mm order by id +PREHOOK: type: QUERY +PREHOOK: Input: default@partunion_mm +PREHOOK: Input: default@partunion_mm@key=0 +PREHOOK: Input: default@partunion_mm@key=1 +PREHOOK: Input: default@partunion_mm@key=10 +PREHOOK: Input: default@partunion_mm@key=100 +PREHOOK: Input: default@partunion_mm@key=101 +PREHOOK: Input: default@partunion_mm@key=103 +PREHOOK: Input: default@partunion_mm@key=104 +PREHOOK: Input: default@partunion_mm@key=11 +PREHOOK: Input: default@partunion_mm@key=97 +PREHOOK: Input: default@partunion_mm@key=98 +PREHOOK: Input: default@partunion_mm@key=99 +#### A masked pattern was here #### +POSTHOOK: query: select * from partunion_mm order by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partunion_mm +POSTHOOK: Input: default@partunion_mm@key=0 +POSTHOOK: Input: default@partunion_mm@key=1 +POSTHOOK: Input: default@partunion_mm@key=10 +POSTHOOK: Input: default@partunion_mm@key=100 +POSTHOOK: Input: default@partunion_mm@key=101 +POSTHOOK: Input: default@partunion_mm@key=103 +POSTHOOK: Input: default@partunion_mm@key=104 +POSTHOOK: Input: default@partunion_mm@key=11 +POSTHOOK: Input: default@partunion_mm@key=97 +POSTHOOK: Input: default@partunion_mm@key=98 +POSTHOOK: Input: default@partunion_mm@key=99 +#### A masked pattern was here #### +0 0 +1 1 +10 10 +11 11 +97 97 +98 98 +98 98 +99 99 +100 100 +101 101 +103 103 +104 104 +PREHOOK: query: drop table partunion_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partunion_mm +PREHOOK: Output: default@partunion_mm +POSTHOOK: query: drop table partunion_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partunion_mm +POSTHOOK: Output: default@partunion_mm +PREHOOK: query: create table skew_mm(k1 int, k2 int, k4 
int) skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) + stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@skew_mm +POSTHOOK: query: create table skew_mm(k1 int, k2 int, k4 int) skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) + stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@skew_mm +PREHOOK: query: insert into table skew_mm +select key, key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@skew_mm +POSTHOOK: query: insert into table skew_mm +select key, key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@skew_mm +POSTHOOK: Lineage: skew_mm.k1 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_mm.k2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_mm.k4 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from skew_mm order by k2, k1, k4 +PREHOOK: type: QUERY +PREHOOK: Input: default@skew_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from skew_mm order by k2, k1, k4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skew_mm +#### A masked pattern was here #### +0 0 0 +10 10 10 +97 97 97 +98 98 98 +100 100 100 +103 103 103 +PREHOOK: query: drop table skew_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@skew_mm +PREHOOK: Output: default@skew_mm +POSTHOOK: query: drop table skew_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@skew_mm +POSTHOOK: Output: default@skew_mm +PREHOOK: query: create table skew_dp_union_mm(k1 int, k2 int, k4 int) partitioned by (k3 int) +skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@skew_dp_union_mm +POSTHOOK: query: create table skew_dp_union_mm(k1 int, k2 int, k4 int) partitioned by (k3 int) +skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@skew_dp_union_mm +PREHOOK: query: insert into table skew_dp_union_mm partition (k3) +select key as i, key as j, key as k, key as l from intermediate +union all +select key +1 as i, key +2 as j, key +3 as k, key +4 as l from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@skew_dp_union_mm +POSTHOOK: query: insert into table skew_dp_union_mm partition (k3) +select key as i, key as j, key as k, key as l from intermediate +union all +select key +1 as i, key +2 as j, key +3 as k, key +4 as l from 
intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@skew_dp_union_mm@k3=0 +POSTHOOK: Output: default@skew_dp_union_mm@k3=10 +POSTHOOK: Output: default@skew_dp_union_mm@k3=100 +POSTHOOK: Output: default@skew_dp_union_mm@k3=101 +POSTHOOK: Output: default@skew_dp_union_mm@k3=102 +POSTHOOK: Output: default@skew_dp_union_mm@k3=103 +POSTHOOK: Output: default@skew_dp_union_mm@k3=104 +POSTHOOK: Output: default@skew_dp_union_mm@k3=107 +POSTHOOK: Output: default@skew_dp_union_mm@k3=14 +POSTHOOK: Output: default@skew_dp_union_mm@k3=4 +POSTHOOK: Output: default@skew_dp_union_mm@k3=97 +POSTHOOK: Output: default@skew_dp_union_mm@k3=98 +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=0).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=0).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=0).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=100).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=100).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=100).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=101).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=101).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=101).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=102).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=102).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=102).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=103).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=103).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=103).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=104).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=104).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=104).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=107).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, 
type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=107).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=107).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=10).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=10).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=10).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=14).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=14).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=14).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=4).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=4).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=4).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=97).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=97).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=97).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=98).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=98).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=98).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from skew_dp_union_mm order by k2, k1, k4 +PREHOOK: type: QUERY +PREHOOK: Input: default@skew_dp_union_mm +PREHOOK: Input: default@skew_dp_union_mm@k3=0 +PREHOOK: Input: default@skew_dp_union_mm@k3=10 +PREHOOK: Input: default@skew_dp_union_mm@k3=100 +PREHOOK: Input: default@skew_dp_union_mm@k3=101 +PREHOOK: Input: default@skew_dp_union_mm@k3=102 +PREHOOK: Input: default@skew_dp_union_mm@k3=103 +PREHOOK: Input: default@skew_dp_union_mm@k3=104 +PREHOOK: Input: default@skew_dp_union_mm@k3=107 +PREHOOK: Input: default@skew_dp_union_mm@k3=14 +PREHOOK: Input: default@skew_dp_union_mm@k3=4 +PREHOOK: Input: default@skew_dp_union_mm@k3=97 +PREHOOK: Input: default@skew_dp_union_mm@k3=98 +#### A masked pattern was here #### +POSTHOOK: query: select * from skew_dp_union_mm order by k2, k1, k4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skew_dp_union_mm +POSTHOOK: Input: default@skew_dp_union_mm@k3=0 +POSTHOOK: Input: default@skew_dp_union_mm@k3=10 +POSTHOOK: Input: default@skew_dp_union_mm@k3=100 +POSTHOOK: Input: 
default@skew_dp_union_mm@k3=101 +POSTHOOK: Input: default@skew_dp_union_mm@k3=102 +POSTHOOK: Input: default@skew_dp_union_mm@k3=103 +POSTHOOK: Input: default@skew_dp_union_mm@k3=104 +POSTHOOK: Input: default@skew_dp_union_mm@k3=107 +POSTHOOK: Input: default@skew_dp_union_mm@k3=14 +POSTHOOK: Input: default@skew_dp_union_mm@k3=4 +POSTHOOK: Input: default@skew_dp_union_mm@k3=97 +POSTHOOK: Input: default@skew_dp_union_mm@k3=98 +#### A masked pattern was here #### +0 0 0 0 +1 2 3 4 +10 10 10 10 +11 12 13 14 +97 97 97 97 +98 98 98 98 +98 99 100 101 +99 100 101 102 +100 100 100 100 +101 102 103 104 +103 103 103 103 +104 105 106 107 +PREHOOK: query: drop table skew_dp_union_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@skew_dp_union_mm +PREHOOK: Output: default@skew_dp_union_mm +POSTHOOK: query: drop table skew_dp_union_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@skew_dp_union_mm +POSTHOOK: Output: default@skew_dp_union_mm +PREHOOK: query: create table merge0_mm (id int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@merge0_mm +POSTHOOK: query: create table merge0_mm (id int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@merge0_mm +PREHOOK: query: insert into table merge0_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge0_mm +POSTHOOK: query: insert into table merge0_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge0_mm +POSTHOOK: Lineage: merge0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from merge0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@merge0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from merge0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge0_mm +#### A masked pattern was here #### +98 +97 +0 +10 +100 +103 +PREHOOK: query: insert into table merge0_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge0_mm +POSTHOOK: query: insert into table merge0_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge0_mm +POSTHOOK: Lineage: merge0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from merge0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@merge0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from merge0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge0_mm +#### A masked pattern was here #### +98 +97 +0 +10 +100 +103 +98 +97 +0 +10 +100 +103 +PREHOOK: query: drop table merge0_mm 
+PREHOOK: type: DROPTABLE +PREHOOK: Input: default@merge0_mm +PREHOOK: Output: default@merge0_mm +POSTHOOK: query: drop table merge0_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@merge0_mm +POSTHOOK: Output: default@merge0_mm +PREHOOK: query: create table merge2_mm (id int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@merge2_mm +POSTHOOK: query: create table merge2_mm (id int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@merge2_mm +PREHOOK: query: insert into table merge2_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge2_mm +POSTHOOK: query: insert into table merge2_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge2_mm +POSTHOOK: Lineage: merge2_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from merge2_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@merge2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from merge2_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge2_mm +#### A masked pattern was here #### +98 +97 +0 +10 +100 +103 +PREHOOK: query: insert into table merge2_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge2_mm +POSTHOOK: query: insert into table merge2_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge2_mm +POSTHOOK: Lineage: merge2_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from merge2_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@merge2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from merge2_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge2_mm +#### A masked pattern was here #### +98 +97 +0 +10 +100 +103 +98 +97 +0 +10 +100 +103 +PREHOOK: query: drop table merge2_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@merge2_mm +PREHOOK: Output: default@merge2_mm +POSTHOOK: query: drop table merge2_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@merge2_mm +POSTHOOK: Output: default@merge2_mm +PREHOOK: query: create table merge1_mm (id int) partitioned by (key int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@merge1_mm +POSTHOOK: query: create table merge1_mm (id int) partitioned by (key int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@merge1_mm +PREHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge1_mm +POSTHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge1_mm@key=0 +POSTHOOK: Output: default@merge1_mm@key=10 +POSTHOOK: Output: default@merge1_mm@key=100 +POSTHOOK: Output: default@merge1_mm@key=103 +POSTHOOK: Output: default@merge1_mm@key=97 +POSTHOOK: Output: default@merge1_mm@key=98 +POSTHOOK: Lineage: merge1_mm PARTITION(key=0).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=100).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=103).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=10).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=97).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=98).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from merge1_mm order by id, key +PREHOOK: type: QUERY +PREHOOK: Input: default@merge1_mm +PREHOOK: Input: default@merge1_mm@key=0 +PREHOOK: Input: default@merge1_mm@key=10 +PREHOOK: Input: default@merge1_mm@key=100 +PREHOOK: Input: default@merge1_mm@key=103 +PREHOOK: Input: default@merge1_mm@key=97 +PREHOOK: Input: default@merge1_mm@key=98 +#### A masked pattern was here #### +POSTHOOK: query: select * from merge1_mm order by id, key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge1_mm +POSTHOOK: Input: default@merge1_mm@key=0 +POSTHOOK: Input: default@merge1_mm@key=10 +POSTHOOK: Input: default@merge1_mm@key=100 +POSTHOOK: Input: default@merge1_mm@key=103 +POSTHOOK: Input: default@merge1_mm@key=97 +POSTHOOK: Input: default@merge1_mm@key=98 +#### A masked pattern was here #### +0 0 +10 10 +97 97 +98 98 +100 100 +103 103 +PREHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@merge1_mm +POSTHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@merge1_mm@key=0 +POSTHOOK: Output: default@merge1_mm@key=10 +POSTHOOK: Output: default@merge1_mm@key=100 +POSTHOOK: Output: default@merge1_mm@key=103 +POSTHOOK: Output: default@merge1_mm@key=97 +POSTHOOK: Output: default@merge1_mm@key=98 +POSTHOOK: Lineage: merge1_mm PARTITION(key=0).id SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=100).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=103).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=10).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=97).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=98).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from merge1_mm order by id, key +PREHOOK: type: QUERY +PREHOOK: Input: default@merge1_mm +PREHOOK: Input: default@merge1_mm@key=0 +PREHOOK: Input: default@merge1_mm@key=10 +PREHOOK: Input: default@merge1_mm@key=100 +PREHOOK: Input: default@merge1_mm@key=103 +PREHOOK: Input: default@merge1_mm@key=97 +PREHOOK: Input: default@merge1_mm@key=98 +#### A masked pattern was here #### +POSTHOOK: query: select * from merge1_mm order by id, key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@merge1_mm +POSTHOOK: Input: default@merge1_mm@key=0 +POSTHOOK: Input: default@merge1_mm@key=10 +POSTHOOK: Input: default@merge1_mm@key=100 +POSTHOOK: Input: default@merge1_mm@key=103 +POSTHOOK: Input: default@merge1_mm@key=97 +POSTHOOK: Input: default@merge1_mm@key=98 +#### A masked pattern was here #### +0 0 +0 0 +10 10 +10 10 +97 97 +97 97 +98 98 +98 98 +100 100 +100 100 +103 103 +103 103 +PREHOOK: query: drop table merge1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@merge1_mm +PREHOOK: Output: default@merge1_mm +POSTHOOK: query: drop table merge1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@merge1_mm +POSTHOOK: Output: default@merge1_mm +PREHOOK: query: drop table ctas0_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ctas0_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: database:default +PREHOOK: Output: default@ctas0_mm +POSTHOOK: query: create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ctas0_mm +POSTHOOK: Lineage: ctas0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: ctas0_mm.p SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +PREHOOK: query: select * from ctas0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@ctas0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from ctas0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ctas0_mm +#### A masked pattern was here #### +98 455 +97 455 +0 456 +10 456 +100 457 +103 457 +PREHOOK: query: drop table ctas0_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@ctas0_mm +PREHOOK: Output: default@ctas0_mm +POSTHOOK: query: drop table ctas0_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ctas0_mm +POSTHOOK: Output: default@ctas0_mm +PREHOOK: query: drop table ctas1_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ctas1_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as + select * from intermediate union all select * from intermediate +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: database:default +PREHOOK: Output: default@ctas1_mm +POSTHOOK: query: create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as + select * from intermediate union all select * from intermediate +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ctas1_mm +POSTHOOK: Lineage: ctas1_mm.key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: ctas1_mm.p EXPRESSION [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +PREHOOK: query: select * from ctas1_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@ctas1_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from ctas1_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ctas1_mm +#### A masked pattern was here #### +98 455 +98 455 +97 455 +97 455 +0 456 +0 456 +10 456 +10 456 +100 457 +100 457 +103 457 +103 457 +PREHOOK: query: drop table ctas1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ctas1_mm +PREHOOK: Output: default@ctas1_mm +POSTHOOK: query: drop table ctas1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ctas1_mm +POSTHOOK: Output: default@ctas1_mm +PREHOOK: query: drop table multi0_1_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table multi0_1_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table multi0_2_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table multi0_2_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table multi0_1_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@multi0_1_mm +POSTHOOK: query: create table multi0_1_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@multi0_1_mm +PREHOOK: query: create table multi0_2_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@multi0_2_mm +POSTHOOK: query: create table multi0_2_mm (key int, key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@multi0_2_mm +PREHOOK: query: insert into table multi0_1_mm select key, p from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: 
default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@multi0_1_mm +POSTHOOK: query: insert into table multi0_1_mm select key, p from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@multi0_1_mm +POSTHOOK: Lineage: multi0_1_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: multi0_1_mm.key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +PREHOOK: query: insert into table multi0_2_mm select p, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@multi0_2_mm +POSTHOOK: query: insert into table multi0_2_mm select p, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@multi0_2_mm +POSTHOOK: Lineage: multi0_2_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi0_2_mm.key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from multi0_1_mm order by key, key2 +PREHOOK: type: QUERY +PREHOOK: Input: default@multi0_1_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from multi0_1_mm order by key, key2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@multi0_1_mm +#### A masked pattern was here #### +0 456 +10 456 +97 455 +98 455 +100 457 +103 457 +PREHOOK: query: select * from multi0_2_mm order by key, key2 +PREHOOK: type: QUERY +PREHOOK: Input: default@multi0_2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from multi0_2_mm order by key, key2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@multi0_2_mm +#### A masked pattern was here #### +455 97 +455 98 +456 0 +456 10 +457 100 +457 103 +PREHOOK: query: insert into table multi0_1_mm select p, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@multi0_1_mm +POSTHOOK: query: insert into table multi0_1_mm select p, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@multi0_1_mm +POSTHOOK: Lineage: multi0_1_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi0_1_mm.key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table multi0_2_mm select key, p from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@multi0_2_mm +POSTHOOK: query: insert into table multi0_2_mm select key, p from 
intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@multi0_2_mm +POSTHOOK: Lineage: multi0_2_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: multi0_2_mm.key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +PREHOOK: query: select * from multi0_1_mm order by key, key2 +PREHOOK: type: QUERY +PREHOOK: Input: default@multi0_1_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from multi0_1_mm order by key, key2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@multi0_1_mm +#### A masked pattern was here #### +0 456 +10 456 +97 455 +98 455 +100 457 +103 457 +455 97 +455 98 +456 0 +456 10 +457 100 +457 103 +PREHOOK: query: select * from multi0_2_mm order by key, key2 +PREHOOK: type: QUERY +PREHOOK: Input: default@multi0_2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from multi0_2_mm order by key, key2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@multi0_2_mm +#### A masked pattern was here #### +0 456 +10 456 +97 455 +98 455 +100 457 +103 457 +455 97 +455 98 +456 0 +456 10 +457 100 +457 103 +PREHOOK: query: drop table multi0_1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@multi0_1_mm +PREHOOK: Output: default@multi0_1_mm +POSTHOOK: query: drop table multi0_1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@multi0_1_mm +POSTHOOK: Output: default@multi0_1_mm +PREHOOK: query: drop table multi0_2_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@multi0_2_mm +PREHOOK: Output: default@multi0_2_mm +POSTHOOK: query: drop table multi0_2_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@multi0_2_mm +POSTHOOK: Output: default@multi0_2_mm +PREHOOK: query: drop table multi1_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table multi1_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table multi1_mm (key int, key2 int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@multi1_mm +POSTHOOK: query: create table multi1_mm (key int, key2 int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@multi1_mm +PREHOOK: query: from intermediate +insert into table multi1_mm partition(p=1) select p, key +insert into table multi1_mm partition(p=2) select key, p +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@multi1_mm@p=1 +PREHOOK: Output: default@multi1_mm@p=2 +POSTHOOK: query: from intermediate +insert into table multi1_mm partition(p=1) select p, key +insert into table multi1_mm partition(p=2) select key, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@multi1_mm@p=1 +POSTHOOK: Output: default@multi1_mm@p=2 +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: 
Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +PREHOOK: query: select * from multi1_mm order by key, key2, p +PREHOOK: type: QUERY +PREHOOK: Input: default@multi1_mm +PREHOOK: Input: default@multi1_mm@p=1 +PREHOOK: Input: default@multi1_mm@p=2 +#### A masked pattern was here #### +POSTHOOK: query: select * from multi1_mm order by key, key2, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@multi1_mm +POSTHOOK: Input: default@multi1_mm@p=1 +POSTHOOK: Input: default@multi1_mm@p=2 +#### A masked pattern was here #### +0 456 2 +10 456 2 +97 455 2 +98 455 2 +100 457 2 +103 457 2 +455 97 1 +455 98 1 +456 0 1 +456 10 1 +457 100 1 +457 103 1 +PREHOOK: query: insert into table multi1_mm partition(p=2) select p, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@multi1_mm@p=2 +POSTHOOK: query: insert into table multi1_mm partition(p=2) select p, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@multi1_mm@p=2 +POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=2).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table multi1_mm partition(p=1) select key, p from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@multi1_mm@p=1 +POSTHOOK: query: insert into table multi1_mm partition(p=1) select key, p from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@multi1_mm@p=1 +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +PREHOOK: query: select * from multi1_mm order by key, key2, p +PREHOOK: type: QUERY +PREHOOK: Input: default@multi1_mm +PREHOOK: Input: default@multi1_mm@p=1 +PREHOOK: Input: default@multi1_mm@p=2 +#### A masked pattern was here #### +POSTHOOK: query: select * from multi1_mm order by key, key2, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@multi1_mm +POSTHOOK: Input: default@multi1_mm@p=1 +POSTHOOK: Input: default@multi1_mm@p=2 +#### A masked pattern was here #### +0 456 1 +0 456 2 +10 456 1 +10 456 2 +97 455 1 +97 455 2 +98 455 1 +98 455 2 +100 457 1 +100 457 2 +103 457 1 +103 457 2 +455 97 1 +455 97 2 +455 98 1 +455 98 2 +456 0 1 +456 0 2 +456 10 1 +456 10 2 +457 100 1 +457 100 2 +457 103 1 +457 103 2 +PREHOOK: 
query: from intermediate +insert into table multi1_mm partition(p) select p, key, p +insert into table multi1_mm partition(p=1) select key, p +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@multi1_mm +PREHOOK: Output: default@multi1_mm@p=1 +POSTHOOK: query: from intermediate +insert into table multi1_mm partition(p) select p, key, p +insert into table multi1_mm partition(p=1) select key, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@multi1_mm@p=1 +POSTHOOK: Output: default@multi1_mm@p=455 +POSTHOOK: Output: default@multi1_mm@p=456 +POSTHOOK: Output: default@multi1_mm@p=457 +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=455).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=456).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=457).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=457).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select key, key2, p from multi1_mm order by key, key2, p +PREHOOK: type: QUERY +PREHOOK: Input: default@multi1_mm +PREHOOK: Input: default@multi1_mm@p=1 +PREHOOK: Input: default@multi1_mm@p=2 +PREHOOK: Input: default@multi1_mm@p=455 +PREHOOK: Input: default@multi1_mm@p=456 +PREHOOK: Input: default@multi1_mm@p=457 +#### A masked pattern was here #### +POSTHOOK: query: select key, key2, p from multi1_mm order by key, key2, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@multi1_mm +POSTHOOK: Input: default@multi1_mm@p=1 +POSTHOOK: Input: default@multi1_mm@p=2 +POSTHOOK: Input: default@multi1_mm@p=455 +POSTHOOK: Input: default@multi1_mm@p=456 +POSTHOOK: Input: default@multi1_mm@p=457 +#### A masked pattern was here #### +0 456 1 +0 456 1 +0 456 2 +10 456 1 +10 456 1 +10 456 2 +97 455 1 +97 455 1 +97 455 2 +98 455 1 +98 455 1 +98 455 2 +100 457 1 +100 457 1 +100 457 2 +103 457 1 +103 457 1 +103 457 2 +455 97 1 +455 97 2 +455 97 455 +455 98 1 +455 98 2 +455 98 455 +456 0 1 +456 0 2 +456 0 456 +456 10 1 +456 10 2 +456 10 456 +457 100 1 +457 100 2 +457 100 457 +457 103 1 +457 103 2 +457 103 457 +PREHOOK: query: from intermediate +insert into table multi1_mm partition(p) select p, key, 1 +insert into table multi1_mm partition(p=1) select key, p +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@multi1_mm +PREHOOK: Output: default@multi1_mm@p=1 
+POSTHOOK: query: from intermediate +insert into table multi1_mm partition(p) select p, key, 1 +insert into table multi1_mm partition(p=1) select key, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@multi1_mm@p=1 +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select key, key2, p from multi1_mm order by key, key2, p +PREHOOK: type: QUERY +PREHOOK: Input: default@multi1_mm +PREHOOK: Input: default@multi1_mm@p=1 +PREHOOK: Input: default@multi1_mm@p=2 +PREHOOK: Input: default@multi1_mm@p=455 +PREHOOK: Input: default@multi1_mm@p=456 +PREHOOK: Input: default@multi1_mm@p=457 +#### A masked pattern was here #### +POSTHOOK: query: select key, key2, p from multi1_mm order by key, key2, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@multi1_mm +POSTHOOK: Input: default@multi1_mm@p=1 +POSTHOOK: Input: default@multi1_mm@p=2 +POSTHOOK: Input: default@multi1_mm@p=455 +POSTHOOK: Input: default@multi1_mm@p=456 +POSTHOOK: Input: default@multi1_mm@p=457 +#### A masked pattern was here #### +0 456 1 +0 456 1 +0 456 1 +0 456 2 +10 456 1 +10 456 1 +10 456 1 +10 456 2 +97 455 1 +97 455 1 +97 455 1 +97 455 2 +98 455 1 +98 455 1 +98 455 1 +98 455 2 +100 457 1 +100 457 1 +100 457 1 +100 457 2 +103 457 1 +103 457 1 +103 457 1 +103 457 2 +455 97 1 +455 97 1 +455 97 2 +455 97 455 +455 98 1 +455 98 1 +455 98 2 +455 98 455 +456 0 1 +456 0 1 +456 0 2 +456 0 456 +456 10 1 +456 10 1 +456 10 2 +456 10 456 +457 100 1 +457 100 1 +457 100 2 +457 100 457 +457 103 1 +457 103 1 +457 103 2 +457 103 457 +PREHOOK: query: drop table multi1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@multi1_mm +PREHOOK: Output: default@multi1_mm +POSTHOOK: query: drop table multi1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@multi1_mm +POSTHOOK: Output: default@multi1_mm +PREHOOK: query: drop table stats_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table stats_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table stats_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stats_mm +POSTHOOK: query: create table stats_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_mm +PREHOOK: query: insert into table stats_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@stats_mm +POSTHOOK: query: insert into table stats_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 
+POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@stats_mm +POSTHOOK: Lineage: stats_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: desc formatted stats_mm +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_mm +POSTHOOK: query: desc formatted stats_mm +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_mm +# col_name data_type comment + +key int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 6 + rawDataSize 13 + totalSize 19 + transactional true + transactional_properties insert_only +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into table stats_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@stats_mm +POSTHOOK: query: insert into table stats_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@stats_mm +POSTHOOK: Lineage: stats_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: desc formatted stats_mm +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_mm +POSTHOOK: query: desc formatted stats_mm +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_mm +# col_name data_type comment + +key int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 12 + rawDataSize 26 + totalSize 38 + transactional true + transactional_properties insert_only +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table stats_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@stats_mm +PREHOOK: Output: default@stats_mm +POSTHOOK: query: drop table stats_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@stats_mm +POSTHOOK: Output: default@stats_mm +PREHOOK: query: drop table stats2_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table stats2_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table stats2_mm tblproperties("transactional"="true", "transactional_properties"="insert_only") as select array(key, value) from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: 
default@stats2_mm +POSTHOOK: query: create table stats2_mm tblproperties("transactional"="true", "transactional_properties"="insert_only") as select array(key, value) from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats2_mm +POSTHOOK: Lineage: stats2_mm._c0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted stats2_mm +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats2_mm +POSTHOOK: query: desc formatted stats2_mm +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats2_mm +# col_name data_type comment + +_c0 array + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 500 + rawDataSize 5312 + totalSize 5812 + transactional true + transactional_properties insert_only +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table stats2_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@stats2_mm +PREHOOK: Output: default@stats2_mm +POSTHOOK: query: drop table stats2_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@stats2_mm +POSTHOOK: Output: default@stats2_mm +PREHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@skewjoin_mm +POSTHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@skewjoin_mm +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT into TABLE skewjoin_mm SELECT src1.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@skewjoin_mm +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT into TABLE skewjoin_mm SELECT src1.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@skewjoin_mm +POSTHOOK: Lineage: skewjoin_mm.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: skewjoin_mm.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(distinct key) from skewjoin_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@skewjoin_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct key) from skewjoin_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skewjoin_mm +#### A masked pattern was here #### +309 +PREHOOK: query: drop table skewjoin_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@skewjoin_mm +PREHOOK: Output: default@skewjoin_mm +POSTHOOK: query: drop table skewjoin_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@skewjoin_mm +POSTHOOK: Output: 
default@skewjoin_mm +PREHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet1_mm +POSTHOOK: query: CREATE TABLE parquet1_mm(id INT) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet1_mm +PREHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2) +PREHOOK: type: QUERY +PREHOOK: Output: default@parquet1_mm +POSTHOOK: query: INSERT INTO parquet1_mm VALUES(1), (2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@parquet1_mm +POSTHOOK: Lineage: parquet1_mm.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet2_mm +POSTHOOK: query: CREATE TABLE parquet2_mm(id INT, value STRING) STORED AS PARQUET tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet2_mm +PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1') +PREHOOK: type: QUERY +PREHOOK: Output: default@parquet2_mm +POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value1') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@parquet2_mm +POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2') +PREHOOK: type: QUERY +PREHOOK: Output: default@parquet2_mm +POSTHOOK: query: INSERT INTO parquet2_mm VALUES(1, 'value2') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@parquet2_mm +POSTHOOK: Lineage: parquet2_mm.id EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: parquet2_mm.value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm + JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id + JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id +where t1.value = 'value1' and t2.value = 'value2' +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet1_mm +PREHOOK: Input: default@parquet2_mm +#### A masked pattern was here #### +POSTHOOK: query: select parquet1_mm.id, t1.value, t2.value FROM parquet1_mm + JOIN parquet2_mm t1 ON parquet1_mm.id=t1.id + JOIN parquet2_mm t2 ON parquet1_mm.id=t2.id +where t1.value = 'value1' and t2.value = 'value2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet1_mm +POSTHOOK: Input: default@parquet2_mm +#### A masked pattern was here #### +1 value1 value2 +PREHOOK: query: drop table parquet1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet1_mm +PREHOOK: Output: default@parquet1_mm +POSTHOOK: query: drop table parquet1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet1_mm +POSTHOOK: Output: default@parquet1_mm 
+PREHOOK: query: drop table parquet2_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet2_mm +PREHOOK: Output: default@parquet2_mm +POSTHOOK: query: drop table parquet2_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet2_mm +POSTHOOK: Output: default@parquet2_mm +PREHOOK: query: DROP TABLE IF EXISTS temp1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS temp1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TEMPORARY TABLE temp1 (a int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@temp1 +POSTHOOK: query: CREATE TEMPORARY TABLE temp1 (a int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@temp1 +PREHOOK: query: INSERT INTO temp1 SELECT key FROM intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@temp1 +POSTHOOK: query: INSERT INTO temp1 SELECT key FROM intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@temp1 +POSTHOOK: Lineage: temp1.a SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: DESC EXTENDED temp1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@temp1 +POSTHOOK: query: DESC EXTENDED temp1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@temp1 +a int + +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM temp1 +PREHOOK: type: QUERY +PREHOOK: Input: default@temp1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM temp1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@temp1 +#### A masked pattern was here #### +98 +97 +0 +10 +100 +103 +PREHOOK: query: drop table intermediate +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@intermediate +PREHOOK: Output: default@intermediate +POSTHOOK: query: drop table intermediate +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@intermediate +POSTHOOK: Output: default@intermediate diff --git a/ql/src/test/results/clientpositive/mm_buckets.q.out b/ql/src/test/results/clientpositive/mm_buckets.q.out new file mode 100644 index 0000000..fa25be7 --- /dev/null +++ b/ql/src/test/results/clientpositive/mm_buckets.q.out @@ -0,0 +1,489 @@ +PREHOOK: query: drop table intermediate +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermediate +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@intermediate +POSTHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermediate +PREHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=455 +POSTHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=455 +POSTHOOK: Lineage: intermediate PARTITION(p=455).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=456 +POSTHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=456 +POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=457 +POSTHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=457 +POSTHOOK: Lineage: intermediate PARTITION(p=457).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: drop table bucket0_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucket0_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table bucket0_mm(key int, id int) +clustered by (key) into 2 buckets +tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket0_mm +POSTHOOK: query: create table bucket0_mm(key int, id int) +clustered by (key) into 2 buckets +tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket0_mm +PREHOOK: query: insert into table bucket0_mm select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@bucket0_mm +POSTHOOK: query: insert into table bucket0_mm select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@bucket0_mm +POSTHOOK: Lineage: bucket0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from bucket0_mm order by key, id +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm order by key, id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +0 0 +10 10 +97 97 +98 98 +100 100 +103 103 +PREHOOK: query: select * from bucket0_mm tablesample (bucket 1 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm 
tablesample (bucket 1 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +100 100 +10 10 +0 0 +98 98 +PREHOOK: query: select * from bucket0_mm tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +103 103 +97 97 +PREHOOK: query: insert into table bucket0_mm select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@bucket0_mm +POSTHOOK: query: insert into table bucket0_mm select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@bucket0_mm +POSTHOOK: Lineage: bucket0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from bucket0_mm order by key, id +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm order by key, id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +0 0 +0 0 +10 10 +10 10 +97 97 +97 97 +98 98 +98 98 +100 100 +100 100 +103 103 +103 103 +PREHOOK: query: select * from bucket0_mm tablesample (bucket 1 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm tablesample (bucket 1 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +100 100 +10 10 +0 0 +98 98 +100 100 +10 10 +0 0 +98 98 +PREHOOK: query: select * from bucket0_mm tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +103 103 +97 97 +103 103 +97 97 +PREHOOK: query: drop table bucket0_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@bucket0_mm +PREHOOK: Output: default@bucket0_mm +POSTHOOK: query: drop table bucket0_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@bucket0_mm +POSTHOOK: Output: default@bucket0_mm +PREHOOK: query: drop table bucket1_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucket1_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table bucket1_mm(key int, id int) partitioned by (key2 int) +clustered by (key) sorted by (key) into 2 buckets +tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket1_mm +POSTHOOK: query: create table bucket1_mm(key int, id int) partitioned by (key2 int) +clustered by (key) sorted by (key) into 2 buckets +tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@bucket1_mm +PREHOOK: query: insert into table bucket1_mm partition (key2) +select key + 1, key, key - 1 from intermediate +union all +select key - 1, key, key + 1 from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@bucket1_mm +POSTHOOK: query: insert into table bucket1_mm partition (key2) +select key + 1, key, key - 1 from intermediate +union all +select key - 1, key, key + 1 from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@bucket1_mm@key2=-1 +POSTHOOK: Output: default@bucket1_mm@key2=1 +POSTHOOK: Output: default@bucket1_mm@key2=101 +POSTHOOK: Output: default@bucket1_mm@key2=102 +POSTHOOK: Output: default@bucket1_mm@key2=104 +POSTHOOK: Output: default@bucket1_mm@key2=11 +POSTHOOK: Output: default@bucket1_mm@key2=9 +POSTHOOK: Output: default@bucket1_mm@key2=96 +POSTHOOK: Output: default@bucket1_mm@key2=97 +POSTHOOK: Output: default@bucket1_mm@key2=98 +POSTHOOK: Output: default@bucket1_mm@key2=99 +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=-1).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=-1).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=101).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=101).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=102).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=102).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=104).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=104).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=11).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=11).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=1).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=1).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=96).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=96).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=97).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=97).key EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=98).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=98).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=99).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=99).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=9).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=9).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from bucket1_mm order by key, id +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket1_mm +PREHOOK: Input: default@bucket1_mm@key2=-1 +PREHOOK: Input: default@bucket1_mm@key2=1 +PREHOOK: Input: default@bucket1_mm@key2=101 +PREHOOK: Input: default@bucket1_mm@key2=102 +PREHOOK: Input: default@bucket1_mm@key2=104 +PREHOOK: Input: default@bucket1_mm@key2=11 +PREHOOK: Input: default@bucket1_mm@key2=9 +PREHOOK: Input: default@bucket1_mm@key2=96 +PREHOOK: Input: default@bucket1_mm@key2=97 +PREHOOK: Input: default@bucket1_mm@key2=98 +PREHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket1_mm order by key, id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket1_mm +POSTHOOK: Input: default@bucket1_mm@key2=-1 +POSTHOOK: Input: default@bucket1_mm@key2=1 +POSTHOOK: Input: default@bucket1_mm@key2=101 +POSTHOOK: Input: default@bucket1_mm@key2=102 +POSTHOOK: Input: default@bucket1_mm@key2=104 +POSTHOOK: Input: default@bucket1_mm@key2=11 +POSTHOOK: Input: default@bucket1_mm@key2=9 +POSTHOOK: Input: default@bucket1_mm@key2=96 +POSTHOOK: Input: default@bucket1_mm@key2=97 +POSTHOOK: Input: default@bucket1_mm@key2=98 +POSTHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +-1 0 1 +1 0 -1 +9 10 11 +11 10 9 +96 97 98 +97 98 99 +98 97 96 +99 98 97 +99 100 101 +101 100 99 +102 103 104 +104 103 102 +PREHOOK: query: select * from bucket1_mm tablesample (bucket 1 out of 2) s order by key, id +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket1_mm +PREHOOK: Input: default@bucket1_mm@key2=-1 +PREHOOK: Input: default@bucket1_mm@key2=1 +PREHOOK: Input: default@bucket1_mm@key2=101 +PREHOOK: Input: default@bucket1_mm@key2=102 +PREHOOK: Input: default@bucket1_mm@key2=104 +PREHOOK: Input: default@bucket1_mm@key2=11 +PREHOOK: Input: default@bucket1_mm@key2=9 +PREHOOK: Input: default@bucket1_mm@key2=96 +PREHOOK: Input: default@bucket1_mm@key2=97 +PREHOOK: Input: default@bucket1_mm@key2=98 +PREHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket1_mm tablesample (bucket 1 out of 2) s order by key, id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket1_mm +POSTHOOK: Input: default@bucket1_mm@key2=-1 +POSTHOOK: Input: default@bucket1_mm@key2=1 +POSTHOOK: Input: default@bucket1_mm@key2=101 +POSTHOOK: Input: default@bucket1_mm@key2=102 +POSTHOOK: Input: default@bucket1_mm@key2=104 +POSTHOOK: Input: default@bucket1_mm@key2=11 +POSTHOOK: Input: default@bucket1_mm@key2=9 +POSTHOOK: Input: default@bucket1_mm@key2=96 +POSTHOOK: Input: 
default@bucket1_mm@key2=97 +POSTHOOK: Input: default@bucket1_mm@key2=98 +POSTHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +96 97 98 +98 97 96 +102 103 104 +104 103 102 +PREHOOK: query: select * from bucket1_mm tablesample (bucket 2 out of 2) s order by key, id +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket1_mm +PREHOOK: Input: default@bucket1_mm@key2=-1 +PREHOOK: Input: default@bucket1_mm@key2=1 +PREHOOK: Input: default@bucket1_mm@key2=101 +PREHOOK: Input: default@bucket1_mm@key2=102 +PREHOOK: Input: default@bucket1_mm@key2=104 +PREHOOK: Input: default@bucket1_mm@key2=11 +PREHOOK: Input: default@bucket1_mm@key2=9 +PREHOOK: Input: default@bucket1_mm@key2=96 +PREHOOK: Input: default@bucket1_mm@key2=97 +PREHOOK: Input: default@bucket1_mm@key2=98 +PREHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket1_mm tablesample (bucket 2 out of 2) s order by key, id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket1_mm +POSTHOOK: Input: default@bucket1_mm@key2=-1 +POSTHOOK: Input: default@bucket1_mm@key2=1 +POSTHOOK: Input: default@bucket1_mm@key2=101 +POSTHOOK: Input: default@bucket1_mm@key2=102 +POSTHOOK: Input: default@bucket1_mm@key2=104 +POSTHOOK: Input: default@bucket1_mm@key2=11 +POSTHOOK: Input: default@bucket1_mm@key2=9 +POSTHOOK: Input: default@bucket1_mm@key2=96 +POSTHOOK: Input: default@bucket1_mm@key2=97 +POSTHOOK: Input: default@bucket1_mm@key2=98 +POSTHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +-1 0 1 +1 0 -1 +9 10 11 +11 10 9 +97 98 99 +99 98 97 +99 100 101 +101 100 99 +PREHOOK: query: drop table bucket1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@bucket1_mm +PREHOOK: Output: default@bucket1_mm +POSTHOOK: query: drop table bucket1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@bucket1_mm +POSTHOOK: Output: default@bucket1_mm +PREHOOK: query: drop table bucket2_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucket2_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table bucket2_mm(key int, id int) +clustered by (key) into 10 buckets +tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket2_mm +POSTHOOK: query: create table bucket2_mm(key int, id int) +clustered by (key) into 10 buckets +tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket2_mm +PREHOOK: query: insert into table bucket2_mm select key, key from intermediate where key == 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@bucket2_mm +POSTHOOK: query: insert into table bucket2_mm select key, key from intermediate where key == 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@bucket2_mm +POSTHOOK: Lineage: bucket2_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket2_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from bucket2_mm order by key, id 
+PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_mm order by key, id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +0 0 +PREHOOK: query: select * from bucket2_mm tablesample (bucket 1 out of 10) s order by key, id +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_mm tablesample (bucket 1 out of 10) s order by key, id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +0 0 +PREHOOK: query: select * from bucket2_mm tablesample (bucket 4 out of 10) s order by key, id +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_mm tablesample (bucket 4 out of 10) s order by key, id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +PREHOOK: query: insert into table bucket2_mm select key, key from intermediate where key in (0, 103) +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@bucket2_mm +POSTHOOK: query: insert into table bucket2_mm select key, key from intermediate where key in (0, 103) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@bucket2_mm +POSTHOOK: Lineage: bucket2_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket2_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from bucket2_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +103 103 +0 0 +0 0 +PREHOOK: query: select * from bucket2_mm tablesample (bucket 1 out of 10) s order by key, id +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_mm tablesample (bucket 1 out of 10) s order by key, id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +0 0 +0 0 +PREHOOK: query: select * from bucket2_mm tablesample (bucket 4 out of 10) s order by key, id +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_mm tablesample (bucket 4 out of 10) s order by key, id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +103 103 +PREHOOK: query: drop table bucket2_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@bucket2_mm +PREHOOK: Output: default@bucket2_mm +POSTHOOK: query: drop table bucket2_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@bucket2_mm +POSTHOOK: Output: default@bucket2_mm +PREHOOK: query: drop table intermediate +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@intermediate +PREHOOK: Output: default@intermediate +POSTHOOK: query: drop table intermediate +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@intermediate +POSTHOOK: 
Output: default@intermediate diff --git a/ql/src/test/results/clientpositive/mm_conversions.q.out b/ql/src/test/results/clientpositive/mm_conversions.q.out new file mode 100644 index 0000000..d95a70e --- /dev/null +++ b/ql/src/test/results/clientpositive/mm_conversions.q.out @@ -0,0 +1,853 @@ +PREHOOK: query: drop table intermediate +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermediate +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@intermediate +POSTHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermediate +PREHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=455 +POSTHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=455 +POSTHOOK: Lineage: intermediate PARTITION(p=455).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=456 +POSTHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=456 +POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=457 +POSTHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=457 +POSTHOOK: Lineage: intermediate PARTITION(p=457).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: drop table simple_from_mm1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table simple_from_mm1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table simple_from_mm1(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: create table simple_from_mm1(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@simple_from_mm1 +PREHOOK: query: insert into table simple_from_mm1 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: 
default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: insert into table simple_from_mm1 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm1 +POSTHOOK: Lineage: simple_from_mm1.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table simple_from_mm1 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: insert into table simple_from_mm1 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm1 +POSTHOOK: Lineage: simple_from_mm1.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_from_mm1 s1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_from_mm1 s1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +0 +0 +98 +98 +100 +100 +PREHOOK: query: alter table simple_from_mm1 unset tblproperties('transactional_properties', 'transactional') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@simple_from_mm1 +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: alter table simple_from_mm1 unset tblproperties('transactional_properties', 'transactional') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@simple_from_mm1 +POSTHOOK: Output: default@simple_from_mm1 +PREHOOK: query: select * from simple_from_mm1 s2 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_from_mm1 s2 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +0 +0 +98 +98 +100 +100 +PREHOOK: query: insert into table simple_from_mm1 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: insert into table simple_from_mm1 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm1 +POSTHOOK: Lineage: simple_from_mm1.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_from_mm1 s3 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_from_mm1 s3 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm1 +#### A masked pattern was here #### +0 +0 +0 +98 +98 +98 
+100 +100 +100 +PREHOOK: query: drop table simple_from_mm1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@simple_from_mm1 +PREHOOK: Output: default@simple_from_mm1 +POSTHOOK: query: drop table simple_from_mm1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@simple_from_mm1 +POSTHOOK: Output: default@simple_from_mm1 +PREHOOK: query: drop table simple_from_mm2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table simple_from_mm2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table simple_from_mm2(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@simple_from_mm2 +POSTHOOK: query: create table simple_from_mm2(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@simple_from_mm2 +PREHOOK: query: insert into table simple_from_mm2 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm2 +POSTHOOK: query: insert into table simple_from_mm2 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm2 +POSTHOOK: Lineage: simple_from_mm2.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table simple_from_mm2 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm2 +POSTHOOK: query: insert into table simple_from_mm2 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm2 +POSTHOOK: Lineage: simple_from_mm2.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_from_mm2 s1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_from_mm2 s1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +0 +0 +98 +98 +100 +100 +PREHOOK: query: alter table simple_from_mm2 set tblproperties("transactional"="false", 'transactional_properties'='false') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@simple_from_mm2 +PREHOOK: Output: default@simple_from_mm2 +POSTHOOK: query: alter table simple_from_mm2 set tblproperties("transactional"="false", 'transactional_properties'='false') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@simple_from_mm2 +POSTHOOK: Output: default@simple_from_mm2 +PREHOOK: query: select * from simple_from_mm2 s2 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +POSTHOOK: query: select * from 
simple_from_mm2 s2 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +0 +0 +98 +98 +100 +100 +PREHOOK: query: insert into table simple_from_mm2 select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_from_mm2 +POSTHOOK: query: insert into table simple_from_mm2 select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_from_mm2 +POSTHOOK: Lineage: simple_from_mm2.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_from_mm2 s3 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_from_mm2 s3 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_from_mm2 +#### A masked pattern was here #### +0 +0 +0 +98 +98 +98 +100 +100 +100 +PREHOOK: query: drop table simple_from_mm2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@simple_from_mm2 +PREHOOK: Output: default@simple_from_mm2 +POSTHOOK: query: drop table simple_from_mm2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@simple_from_mm2 +POSTHOOK: Output: default@simple_from_mm2 +PREHOOK: query: drop table simple_to_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table simple_to_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table simple_to_mm(key int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@simple_to_mm +POSTHOOK: query: create table simple_to_mm(key int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@simple_to_mm +PREHOOK: query: insert into table simple_to_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_to_mm +POSTHOOK: query: insert into table simple_to_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_to_mm +POSTHOOK: Lineage: simple_to_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_to_mm s1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_to_mm s1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +0 +98 +100 +PREHOOK: query: alter table simple_to_mm set tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@simple_to_mm +PREHOOK: Output: default@simple_to_mm +POSTHOOK: query: alter table simple_to_mm set tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: ALTERTABLE_PROPERTIES 
+POSTHOOK: Input: default@simple_to_mm +POSTHOOK: Output: default@simple_to_mm +PREHOOK: query: select * from simple_to_mm s2 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_to_mm s2 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +0 +98 +100 +PREHOOK: query: insert into table simple_to_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_to_mm +POSTHOOK: query: insert into table simple_to_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_to_mm +POSTHOOK: Lineage: simple_to_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table simple_to_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@simple_to_mm +POSTHOOK: query: insert into table simple_to_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@simple_to_mm +POSTHOOK: Lineage: simple_to_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_to_mm s3 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_to_mm s3 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_to_mm +#### A masked pattern was here #### +0 +0 +0 +98 +98 +98 +100 +100 +100 +PREHOOK: query: drop table simple_to_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@simple_to_mm +PREHOOK: Output: default@simple_to_mm +POSTHOOK: query: drop table simple_to_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@simple_to_mm +POSTHOOK: Output: default@simple_to_mm +PREHOOK: query: drop table part_from_mm1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table part_from_mm1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table part_from_mm1(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_from_mm1 +POSTHOOK: query: create table part_from_mm1(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_from_mm1 +PREHOOK: query: insert into table part_from_mm1 partition(key_mm='455') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: 
default@part_from_mm1@key_mm=455 +POSTHOOK: query: insert into table part_from_mm1 partition(key_mm='455') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm1@key_mm=455 +POSTHOOK: Lineage: part_from_mm1 PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_from_mm1 partition(key_mm='455') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm1@key_mm=455 +POSTHOOK: query: insert into table part_from_mm1 partition(key_mm='455') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm1@key_mm=455 +POSTHOOK: Lineage: part_from_mm1 PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_from_mm1 partition(key_mm='456') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm1@key_mm=456 +POSTHOOK: query: insert into table part_from_mm1 partition(key_mm='456') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm1@key_mm=456 +POSTHOOK: Lineage: part_from_mm1 PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_from_mm1 s1 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm1 +PREHOOK: Input: default@part_from_mm1@key_mm=455 +PREHOOK: Input: default@part_from_mm1@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_from_mm1 s1 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm1 +POSTHOOK: Input: default@part_from_mm1@key_mm=455 +POSTHOOK: Input: default@part_from_mm1@key_mm=456 +#### A masked pattern was here #### +0 455 +0 455 +0 456 +98 455 +98 455 +98 456 +100 455 +100 455 +100 456 +PREHOOK: query: alter table part_from_mm1 unset tblproperties('transactional_properties', 'transactional') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@part_from_mm1 +PREHOOK: Output: default@part_from_mm1 +POSTHOOK: query: alter table part_from_mm1 unset tblproperties('transactional_properties', 'transactional') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@part_from_mm1 +POSTHOOK: Output: default@part_from_mm1 +PREHOOK: query: select * from part_from_mm1 s2 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm1 +PREHOOK: Input: default@part_from_mm1@key_mm=455 +PREHOOK: Input: default@part_from_mm1@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: 
query: select * from part_from_mm1 s2 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm1 +POSTHOOK: Input: default@part_from_mm1@key_mm=455 +POSTHOOK: Input: default@part_from_mm1@key_mm=456 +#### A masked pattern was here #### +0 455 +0 455 +0 456 +98 455 +98 455 +98 456 +100 455 +100 455 +100 456 +PREHOOK: query: insert into table part_from_mm1 partition(key_mm='456') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm1@key_mm=456 +POSTHOOK: query: insert into table part_from_mm1 partition(key_mm='456') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm1@key_mm=456 +POSTHOOK: Lineage: part_from_mm1 PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_from_mm1 partition(key_mm='457') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm1@key_mm=457 +POSTHOOK: query: insert into table part_from_mm1 partition(key_mm='457') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm1@key_mm=457 +POSTHOOK: Lineage: part_from_mm1 PARTITION(key_mm=457).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_from_mm1 s3 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm1 +PREHOOK: Input: default@part_from_mm1@key_mm=455 +PREHOOK: Input: default@part_from_mm1@key_mm=456 +PREHOOK: Input: default@part_from_mm1@key_mm=457 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_from_mm1 s3 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm1 +POSTHOOK: Input: default@part_from_mm1@key_mm=455 +POSTHOOK: Input: default@part_from_mm1@key_mm=456 +POSTHOOK: Input: default@part_from_mm1@key_mm=457 +#### A masked pattern was here #### +0 455 +0 455 +0 456 +0 456 +0 457 +98 455 +98 455 +98 456 +98 456 +98 457 +100 455 +100 455 +100 456 +100 456 +100 457 +PREHOOK: query: drop table part_from_mm1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_from_mm1 +PREHOOK: Output: default@part_from_mm1 +POSTHOOK: query: drop table part_from_mm1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_from_mm1 +POSTHOOK: Output: default@part_from_mm1 +PREHOOK: query: drop table part_from_mm2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table part_from_mm2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table part_from_mm2(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_from_mm2 +POSTHOOK: query: create table part_from_mm2(key int) 
partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_from_mm2 +PREHOOK: query: insert into table part_from_mm2 partition(key_mm='456') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm2@key_mm=456 +POSTHOOK: query: insert into table part_from_mm2 partition(key_mm='456') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm2@key_mm=456 +POSTHOOK: Lineage: part_from_mm2 PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: --fails here +insert into table part_from_mm2 partition(key_mm='455') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm2@key_mm=455 +POSTHOOK: query: --fails here +insert into table part_from_mm2 partition(key_mm='455') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm2@key_mm=455 +POSTHOOK: Lineage: part_from_mm2 PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_from_mm2 s1 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm2 +PREHOOK: Input: default@part_from_mm2@key_mm=455 +PREHOOK: Input: default@part_from_mm2@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_from_mm2 s1 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm2 +POSTHOOK: Input: default@part_from_mm2@key_mm=455 +POSTHOOK: Input: default@part_from_mm2@key_mm=456 +#### A masked pattern was here #### +0 455 +0 456 +98 455 +98 456 +100 455 +100 456 +PREHOOK: query: alter table part_from_mm2 set tblproperties("transactional"="false", 'transactional_properties'='false') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@part_from_mm2 +PREHOOK: Output: default@part_from_mm2 +POSTHOOK: query: alter table part_from_mm2 set tblproperties("transactional"="false", 'transactional_properties'='false') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@part_from_mm2 +POSTHOOK: Output: default@part_from_mm2 +PREHOOK: query: select * from part_from_mm2 s2 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm2 +PREHOOK: Input: default@part_from_mm2@key_mm=455 +PREHOOK: Input: default@part_from_mm2@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_from_mm2 s2 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm2 +POSTHOOK: Input: default@part_from_mm2@key_mm=455 +POSTHOOK: Input: default@part_from_mm2@key_mm=456 +#### A masked pattern was here #### +0 455 
+0 456 +98 455 +98 456 +100 455 +100 456 +PREHOOK: query: insert into table part_from_mm2 partition(key_mm='457') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_from_mm2@key_mm=457 +POSTHOOK: query: insert into table part_from_mm2 partition(key_mm='457') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_from_mm2@key_mm=457 +POSTHOOK: Lineage: part_from_mm2 PARTITION(key_mm=457).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_from_mm2 s3 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_from_mm2 +PREHOOK: Input: default@part_from_mm2@key_mm=455 +PREHOOK: Input: default@part_from_mm2@key_mm=456 +PREHOOK: Input: default@part_from_mm2@key_mm=457 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_from_mm2 s3 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_from_mm2 +POSTHOOK: Input: default@part_from_mm2@key_mm=455 +POSTHOOK: Input: default@part_from_mm2@key_mm=456 +POSTHOOK: Input: default@part_from_mm2@key_mm=457 +#### A masked pattern was here #### +0 455 +0 456 +0 457 +98 455 +98 456 +98 457 +100 455 +100 456 +100 457 +PREHOOK: query: drop table part_from_mm2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_from_mm2 +PREHOOK: Output: default@part_from_mm2 +POSTHOOK: query: drop table part_from_mm2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_from_mm2 +POSTHOOK: Output: default@part_from_mm2 +PREHOOK: query: drop table part_to_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table part_to_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table part_to_mm(key int) partitioned by (key_mm int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_to_mm +POSTHOOK: query: create table part_to_mm(key int) partitioned by (key_mm int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_to_mm +PREHOOK: query: insert into table part_to_mm partition(key_mm='455') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_to_mm@key_mm=455 +POSTHOOK: query: insert into table part_to_mm partition(key_mm='455') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_to_mm@key_mm=455 +POSTHOOK: Lineage: part_to_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_to_mm partition(key_mm='456') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: 
default@part_to_mm@key_mm=456 +POSTHOOK: query: insert into table part_to_mm partition(key_mm='456') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_to_mm@key_mm=456 +POSTHOOK: Lineage: part_to_mm PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_to_mm s1 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_to_mm +PREHOOK: Input: default@part_to_mm@key_mm=455 +PREHOOK: Input: default@part_to_mm@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_to_mm s1 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_to_mm +POSTHOOK: Input: default@part_to_mm@key_mm=455 +POSTHOOK: Input: default@part_to_mm@key_mm=456 +#### A masked pattern was here #### +0 455 +0 456 +98 455 +98 456 +100 455 +100 456 +PREHOOK: query: alter table part_to_mm set tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@part_to_mm +PREHOOK: Output: default@part_to_mm +POSTHOOK: query: alter table part_to_mm set tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@part_to_mm +POSTHOOK: Output: default@part_to_mm +PREHOOK: query: select * from part_to_mm s2 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_to_mm +PREHOOK: Input: default@part_to_mm@key_mm=455 +PREHOOK: Input: default@part_to_mm@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_to_mm s2 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_to_mm +POSTHOOK: Input: default@part_to_mm@key_mm=455 +POSTHOOK: Input: default@part_to_mm@key_mm=456 +#### A masked pattern was here #### +0 455 +0 456 +98 455 +98 456 +100 455 +100 456 +PREHOOK: query: insert into table part_to_mm partition(key_mm='456') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_to_mm@key_mm=456 +POSTHOOK: query: insert into table part_to_mm partition(key_mm='456') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@part_to_mm@key_mm=456 +POSTHOOK: Lineage: part_to_mm PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_to_mm partition(key_mm='457') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@part_to_mm@key_mm=457 +POSTHOOK: query: insert into table part_to_mm partition(key_mm='457') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: 
default@intermediate@p=457 +POSTHOOK: Output: default@part_to_mm@key_mm=457 +POSTHOOK: Lineage: part_to_mm PARTITION(key_mm=457).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_to_mm s3 order by key, key_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_to_mm +PREHOOK: Input: default@part_to_mm@key_mm=455 +PREHOOK: Input: default@part_to_mm@key_mm=456 +PREHOOK: Input: default@part_to_mm@key_mm=457 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_to_mm s3 order by key, key_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_to_mm +POSTHOOK: Input: default@part_to_mm@key_mm=455 +POSTHOOK: Input: default@part_to_mm@key_mm=456 +POSTHOOK: Input: default@part_to_mm@key_mm=457 +#### A masked pattern was here #### +0 455 +0 456 +0 456 +0 457 +98 455 +98 456 +98 456 +98 457 +100 455 +100 456 +100 456 +100 457 +PREHOOK: query: drop table part_to_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_to_mm +PREHOOK: Output: default@part_to_mm +POSTHOOK: query: drop table part_to_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_to_mm +POSTHOOK: Output: default@part_to_mm +PREHOOK: query: drop table intermediate +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@intermediate +PREHOOK: Output: default@intermediate +POSTHOOK: query: drop table intermediate +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@intermediate +POSTHOOK: Output: default@intermediate diff --git a/ql/src/test/results/clientpositive/mm_exchangepartition.q.out b/ql/src/test/results/clientpositive/mm_exchangepartition.q.out new file mode 100644 index 0000000..3ba6c21 --- /dev/null +++ b/ql/src/test/results/clientpositive/mm_exchangepartition.q.out @@ -0,0 +1,382 @@ +PREHOOK: query: drop database if exists ex1 +PREHOOK: type: DROPDATABASE +POSTHOOK: query: drop database if exists ex1 +POSTHOOK: type: DROPDATABASE +PREHOOK: query: drop database if exists ex2 +PREHOOK: type: DROPDATABASE +POSTHOOK: query: drop database if exists ex2 +POSTHOOK: type: DROPDATABASE +PREHOOK: query: create database ex1 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:ex1 +POSTHOOK: query: create database ex1 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:ex1 +PREHOOK: query: create database ex2 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:ex2 +POSTHOOK: query: create database ex2 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:ex2 +PREHOOK: query: CREATE TABLE ex1.exchange_part_test1 (f1 string) PARTITIONED BY (ds STRING) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:ex1 +PREHOOK: Output: ex1@exchange_part_test1 +POSTHOOK: query: CREATE TABLE ex1.exchange_part_test1 (f1 string) PARTITIONED BY (ds STRING) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:ex1 +POSTHOOK: Output: ex1@exchange_part_test1 +PREHOOK: query: CREATE TABLE ex2.exchange_part_test2 (f1 string) PARTITIONED BY (ds STRING) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:ex2 +PREHOOK: Output: ex2@exchange_part_test2 +POSTHOOK: query: CREATE TABLE ex2.exchange_part_test2 (f1 string) PARTITIONED BY (ds STRING) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:ex2 
+POSTHOOK: Output: ex2@exchange_part_test2 +PREHOOK: query: SHOW PARTITIONS ex1.exchange_part_test1 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: ex1@exchange_part_test1 +POSTHOOK: query: SHOW PARTITIONS ex1.exchange_part_test1 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: ex1@exchange_part_test1 +PREHOOK: query: SHOW PARTITIONS ex2.exchange_part_test2 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: ex2@exchange_part_test2 +POSTHOOK: query: SHOW PARTITIONS ex2.exchange_part_test2 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: ex2@exchange_part_test2 +PREHOOK: query: ALTER TABLE ex2.exchange_part_test2 ADD PARTITION (ds='2013-04-05') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: ex2@exchange_part_test2 +POSTHOOK: query: ALTER TABLE ex2.exchange_part_test2 ADD PARTITION (ds='2013-04-05') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: ex2@exchange_part_test2 +POSTHOOK: Output: ex2@exchange_part_test2@ds=2013-04-05 +PREHOOK: query: SHOW PARTITIONS ex1.exchange_part_test1 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: ex1@exchange_part_test1 +POSTHOOK: query: SHOW PARTITIONS ex1.exchange_part_test1 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: ex1@exchange_part_test1 +PREHOOK: query: SHOW PARTITIONS ex2.exchange_part_test2 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: ex2@exchange_part_test2 +POSTHOOK: query: SHOW PARTITIONS ex2.exchange_part_test2 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: ex2@exchange_part_test2 +ds=2013-04-05 +PREHOOK: query: ALTER TABLE ex1.exchange_part_test1 EXCHANGE PARTITION (ds='2013-04-05') WITH TABLE ex2.exchange_part_test2 +PREHOOK: type: ALTERTABLE_EXCHANGEPARTITION +PREHOOK: Input: ex2@exchange_part_test2 +PREHOOK: Output: ex1@exchange_part_test1 +POSTHOOK: query: ALTER TABLE ex1.exchange_part_test1 EXCHANGE PARTITION (ds='2013-04-05') WITH TABLE ex2.exchange_part_test2 +POSTHOOK: type: ALTERTABLE_EXCHANGEPARTITION +POSTHOOK: Input: ex2@exchange_part_test2 +POSTHOOK: Input: ex2@exchange_part_test2@ds=2013-04-05 +POSTHOOK: Output: ex1@exchange_part_test1 +POSTHOOK: Output: ex1@exchange_part_test1@ds=2013-04-05 +POSTHOOK: Output: ex2@exchange_part_test2@ds=2013-04-05 +PREHOOK: query: SHOW PARTITIONS ex1.exchange_part_test1 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: ex1@exchange_part_test1 +POSTHOOK: query: SHOW PARTITIONS ex1.exchange_part_test1 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: ex1@exchange_part_test1 +ds=2013-04-05 +PREHOOK: query: SHOW PARTITIONS ex2.exchange_part_test2 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: ex2@exchange_part_test2 +POSTHOOK: query: SHOW PARTITIONS ex2.exchange_part_test2 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: ex2@exchange_part_test2 +PREHOOK: query: DROP TABLE IF EXISTS t1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS t2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS t3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS t4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS t5 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t5 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS t6 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t6 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE t1 
(a int) PARTITIONED BY (d1 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: CREATE TABLE t1 (a int) PARTITIONED BY (d1 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE t2 (a int) PARTITIONED BY (d1 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: CREATE TABLE t2 (a int) PARTITIONED BY (d1 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: CREATE TABLE t3 (a int) PARTITIONED BY (d1 int, d2 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t3 +POSTHOOK: query: CREATE TABLE t3 (a int) PARTITIONED BY (d1 int, d2 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t3 +PREHOOK: query: CREATE TABLE t4 (a int) PARTITIONED BY (d1 int, d2 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t4 +POSTHOOK: query: CREATE TABLE t4 (a int) PARTITIONED BY (d1 int, d2 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t4 +PREHOOK: query: CREATE TABLE t5 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t5 +POSTHOOK: query: CREATE TABLE t5 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t5 +PREHOOK: query: CREATE TABLE t6 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t6 +POSTHOOK: query: CREATE TABLE t6 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t6 +PREHOOK: query: INSERT INTO TABLE t1 PARTITION (d1 = 1) SELECT key FROM src where key = 100 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t1@d1=1 +POSTHOOK: query: INSERT INTO TABLE t1 PARTITION (d1 = 1) SELECT key FROM src where key = 100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1@d1=1 +POSTHOOK: Lineage: t1 PARTITION(d1=1).a EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT INTO TABLE t3 PARTITION (d1 = 1, d2 = 1) SELECT key FROM src where key = 100 limit 1 +PREHOOK: 
type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t3@d1=1/d2=1 +POSTHOOK: query: INSERT INTO TABLE t3 PARTITION (d1 = 1, d2 = 1) SELECT key FROM src where key = 100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t3@d1=1/d2=1 +POSTHOOK: Lineage: t3 PARTITION(d1=1,d2=1).a EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT INTO TABLE t5 PARTITION (d1 = 1, d2 = 1, d3=1) SELECT key FROM src where key = 100 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: query: INSERT INTO TABLE t5 PARTITION (d1 = 1, d2 = 1, d3=1) SELECT key FROM src where key = 100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: Lineage: t5 PARTITION(d1=1,d2=1,d3=1).a EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@d1=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@d1=1 +#### A masked pattern was here #### +100 1 +PREHOOK: query: SELECT * FROM t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +PREHOOK: Input: default@t3@d1=1/d2=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t3@d1=1/d2=1 +#### A masked pattern was here #### +100 1 1 +PREHOOK: query: SELECT * FROM t5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t5 +PREHOOK: Input: default@t5@d1=1/d2=1/d3=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t5 +POSTHOOK: Input: default@t5@d1=1/d2=1/d3=1 +#### A masked pattern was here #### +100 1 1 1 +PREHOOK: query: ALTER TABLE t2 EXCHANGE PARTITION (d1 = 1) WITH TABLE t1 +PREHOOK: type: ALTERTABLE_EXCHANGEPARTITION +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: ALTER TABLE t2 EXCHANGE PARTITION (d1 = 1) WITH TABLE t1 +POSTHOOK: type: ALTERTABLE_EXCHANGEPARTITION +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@d1=1 +POSTHOOK: Output: default@t1@d1=1 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t2@d1=1 +PREHOOK: query: SELECT * FROM t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@d1=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t2@d1=1 +#### A masked pattern was here #### +100 1 +PREHOOK: query: ALTER TABLE t4 EXCHANGE PARTITION (d1 = 1, d2 = 1) WITH TABLE t3 +PREHOOK: type: ALTERTABLE_EXCHANGEPARTITION +PREHOOK: Input: default@t3 +PREHOOK: Output: default@t4 +POSTHOOK: query: ALTER TABLE t4 EXCHANGE PARTITION (d1 = 1, d2 = 1) WITH TABLE t3 +POSTHOOK: type: ALTERTABLE_EXCHANGEPARTITION +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t3@d1=1/d2=1 +POSTHOOK: Output: default@t3@d1=1/d2=1 +POSTHOOK: Output: default@t4 +POSTHOOK: Output: default@t4@d1=1/d2=1 +PREHOOK: query: SELECT * FROM t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### 
A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM t4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t4 +PREHOOK: Input: default@t4@d1=1/d2=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t4 +POSTHOOK: Input: default@t4@d1=1/d2=1 +#### A masked pattern was here #### +100 1 1 +PREHOOK: query: ALTER TABLE t6 EXCHANGE PARTITION (d1 = 1, d2 = 1, d3 = 1) WITH TABLE t5 +PREHOOK: type: ALTERTABLE_EXCHANGEPARTITION +PREHOOK: Input: default@t5 +PREHOOK: Output: default@t6 +POSTHOOK: query: ALTER TABLE t6 EXCHANGE PARTITION (d1 = 1, d2 = 1, d3 = 1) WITH TABLE t5 +POSTHOOK: type: ALTERTABLE_EXCHANGEPARTITION +POSTHOOK: Input: default@t5 +POSTHOOK: Input: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: Output: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: Output: default@t6 +POSTHOOK: Output: default@t6@d1=1/d2=1/d3=1 +PREHOOK: query: SELECT * FROM t5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t5 +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM t6 +PREHOOK: type: QUERY +PREHOOK: Input: default@t6 +PREHOOK: Input: default@t6@d1=1/d2=1/d3=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t6 +POSTHOOK: Input: default@t6@d1=1/d2=1/d3=1 +#### A masked pattern was here #### +100 1 1 1 +PREHOOK: query: DROP DATABASE ex1 CASCADE +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:ex1 +PREHOOK: Output: database:ex1 +PREHOOK: Output: ex1@exchange_part_test1 +POSTHOOK: query: DROP DATABASE ex1 CASCADE +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:ex1 +POSTHOOK: Output: database:ex1 +POSTHOOK: Output: ex1@exchange_part_test1 +PREHOOK: query: DROP DATABASE ex2 CASCADE +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:ex2 +PREHOOK: Output: database:ex2 +PREHOOK: Output: ex2@exchange_part_test2 +POSTHOOK: query: DROP DATABASE ex2 CASCADE +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:ex2 +POSTHOOK: Output: database:ex2 +POSTHOOK: Output: ex2@exchange_part_test2 +PREHOOK: query: DROP TABLE t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: DROP TABLE t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: DROP TABLE t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: DROP TABLE t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +PREHOOK: query: DROP TABLE t3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t3 +PREHOOK: Output: default@t3 +POSTHOOK: query: DROP TABLE t3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t3 +POSTHOOK: Output: default@t3 +PREHOOK: query: DROP TABLE t4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t4 +PREHOOK: Output: default@t4 +POSTHOOK: query: DROP TABLE t4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t4 +POSTHOOK: Output: default@t4 +PREHOOK: query: DROP TABLE t5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t5 +PREHOOK: Output: default@t5 +POSTHOOK: query: DROP TABLE t5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t5 +POSTHOOK: Output: default@t5 +PREHOOK: query: DROP TABLE t6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t6 
+PREHOOK: Output: default@t6 +POSTHOOK: query: DROP TABLE t6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t6 +POSTHOOK: Output: default@t6 diff --git a/ql/src/test/results/clientpositive/mm_exim.q.out b/ql/src/test/results/clientpositive/mm_exim.q.out new file mode 100644 index 0000000..910a46d --- /dev/null +++ b/ql/src/test/results/clientpositive/mm_exim.q.out @@ -0,0 +1,557 @@ +PREHOOK: query: drop table intermediate +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermediate +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@intermediate +POSTHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermediate +PREHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=455 +POSTHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=455 +POSTHOOK: Lineage: intermediate PARTITION(p=455).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=456 +POSTHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=456 +POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=457 +POSTHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=457 +POSTHOOK: Lineage: intermediate PARTITION(p=457).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: drop table intermediate_nonpart +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermediate_nonpart +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table intermmediate_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermmediate_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table intermmediate_nonpart +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermmediate_nonpart +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table intermediate_nonpart(key int, p int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@intermediate_nonpart +POSTHOOK: query: create table intermediate_nonpart(key int, p int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermediate_nonpart +PREHOOK: 
query: insert into intermediate_nonpart select * from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@intermediate_nonpart +POSTHOOK: query: insert into intermediate_nonpart select * from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@intermediate_nonpart +POSTHOOK: Lineage: intermediate_nonpart.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: intermediate_nonpart.p SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +PREHOOK: query: create table intermmediate_nonpart(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@intermmediate_nonpart +POSTHOOK: query: create table intermmediate_nonpart(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermmediate_nonpart +PREHOOK: query: insert into intermmediate_nonpart select * from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@intermmediate_nonpart +POSTHOOK: query: insert into intermmediate_nonpart select * from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@intermmediate_nonpart +POSTHOOK: Lineage: intermmediate_nonpart.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: intermmediate_nonpart.p SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +PREHOOK: query: create table intermmediate(key int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@intermmediate +POSTHOOK: query: create table intermmediate(key int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermmediate +PREHOOK: query: insert into table intermmediate partition(p) select key, p from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@intermmediate +POSTHOOK: query: insert into table intermmediate partition(p) select key, p from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@intermmediate@p=455 +POSTHOOK: Output: default@intermmediate@p=456 +POSTHOOK: 
Output: default@intermmediate@p=457 +POSTHOOK: Lineage: intermmediate PARTITION(p=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: intermmediate PARTITION(p=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: intermmediate PARTITION(p=457).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: export table intermediate_nonpart to 'ql/test/data/exports/intermediate_nonpart' +PREHOOK: type: EXPORT +PREHOOK: Input: default@intermediate_nonpart +#### A masked pattern was here #### +POSTHOOK: query: export table intermediate_nonpart to 'ql/test/data/exports/intermediate_nonpart' +POSTHOOK: type: EXPORT +POSTHOOK: Input: default@intermediate_nonpart +#### A masked pattern was here #### +PREHOOK: query: export table intermmediate_nonpart to 'ql/test/data/exports/intermmediate_nonpart' +PREHOOK: type: EXPORT +PREHOOK: Input: default@intermmediate_nonpart +#### A masked pattern was here #### +POSTHOOK: query: export table intermmediate_nonpart to 'ql/test/data/exports/intermmediate_nonpart' +POSTHOOK: type: EXPORT +POSTHOOK: Input: default@intermmediate_nonpart +#### A masked pattern was here #### +PREHOOK: query: export table intermediate to 'ql/test/data/exports/intermediate_part' +PREHOOK: type: EXPORT +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +#### A masked pattern was here #### +POSTHOOK: query: export table intermediate to 'ql/test/data/exports/intermediate_part' +POSTHOOK: type: EXPORT +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +#### A masked pattern was here #### +PREHOOK: query: export table intermmediate to 'ql/test/data/exports/intermmediate_part' +PREHOOK: type: EXPORT +PREHOOK: Input: default@intermmediate@p=455 +PREHOOK: Input: default@intermmediate@p=456 +PREHOOK: Input: default@intermmediate@p=457 +#### A masked pattern was here #### +POSTHOOK: query: export table intermmediate to 'ql/test/data/exports/intermmediate_part' +POSTHOOK: type: EXPORT +POSTHOOK: Input: default@intermmediate@p=455 +POSTHOOK: Input: default@intermmediate@p=456 +POSTHOOK: Input: default@intermmediate@p=457 +#### A masked pattern was here #### +PREHOOK: query: drop table intermediate_nonpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@intermediate_nonpart +PREHOOK: Output: default@intermediate_nonpart +POSTHOOK: query: drop table intermediate_nonpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@intermediate_nonpart +POSTHOOK: Output: default@intermediate_nonpart +PREHOOK: query: drop table intermmediate_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermmediate_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table intermmediate_nonpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@intermmediate_nonpart +PREHOOK: Output: default@intermmediate_nonpart +POSTHOOK: query: drop table intermmediate_nonpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@intermmediate_nonpart +POSTHOOK: Output: default@intermmediate_nonpart +PREHOOK: query: drop table import0_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table import0_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table import0_mm(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE 
+PREHOOK: Output: database:default +PREHOOK: Output: default@import0_mm +POSTHOOK: query: create table import0_mm(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@import0_mm +PREHOOK: query: import table import0_mm from 'ql/test/data/exports/intermediate_nonpart' +PREHOOK: type: IMPORT +#### A masked pattern was here #### +PREHOOK: Output: default@import0_mm +POSTHOOK: query: import table import0_mm from 'ql/test/data/exports/intermediate_nonpart' +POSTHOOK: type: IMPORT +#### A masked pattern was here #### +POSTHOOK: Output: default@import0_mm +PREHOOK: query: select * from import0_mm order by key, p +PREHOOK: type: QUERY +PREHOOK: Input: default@import0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from import0_mm order by key, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@import0_mm +#### A masked pattern was here #### +0 456 +10 456 +97 455 +98 455 +100 457 +103 457 +PREHOOK: query: drop table import0_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@import0_mm +PREHOOK: Output: default@import0_mm +POSTHOOK: query: drop table import0_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@import0_mm +POSTHOOK: Output: default@import0_mm +PREHOOK: query: drop table import1_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table import1_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table import1_mm(key int) partitioned by (p int) + stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@import1_mm +POSTHOOK: query: create table import1_mm(key int) partitioned by (p int) + stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@import1_mm +PREHOOK: query: import table import1_mm from 'ql/test/data/exports/intermediate_part' +PREHOOK: type: IMPORT +#### A masked pattern was here #### +PREHOOK: Output: default@import1_mm +POSTHOOK: query: import table import1_mm from 'ql/test/data/exports/intermediate_part' +POSTHOOK: type: IMPORT +#### A masked pattern was here #### +POSTHOOK: Output: default@import1_mm +POSTHOOK: Output: default@import1_mm@p=455 +POSTHOOK: Output: default@import1_mm@p=456 +POSTHOOK: Output: default@import1_mm@p=457 +PREHOOK: query: select * from import1_mm order by key, p +PREHOOK: type: QUERY +PREHOOK: Input: default@import1_mm +PREHOOK: Input: default@import1_mm@p=455 +PREHOOK: Input: default@import1_mm@p=456 +PREHOOK: Input: default@import1_mm@p=457 +#### A masked pattern was here #### +POSTHOOK: query: select * from import1_mm order by key, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@import1_mm +POSTHOOK: Input: default@import1_mm@p=455 +POSTHOOK: Input: default@import1_mm@p=456 +POSTHOOK: Input: default@import1_mm@p=457 +#### A masked pattern was here #### +0 456 +10 456 +97 455 +98 455 +100 457 +103 457 +PREHOOK: query: drop table import1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@import1_mm +PREHOOK: Output: default@import1_mm +POSTHOOK: query: drop table import1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@import1_mm +POSTHOOK: Output: default@import1_mm +PREHOOK: query: drop table import2_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table import2_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: import 
table import2_mm from 'ql/test/data/exports/intermmediate_nonpart' +PREHOOK: type: IMPORT +#### A masked pattern was here #### +PREHOOK: Output: database:default +POSTHOOK: query: import table import2_mm from 'ql/test/data/exports/intermmediate_nonpart' +POSTHOOK: type: IMPORT +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@import2_mm +PREHOOK: query: desc import2_mm +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@import2_mm +POSTHOOK: query: desc import2_mm +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@import2_mm +key int +p int +PREHOOK: query: select * from import2_mm order by key, p +PREHOOK: type: QUERY +PREHOOK: Input: default@import2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from import2_mm order by key, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@import2_mm +#### A masked pattern was here #### +0 456 +10 456 +97 455 +98 455 +100 457 +103 457 +PREHOOK: query: drop table import2_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@import2_mm +PREHOOK: Output: default@import2_mm +POSTHOOK: query: drop table import2_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@import2_mm +POSTHOOK: Output: default@import2_mm +PREHOOK: query: drop table import3_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table import3_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: import table import3_mm from 'ql/test/data/exports/intermmediate_part' +PREHOOK: type: IMPORT +#### A masked pattern was here #### +PREHOOK: Output: database:default +POSTHOOK: query: import table import3_mm from 'ql/test/data/exports/intermmediate_part' +POSTHOOK: type: IMPORT +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@import3_mm +POSTHOOK: Output: default@import3_mm@p=455 +POSTHOOK: Output: default@import3_mm@p=456 +POSTHOOK: Output: default@import3_mm@p=457 +PREHOOK: query: desc import3_mm +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@import3_mm +POSTHOOK: query: desc import3_mm +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@import3_mm +key int +p int + +# Partition Information +# col_name data_type comment + +p int +PREHOOK: query: select * from import3_mm order by key, p +PREHOOK: type: QUERY +PREHOOK: Input: default@import3_mm +PREHOOK: Input: default@import3_mm@p=455 +PREHOOK: Input: default@import3_mm@p=456 +PREHOOK: Input: default@import3_mm@p=457 +#### A masked pattern was here #### +POSTHOOK: query: select * from import3_mm order by key, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@import3_mm +POSTHOOK: Input: default@import3_mm@p=455 +POSTHOOK: Input: default@import3_mm@p=456 +POSTHOOK: Input: default@import3_mm@p=457 +#### A masked pattern was here #### +0 456 +10 456 +97 455 +98 455 +100 457 +103 457 +PREHOOK: query: drop table import3_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@import3_mm +PREHOOK: Output: default@import3_mm +POSTHOOK: query: drop table import3_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@import3_mm +POSTHOOK: Output: default@import3_mm +PREHOOK: query: drop table import4_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table import4_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table import4_mm(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@import4_mm +POSTHOOK: query: create table import4_mm(key int, p int) tblproperties("transactional"="true", 
"transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@import4_mm +PREHOOK: query: import table import4_mm from 'ql/test/data/exports/intermmediate_nonpart' +PREHOOK: type: IMPORT +#### A masked pattern was here #### +PREHOOK: Output: default@import4_mm +POSTHOOK: query: import table import4_mm from 'ql/test/data/exports/intermmediate_nonpart' +POSTHOOK: type: IMPORT +#### A masked pattern was here #### +POSTHOOK: Output: default@import4_mm +PREHOOK: query: select * from import4_mm order by key, p +PREHOOK: type: QUERY +PREHOOK: Input: default@import4_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from import4_mm order by key, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@import4_mm +#### A masked pattern was here #### +0 456 +10 456 +97 455 +98 455 +100 457 +103 457 +PREHOOK: query: drop table import4_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@import4_mm +PREHOOK: Output: default@import4_mm +POSTHOOK: query: drop table import4_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@import4_mm +POSTHOOK: Output: default@import4_mm +PREHOOK: query: drop table import5_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table import5_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table import5_mm(key int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@import5_mm +POSTHOOK: query: create table import5_mm(key int) partitioned by (p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@import5_mm +PREHOOK: query: import table import5_mm partition(p=455) from 'ql/test/data/exports/intermmediate_part' +PREHOOK: type: IMPORT +#### A masked pattern was here #### +PREHOOK: Output: default@import5_mm +POSTHOOK: query: import table import5_mm partition(p=455) from 'ql/test/data/exports/intermmediate_part' +POSTHOOK: type: IMPORT +#### A masked pattern was here #### +POSTHOOK: Output: default@import5_mm +POSTHOOK: Output: default@import5_mm@p=455 +PREHOOK: query: select * from import5_mm order by key, p +PREHOOK: type: QUERY +PREHOOK: Input: default@import5_mm +PREHOOK: Input: default@import5_mm@p=455 +#### A masked pattern was here #### +POSTHOOK: query: select * from import5_mm order by key, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@import5_mm +POSTHOOK: Input: default@import5_mm@p=455 +#### A masked pattern was here #### +97 455 +98 455 +PREHOOK: query: drop table import5_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@import5_mm +PREHOOK: Output: default@import5_mm +POSTHOOK: query: drop table import5_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@import5_mm +POSTHOOK: Output: default@import5_mm +PREHOOK: query: drop table import6_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table import6_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table import6_mm(key int, p int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@import6_mm +POSTHOOK: query: create table import6_mm(key int, p int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@import6_mm +PREHOOK: query: import table import6_mm from 'ql/test/data/exports/intermmediate_nonpart' +PREHOOK: type: IMPORT +#### A masked pattern was here #### 
+PREHOOK: Output: default@import6_mm +POSTHOOK: query: import table import6_mm from 'ql/test/data/exports/intermmediate_nonpart' +POSTHOOK: type: IMPORT +#### A masked pattern was here #### +POSTHOOK: Output: default@import6_mm +PREHOOK: query: select * from import6_mm order by key, p +PREHOOK: type: QUERY +PREHOOK: Input: default@import6_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from import6_mm order by key, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@import6_mm +#### A masked pattern was here #### +0 456 +10 456 +97 455 +98 455 +100 457 +103 457 +PREHOOK: query: drop table import6_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@import6_mm +PREHOOK: Output: default@import6_mm +POSTHOOK: query: drop table import6_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@import6_mm +POSTHOOK: Output: default@import6_mm +PREHOOK: query: drop table import7_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table import7_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table import7_mm(key int) partitioned by (p int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@import7_mm +POSTHOOK: query: create table import7_mm(key int) partitioned by (p int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@import7_mm +PREHOOK: query: import table import7_mm from 'ql/test/data/exports/intermmediate_part' +PREHOOK: type: IMPORT +#### A masked pattern was here #### +PREHOOK: Output: default@import7_mm +POSTHOOK: query: import table import7_mm from 'ql/test/data/exports/intermmediate_part' +POSTHOOK: type: IMPORT +#### A masked pattern was here #### +POSTHOOK: Output: default@import7_mm +POSTHOOK: Output: default@import7_mm@p=455 +POSTHOOK: Output: default@import7_mm@p=456 +POSTHOOK: Output: default@import7_mm@p=457 +PREHOOK: query: select * from import7_mm order by key, p +PREHOOK: type: QUERY +PREHOOK: Input: default@import7_mm +PREHOOK: Input: default@import7_mm@p=455 +PREHOOK: Input: default@import7_mm@p=456 +PREHOOK: Input: default@import7_mm@p=457 +#### A masked pattern was here #### +POSTHOOK: query: select * from import7_mm order by key, p +POSTHOOK: type: QUERY +POSTHOOK: Input: default@import7_mm +POSTHOOK: Input: default@import7_mm@p=455 +POSTHOOK: Input: default@import7_mm@p=456 +POSTHOOK: Input: default@import7_mm@p=457 +#### A masked pattern was here #### +0 456 +10 456 +97 455 +98 455 +100 457 +103 457 +PREHOOK: query: drop table import7_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@import7_mm +PREHOOK: Output: default@import7_mm +POSTHOOK: query: drop table import7_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@import7_mm +POSTHOOK: Output: default@import7_mm diff --git a/ql/src/test/results/clientpositive/mm_loaddata.q.out b/ql/src/test/results/clientpositive/mm_loaddata.q.out new file mode 100644 index 0000000..b849a88 --- /dev/null +++ b/ql/src/test/results/clientpositive/mm_loaddata.q.out @@ -0,0 +1,296 @@ +PREHOOK: query: drop table load0_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table load0_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table load0_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@load0_mm +POSTHOOK: query: create table load0_mm (key string, value string) stored as textfile tblproperties("transactional"="true", 
"transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@load0_mm +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_mm +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_mm +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_mm +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_mm +PREHOOK: query: select count(1) from load0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_mm +#### A masked pattern was here #### +500 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_mm +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_mm +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_mm +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_mm +PREHOOK: query: select count(1) from load0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_mm +#### A masked pattern was here #### +1000 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' overwrite into table load0_mm +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_mm +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' overwrite into table load0_mm +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_mm +PREHOOK: query: select count(1) from load0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_mm +#### A masked pattern was here #### +500 +PREHOOK: query: drop table load0_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@load0_mm +PREHOOK: Output: default@load0_mm +POSTHOOK: query: drop table load0_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@load0_mm +POSTHOOK: Output: default@load0_mm +PREHOOK: query: drop table intermediate2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table intermediate2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table intermediate2 (key string, value string) stored as textfile +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: create table intermediate2 (key string, value string) stored as textfile +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +PREHOOK: type: 
LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: drop table load1_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table load1_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table load1_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@load1_mm +POSTHOOK: query: create table load1_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@load1_mm +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load1_mm +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load1_mm +PREHOOK: query: select count(1) from load1_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load1_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load1_mm +#### A masked pattern was here #### +1000 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load1_mm +PREHOOK: 
query: select count(1) from load1_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load1_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load1_mm +#### A masked pattern was here #### +1050 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load1_mm +PREHOOK: query: select count(1) from load1_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load1_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load1_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load1_mm +#### A masked pattern was here #### +500 +PREHOOK: query: drop table load1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@load1_mm +PREHOOK: Output: default@load1_mm +POSTHOOK: query: drop table load1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@load1_mm +POSTHOOK: Output: default@load1_mm +PREHOOK: query: drop table load2_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table load2_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table load2_mm (key string, value string) + partitioned by (k int, l int) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@load2_mm +POSTHOOK: query: create table load2_mm (key string, value string) + partitioned by (k int, l int) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@load2_mm +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +PREHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@intermediate2 +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load2_mm +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked 
pattern was here #### +POSTHOOK: Output: default@load2_mm +POSTHOOK: Output: default@load2_mm@k=5/l=5 +PREHOOK: query: select count(1) from load2_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@load2_mm +PREHOOK: Input: default@load2_mm@k=5/l=5 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load2_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load2_mm +POSTHOOK: Input: default@load2_mm@k=5/l=5 +#### A masked pattern was here #### +1025 +PREHOOK: query: drop table load2_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@load2_mm +PREHOOK: Output: default@load2_mm +POSTHOOK: query: drop table load2_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@load2_mm +POSTHOOK: Output: default@load2_mm +PREHOOK: query: drop table intermediate2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@intermediate2 +PREHOOK: Output: default@intermediate2 +POSTHOOK: query: drop table intermediate2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@intermediate2 +POSTHOOK: Output: default@intermediate2 diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out index d26a9a3..9f764b8 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -509,7 +509,7 @@ Stage-3 Conditional Operator Stage-1 Map 1 vectorized - File Output Operator [FS_10] + File Output Operator [FS_8] table:{"name:":"default.orc_merge5"} Select Operator [SEL_9] (rows=306 width=335) Output:["_col0","_col1","_col2","_col3","_col4"] diff --git a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp index 5c09fdd..3e4c754 100644 --- a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp +++ b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp @@ -178,12 +178,14 @@ const char* _kFileMetadataExprTypeNames[] = { const std::map _FileMetadataExprType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(1, _kFileMetadataExprTypeValues, _kFileMetadataExprTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); int _kClientCapabilityValues[] = { - ClientCapability::TEST_CAPABILITY + ClientCapability::TEST_CAPABILITY, + ClientCapability::INSERT_ONLY_TABLES }; const char* _kClientCapabilityNames[] = { - "TEST_CAPABILITY" + "TEST_CAPABILITY", + "INSERT_ONLY_TABLES" }; -const std::map _ClientCapability_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(1, _kClientCapabilityValues, _kClientCapabilityNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); +const std::map _ClientCapability_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(2, _kClientCapabilityValues, _kClientCapabilityNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); Version::~Version() throw() { diff --git a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h index d5963f3..cfbc488 100644 --- a/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h +++ b/standalone-metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h @@ -160,7 +160,8 @@ extern const std::map _FileMetadataExprType_VALUES_TO_NAMES; struct ClientCapability { enum type { - TEST_CAPABILITY = 1 + TEST_CAPABILITY = 1, + INSERT_ONLY_TABLES = 2 }; }; diff --git a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ClientCapability.java 
b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ClientCapability.java index f53bd82..8fc8311 100644 --- a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ClientCapability.java +++ b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ClientCapability.java @@ -12,7 +12,8 @@ import org.apache.thrift.TEnum; public enum ClientCapability implements org.apache.thrift.TEnum { - TEST_CAPABILITY(1); + TEST_CAPABILITY(1), + INSERT_ONLY_TABLES(2); private final int value; @@ -35,6 +36,8 @@ public static ClientCapability findByValue(int value) { switch (value) { case 1: return TEST_CAPABILITY; + case 2: + return INSERT_ONLY_TABLES; default: return null; } diff --git a/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php b/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php index f0f0a57..71c2cef 100644 --- a/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php +++ b/standalone-metastore/src/gen/thrift/gen-php/metastore/Types.php @@ -169,8 +169,10 @@ final class FileMetadataExprType { final class ClientCapability { const TEST_CAPABILITY = 1; + const INSERT_ONLY_TABLES = 2; static public $__names = array( 1 => 'TEST_CAPABILITY', + 2 => 'INSERT_ONLY_TABLES', ); } diff --git a/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py index 7570895..c21dfb0 100644 --- a/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ b/standalone-metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -252,13 +252,16 @@ class FileMetadataExprType: class ClientCapability: TEST_CAPABILITY = 1 + INSERT_ONLY_TABLES = 2 _VALUES_TO_NAMES = { 1: "TEST_CAPABILITY", + 2: "INSERT_ONLY_TABLES", } _NAMES_TO_VALUES = { "TEST_CAPABILITY": 1, + "INSERT_ONLY_TABLES": 2, } diff --git a/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb b/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb index ea73b34..1dae310 100644 --- a/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ b/standalone-metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ -120,8 +120,9 @@ end module ClientCapability TEST_CAPABILITY = 1 - VALUE_MAP = {1 => "TEST_CAPABILITY"} - VALID_VALUES = Set.new([TEST_CAPABILITY]).freeze + INSERT_ONLY_TABLES = 2 + VALUE_MAP = {1 => "TEST_CAPABILITY", 2 => "INSERT_ONLY_TABLES"} + VALID_VALUES = Set.new([TEST_CAPABILITY, INSERT_ONLY_TABLES]).freeze end class Version diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java index 6cc7157..3759348 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java @@ -55,8 +55,8 @@ public MTable() {} */ public MTable(String tableName, MDatabase database, MStorageDescriptor sd, String owner, int createTime, int lastAccessTime, int retention, List partitionKeys, - Map parameters, String viewOriginalText, String viewExpandedText, - boolean rewriteEnabled, String tableType) { + Map parameters, + String viewOriginalText, String viewExpandedText, boolean rewriteEnabled, String tableType) { this.tableName = tableName; this.database = database; this.sd = sd; diff --git a/standalone-metastore/src/main/resources/package.jdo 
b/standalone-metastore/src/main/resources/package.jdo index 570fd44..bbb8f2d 100644 --- a/standalone-metastore/src/main/resources/package.jdo +++ b/standalone-metastore/src/main/resources/package.jdo @@ -53,7 +53,7 @@ - + @@ -207,7 +207,7 @@ - + @@ -216,7 +216,7 @@ - + @@ -285,7 +285,7 @@ - + @@ -305,7 +305,7 @@ - + @@ -1018,7 +1018,7 @@ - + @@ -1082,7 +1082,6 @@ - diff --git a/standalone-metastore/src/main/thrift/hive_metastore.thrift b/standalone-metastore/src/main/thrift/hive_metastore.thrift index 7268d53..322e0ec 100644 --- a/standalone-metastore/src/main/thrift/hive_metastore.thrift +++ b/standalone-metastore/src/main/thrift/hive_metastore.thrift @@ -326,7 +326,7 @@ struct Table { 12: string tableType, // table type enum, e.g. EXTERNAL_TABLE 13: optional PrincipalPrivilegeSet privileges, 14: optional bool temporary=false, - 15: optional bool rewriteEnabled // rewrite enabled or not + 15: optional bool rewriteEnabled, // rewrite enabled or not } struct Partition { @@ -966,7 +966,8 @@ struct GetAllFunctionsResponse { } enum ClientCapability { - TEST_CAPABILITY = 1 + TEST_CAPABILITY = 1, + INSERT_ONLY_TABLES = 2 }
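The only new API surface in this section is the extra ClientCapability value regenerated across the C++, Java, PHP, Python, and Ruby bindings. Below is a minimal sketch of how the regenerated Java enum resolves that constant, assuming only the members visible in this hunk plus the standard getValue() accessor that Thrift codegen supplies via org.apache.thrift.TEnum; how a client actually advertises INSERT_ONLY_TABLES when talking to the metastore is outside this hunk.

import org.apache.hadoop.hive.metastore.api.ClientCapability;

public class ClientCapabilityCheck {
  public static void main(String[] args) {
    // findByValue(2) now resolves to the INSERT_ONLY_TABLES constant added above.
    ClientCapability mm = ClientCapability.findByValue(2);
    System.out.println(mm + " = " + mm.getValue());       // INSERT_ONLY_TABLES = 2
    System.out.println(ClientCapability.findByValue(1));  // TEST_CAPABILITY
    System.out.println(ClientCapability.findByValue(3));  // null: unknown values stay unmapped
  }
}

Existing clients that only declare TEST_CAPABILITY are unaffected: the original ordinal and name are unchanged, and the new value is purely additive.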