diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
index 6c80a14..a49b220 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
@@ -210,17 +210,15 @@ public void closeOp(boolean abort) throws HiveException {
             + fss.getLen());
 
         // move any incompatible files to final path
-        if (!incompatFileSet.isEmpty()) {
-          for (Path incompatFile : incompatFileSet) {
-            Path destDir = finalPath.getParent();
-            try {
-              Utilities.renameOrMoveFiles(fs, incompatFile, destDir);
-              LOG.info("Moved incompatible file " + incompatFile + " to " +
-                  destDir);
-            } catch (HiveException e) {
-              LOG.error("Unable to move " + incompatFile + " to " + destDir);
-              throw new IOException(e);
-            }
+        for (Path incompatFile : incompatFileSet) {
+          Path markedPath = Utilities.toExcludedPath(incompatFile);
+          Path destPath = new Path(finalPath.getParent(), markedPath.getName());
+          try {
+            Utilities.renameOrMoveFiles(fs, incompatFile, destPath);
+            LOG.info("Moved incompatible file " + incompatFile + " to " + destPath);
+          } catch (HiveException e) {
+            LOG.error("Unable to move " + incompatFile + " to " + destPath);
+            throw e;
           }
         }
       } else {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 4170659..9787286 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -1496,6 +1496,8 @@ public static String realFile(String newFile, Configuration conf) throws IOExcep
     return src;
   }
 
+  private static final String excludedPrefix = "excluded.";
+
   private static final String tmpPrefix = "_tmp.";
   private static final String taskTmpPrefix = "_task_tmp.";
 
@@ -1506,6 +1508,13 @@ public static Path toTaskTempPath(Path orig) {
     return new Path(orig.getParent(), taskTmpPrefix + orig.getName());
   }
 
+  public static Path toExcludedPath(Path orig) {
+    if (orig.getName().startsWith(excludedPrefix)) {
+      return orig;
+    }
+    return new Path(orig.getParent(), excludedPrefix + orig.getName());
+  }
+
   public static Path toTempPath(Path orig) {
     if (orig.getName().indexOf(tmpPrefix) == 0) {
       return orig;
@@ -1520,6 +1529,11 @@ public static Path toTempPath(String orig) {
     return toTempPath(new Path(orig));
   }
 
+  public static boolean isExcludedPath(FileStatus file) {
+    String name = file.getPath().getName();
+    return name.startsWith(excludedPrefix);
+  }
+
   /**
    * Detect if the supplied file is a temporary path.
    */
@@ -1595,68 +1609,59 @@ public static void renameOrMoveFiles(FileSystem fs, Path src, Path dst) throws I
   }
 
   /**
-   * The first group will contain the task id. The second group is the optional extension. The file
-   * name looks like: "0_0" or "0_0.gz". There may be a leading prefix (tmp_). Since getTaskId() can
-   * return an integer only - this should match a pure integer as well. {1,6} is used to limit
-   * matching for attempts #'s 0-999999.
-   */
-  private static final Pattern FILE_NAME_TO_TASK_ID_REGEX =
-      Pattern.compile("^.*?([0-9]+)(_[0-9]{1,6})?(\\..*)?$");
-
-  /**
-   * Some jobs like "INSERT INTO" jobs create copies of files like 0000001_0_copy_2.
-   * For such files,
-   * Group 1: 00000001 [taskId]
-   * Group 3: 0 [task attempId]
-   * Group 4: _copy_2 [copy suffix]
-   * Group 6: copy [copy keyword]
-   * Group 8: 2 [copy file index]
-   */
-  private static final String COPY_KEYWORD = "_copy_"; // copy keyword
-  private static final Pattern COPY_FILE_NAME_TO_TASK_ID_REGEX =
-      Pattern.compile("^.*?"+ // any prefix
-                      "([0-9]+)"+ // taskId
-                      "(_)"+ // separator
-                      "([0-9]{1,6})?"+ // attemptId (limited to 6 digits)
-                      "((_)(\\Bcopy\\B)(_)" +
-                      "([0-9]{1,6})$)?"+ // copy file index
-                      "(\\..*)?$"); // any suffix/file extension
-
-  /**
-   * This retruns prefix part + taskID for bucket join for partitioned table
-   */
-  private static final Pattern FILE_NAME_PREFIXED_TASK_ID_REGEX =
-      Pattern.compile("^.*?((\\(.*\\))?[0-9]+)(_[0-9]{1,6})?(\\..*)?$");
-
-  /**
-   * This breaks a prefixed bucket number into the prefix and the taskID
-   */
-  private static final Pattern PREFIXED_TASK_ID_REGEX =
+   * prefix? + ()? + taskId + [_]? + [_copy_]? + postfix?
+   *
+   * prefix_(part=spec)0000001_0_copy_2.postfix
+   *
+   * Group 1: prefix_(part=spec)0000001 [prefix + partSpec + taskId]
+   * Group 2: prefix_(part=spec) [prefix + partSpec]
+   * Group 3: (part=spec) [partSpec]
+   * Group 4: 0000001 [taskId]
+   * Group 5: 0 [task attemptId]
+   * Group 6: 2 [copy number]
+   * Group 7: .postfix [postfix]
+   */
+  public static final Pattern HIVE_FILE_NAME_CONVENTION = Pattern.compile("^" +
+      "(" +
+      "(\\D*?(\\(.*\\))?)?" + // prefix + partSpec
+      "([0-9]+)" + // taskId
+      ")" +
+      "(?:_([0-9]{1,6}))?" + // task attemptId (limited to 6 digits)
+      "(?:_copy_([0-9]{1,6}))?" + // copy number (limited to 6 digits)
+      "(\\..*)?" + // postfix
+      "$");
+
+  /**
+   * This breaks a prefixed bucket number into the prefix and the bucket number
+   * Group 1: partition spec
+   * Group 2: bucket number
+   */
+  public static final Pattern FILEID_CONVENTION =
      Pattern.compile("^(.*?\\(.*\\))?([0-9]+)$");
 
   /**
-   * Get the task id from the filename. It is assumed that the filename is derived from the output
-   * of getTaskId
+   * Get the part-spec + task id from the filename. It is assumed that the filename is derived
+   * from the output of getTaskId
    *
    * @param filename
    *          filename to extract taskid from
    */
-  public static String getTaskIdFromFilename(String filename) {
-    return getIdFromFilename(filename, FILE_NAME_TO_TASK_ID_REGEX);
+  public static String getPrefixedTaskIdFromFilename(String filename) {
+    return getMatchedGroup(filename, HIVE_FILE_NAME_CONVENTION, 1);
   }
 
   /**
-   * Get the part-spec + task id from the filename. It is assumed that the filename is derived
-   * from the output of getTaskId
+   * Get the task id from the filename. It is assumed that the filename is derived from the output
+   * of getTaskId
    *
    * @param filename
    *          filename to extract taskid from
    */
-  public static String getPrefixedTaskIdFromFilename(String filename) {
-    return getIdFromFilename(filename, FILE_NAME_PREFIXED_TASK_ID_REGEX);
+  public static String getTaskIdFromFilename(String filename) {
+    return getMatchedGroup(filename, HIVE_FILE_NAME_CONVENTION, 4);
   }
 
-  private static String getIdFromFilename(String filename, Pattern pattern) {
+  private static String getMatchedGroup(String filename, Pattern pattern, int group) {
     String taskId = filename;
     int dirEnd = filename.lastIndexOf(Path.SEPARATOR);
     if (dirEnd != -1) {
@@ -1668,20 +1673,12 @@ private static String getIdFromFilename(String filename, Pattern pattern) {
       LOG.warn("Unable to get task id from file name: " + filename + ". Using last component"
           + taskId + " as task id.");
     } else {
-      taskId = m.group(1);
+      taskId = m.group(group);
     }
     LOG.debug("TaskId for " + filename + " = " + taskId);
     return taskId;
   }
 
-  public static String getFileNameFromDirName(String dirName) {
-    int dirEnd = dirName.lastIndexOf(Path.SEPARATOR);
-    if (dirEnd != -1) {
-      return dirName.substring(dirEnd + 1);
-    }
-    return dirName;
-  }
-
   /**
    * Replace the task id from the filename. It is assumed that the filename is derived from the
    * output of getTaskId
@@ -1690,55 +1687,51 @@ public static String getFileNameFromDirName(String dirName) {
    *          filename to replace taskid "0_0" or "0_0.gz" by 33 to "33_0" or "33_0.gz"
    */
   public static String replaceTaskIdFromFilename(String filename, int bucketNum) {
-    return replaceTaskIdFromFilename(filename, String.valueOf(bucketNum));
+    String taskId = getTaskIdFromFilename(filename);
+    String newTaskId = toZeroPaddedString(String.valueOf(bucketNum), taskId.length());
+    return replaceTaskIdFromFilename(filename, taskId, newTaskId);
   }
 
   public static String replaceTaskIdFromFilename(String filename, String fileId) {
     String taskId = getTaskIdFromFilename(filename);
-    String newTaskId = replaceTaskId(taskId, fileId);
-    String ret = replaceTaskIdFromFilename(filename, taskId, newTaskId);
-    return (ret);
-  }
-
-  private static String replaceTaskId(String taskId, int bucketNum) {
-    return replaceTaskId(taskId, String.valueOf(bucketNum));
+    String newTaskId = extractPaddedTaskId(fileId, taskId.length());
+    return replaceTaskIdFromFilename(filename, taskId, newTaskId);
   }
 
   /**
    * Returns strBucketNum with enough 0's prefixing the task ID portion of the String to make it
    * equal in length to taskId
    *
-   * @param taskId - the taskId used as a template for length
-   * @param strBucketNum - the bucket number of the output, may or may not be prefixed
+   * @param fileID - the bucket number of the output, may or may not be prefixed
+   * @param length - target length
    * @return
    */
-  private static String replaceTaskId(String taskId, String strBucketNum) {
-    Matcher m = PREFIXED_TASK_ID_REGEX.matcher(strBucketNum);
+  private static String extractPaddedTaskId(String fileID, int length) {
+    Matcher m = FILEID_CONVENTION.matcher(fileID);
     if (!m.matches()) {
-      LOG.warn("Unable to determine bucket number from file ID: " + strBucketNum + ". Using " +
+      LOG.warn("Unable to determine bucket number from file ID: " + fileID + ". Using " +
          "file ID as bucket number.");
-      return adjustBucketNumLen(strBucketNum, taskId);
-    } else {
-      String adjustedBucketNum = adjustBucketNumLen(m.group(2), taskId);
-      return (m.group(1) == null ? "" : m.group(1)) + adjustedBucketNum;
+      return toZeroPaddedString(fileID, length);
     }
+    String partSpec = m.group(1);
+    String bucketNum = m.group(2);
+    String adjustedBucketNum = toZeroPaddedString(bucketNum, length);
+    return partSpec == null ? adjustedBucketNum : partSpec + adjustedBucketNum;
   }
 
   /**
-   * Adds 0's to the beginning of bucketNum until bucketNum and taskId are the same length.
+   * Prepend 0's to the input string until it is of the given length.
    *
-   * @param bucketNum - the bucket number, should not be prefixed
-   * @param taskId - the taskId used as a template for length
+   * @param input - input string
+   * @param length - target length
    * @return
   */
-  private static String adjustBucketNumLen(String bucketNum, String taskId) {
-    int bucketNumLen = bucketNum.length();
-    int taskIdLen = taskId.length();
-    StringBuffer s = new StringBuffer();
-    for (int i = 0; i < taskIdLen - bucketNumLen; i++) {
-      s.append("0");
+  private static String toZeroPaddedString(String input, int length) {
+    StringBuilder s = new StringBuilder(length);
+    for (int i = 0; i < length - input.length(); i++) {
+      s.append('0');
     }
-    return s.toString() + bucketNum;
+    return s.toString() + input;
   }
 
   /**
@@ -1872,7 +1865,7 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I
    *
    * @return a list of path names corresponding to should-be-created empty buckets.
    */
-  public static ArrayList removeTempOrDuplicateFiles(FileSystem fs, Path path,
+  private static ArrayList removeTempOrDuplicateFiles(FileSystem fs, Path path,
       DynamicPartitionCtx dpCtx) throws IOException {
     if (path == null) {
       return null;
@@ -1881,7 +1874,6 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I
     ArrayList result = new ArrayList();
     if (dpCtx != null) {
       FileStatus parts[] = HiveStatsUtils.getFileStatusRecurse(path, dpCtx.getNumDPCols(), fs);
-      HashMap taskIDToFile = null;
 
       for (int i = 0; i < parts.length; ++i) {
         assert parts[i].isDir() : "dynamic partition " + parts[i].getPath()
@@ -1897,17 +1889,15 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I
          }
        }
 
-        taskIDToFile = removeTempOrDuplicateFiles(items, fs);
+        Map pTaskIDToFile = removeTempOrDuplicateFiles(items, fs);
        // if the table is bucketed and enforce bucketing, we should check and generate all buckets
-        if (dpCtx.getNumBuckets() > 0 && taskIDToFile != null) {
-          // refresh the file list
-          items = fs.listStatus(parts[i].getPath());
+        if (dpCtx.getNumBuckets() > 0 && pTaskIDToFile != null) {
          // get the missing buckets and generate empty buckets
-          String taskID1 = taskIDToFile.keySet().iterator().next();
-          Path bucketPath = taskIDToFile.values().iterator().next().getPath();
+          String pTaskID1 = pTaskIDToFile.keySet().iterator().next();
+          Path bucketPath = pTaskIDToFile.values().iterator().next().getPath();
          for (int j = 0; j < dpCtx.getNumBuckets(); ++j) {
-            String taskID2 = replaceTaskId(taskID1, j);
-            if (!taskIDToFile.containsKey(taskID2)) {
+            String taskID2 = toZeroPaddedString(String.valueOf(j), pTaskID1.length());
+            if (!pTaskIDToFile.containsKey(taskID2)) {
              // create empty bucket, file name should be derived from taskID2
              String path2 = replaceTaskIdFromFilename(bucketPath.toUri().getPath().toString(), j);
              result.add(path2);
@@ -1922,7 +1912,8 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I
     return result;
   }
 
-  public static HashMap removeTempOrDuplicateFiles(FileStatus[] items,
+  // returns prefixed-taskID to file mapping
+  private static HashMap removeTempOrDuplicateFiles(FileStatus[] items,
      FileSystem fs) throws IOException {
 
    if (items == null || fs == null) {
@@ -1932,6 +1923,9 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I
    HashMap taskIdToFile = new HashMap();
 
    for (FileStatus one : items) {
+      if (isExcludedPath(one)) {
+        continue;
+      }
      if (isTempPath(one)) {
        if (!fs.delete(one.getPath(), true)) {
          throw new IOException("Unable to delete tmp file: " + one.getPath());
@@ -1946,43 +1940,26 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I
        // any attempt files could contain partial results (due to task failures or
        // speculative runs), but the largest should be the correct one since the result
        // of a successful run should never be smaller than a failed/speculative run.
-        FileStatus toDelete = null;
-
-        // "LOAD .. INTO" and "INSERT INTO" commands will generate files with
-        // "_copy_x" suffix. These files are usually read by map tasks and the
-        // task output gets written to some tmp path. The output file names will
-        // be of format taskId_attemptId. The usual path for all these tasks is
-        // srcPath -> taskTmpPath -> tmpPath -> finalPath.
-        // But, MergeFileTask can move files directly from src path to final path
-        // without copying it to tmp path. In such cases, different files with
-        // "_copy_x" suffix will be identified as duplicates (change in value
-        // of x is wrongly identified as attempt id) and will be deleted.
-        // To avoid that we will ignore files with "_copy_x" suffix from duplicate
-        // elimination.
-        if (!isCopyFile(one.getPath().getName())) {
-          if (otherFile.getLen() >= one.getLen()) {
-            toDelete = one;
-          } else {
-            toDelete = otherFile;
-            taskIdToFile.put(taskId, one);
-          }
-          long len1 = toDelete.getLen();
-          long len2 = taskIdToFile.get(taskId).getLen();
-          if (!fs.delete(toDelete.getPath(), true)) {
-            throw new IOException(
-                "Unable to delete duplicate file: " + toDelete.getPath()
-                    + ". Existing file: " +
-                    taskIdToFile.get(taskId).getPath());
-          } else {
-            LOG.warn("Duplicate taskid file removed: " + toDelete.getPath() +
-                " with length "
-                + len1 + ". Existing file: " +
-                taskIdToFile.get(taskId).getPath() + " with length "
-                + len2);
-          }
+        FileStatus toDelete;
+        if (otherFile.getLen() >= one.getLen()) {
+          toDelete = one;
+        } else {
+          toDelete = otherFile;
+          taskIdToFile.put(taskId, one);
+        }
+        long len1 = toDelete.getLen();
+        long len2 = taskIdToFile.get(taskId).getLen();
+        if (!fs.delete(toDelete.getPath(), true)) {
+          throw new IOException(
+              "Unable to delete duplicate file: " + toDelete.getPath()
+                  + ". Existing file: " +
+                  taskIdToFile.get(taskId).getPath());
        } else {
-          LOG.info(one.getPath() + " file identified as duplicate. This file is" +
-              " not deleted as it has copySuffix.");
+          LOG.warn("Duplicate taskid file removed: " + toDelete.getPath() +
+              " with length "
+              + len1 + ". Existing file: " +
+              taskIdToFile.get(taskId).getPath() + " with length "
+              + len2);
        }
      }
    }
@@ -1990,38 +1967,6 @@ public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws I
    return taskIdToFile;
   }
 
-  public static boolean isCopyFile(String filename) {
-    String taskId = filename;
-    String copyFileSuffix = null;
-    int dirEnd = filename.lastIndexOf(Path.SEPARATOR);
-    if (dirEnd != -1) {
-      taskId = filename.substring(dirEnd + 1);
-    }
-    Matcher m = COPY_FILE_NAME_TO_TASK_ID_REGEX.matcher(taskId);
-    if (!m.matches()) {
-      LOG.warn("Unable to verify if file name " + filename + " has _copy_ suffix.");
-    } else {
-      taskId = m.group(1);
-      copyFileSuffix = m.group(4);
-    }
-
-    LOG.debug("Filename: " + filename + " TaskId: " + taskId + " CopySuffix: " + copyFileSuffix);
-    if (taskId != null && copyFileSuffix != null) {
-      return true;
-    }
-
-    return false;
-  }
-
-  public static String getBucketFileNameFromPathSubString(String bucketName) {
-    try {
-      return bucketName.split(COPY_KEYWORD)[0];
-    } catch (Exception e) {
-      e.printStackTrace();
-      return bucketName;
-    }
-  }
-
   public static String getNameMessage(Exception e) {
    return e.getClass().getName() + "(" + e.getMessage() + ")";
   }
@@ -2093,7 +2038,7 @@ private static URL urlFromPathString(String onestr) {
   }
 
   /**
-   * get the jar files from specified directory or get jar files by several jar names sperated by comma
+   * get the jar files from specified directory or get jar files by several jar names separated by comma
   * @param path
   * @return
   */
@@ -2357,7 +2302,7 @@ public static void copyTableJobPropertiesToConf(TableDesc tbl, JobConf job) {
   }
 
   /**
-   * Copies the storage handler proeprites configured for a table descriptor to a runtime job
+   * Copies the storage handler properties configured for a table descriptor to a runtime job
   * configuration. This differs from {@link #copyTablePropertiesToConf(org.apache.hadoop.hive.ql.plan.TableDesc, org.apache.hadoop.mapred.JobConf)}
   * in that it does not allow parameters already set in the job to override the values from the
   * table. This is important for setting the config up for reading,
@@ -3117,7 +3062,7 @@ public static int estimateNumberOfReducers(HiveConf conf, ContentSummary inputSu
          + maxReducers + " estimated totalInputFileSize=" + totalInputFileSize);
    } else {
      LOG.info("BytesPerReducer=" + bytesPerReducer + " maxReducers="
-        + maxReducers + " totalInputFileSize=" + totalInputFileSize);
+          + maxReducers + " totalInputFileSize=" + totalInputFileSize);
    }
 
    // If this map reduce job writes final data to a table and bucketing is being inferred,
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
index 297ce44..6cc95ef 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
@@ -236,13 +236,12 @@ public void onRootVertexInitialized(String inputName, InputDescriptor inputDescr
      } catch (IOException e) {
        throw new RuntimeException("Failed to get file split for event: " + diEvent);
      }
-      Set fsList =
-          pathFileSplitsMap.get(Utilities.getBucketFileNameFromPathSubString(fileSplit.getPath()
-              .getName()));
+      String fileName = fileSplit.getPath().getName();
+      String bucketFileName = Utilities.getPrefixedTaskIdFromFilename(fileName);
+      Set fsList = pathFileSplitsMap.get(bucketFileName);
      if (fsList == null) {
        fsList = new TreeSet(new PathComparatorForSplit());
-        pathFileSplitsMap.put(
-            Utilities.getBucketFileNameFromPathSubString(fileSplit.getPath().getName()), fsList);
+        pathFileSplitsMap.put(bucketFileName, fsList);
      }
      fsList.add(fileSplit);
    }
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
index 69f8889..6c42014 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
@@ -27,6 +27,7 @@
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.regex.Matcher;
 
 import com.google.common.collect.Sets;
 import com.google.common.io.Files;
@@ -140,4 +141,39 @@ public void testGetJarFilesByPath() {
      FileUtils.deleteQuietly(f);
    }
   }
+
+  public void testParseFilePath() {
+    validate("_tmp(key=value)000000_1_copy_2.tgz",
+        "_tmp(key=value)000000", "_tmp(key=value)", "(key=value)", "000000", "1", "2", ".tgz");
+
+    validate("_tmp_000000_1_copy_2.tgz",
+        "_tmp_000000", "_tmp_", null, "000000", "1", "2", ".tgz");
+    validate("(key=value)000000_1_copy_2.tgz",
+        "(key=value)000000", "(key=value)", "(key=value)", "000000", "1", "2", ".tgz");
+    validate("_tmp(key=value)000000_copy_2.tgz",
+        "_tmp(key=value)000000", "_tmp(key=value)", "(key=value)", "000000", null, "2", ".tgz");
+    validate("_tmp(key=value)000000_1.tgz",
+        "_tmp(key=value)000000", "_tmp(key=value)", "(key=value)", "000000", "1", null, ".tgz");
+    validate("_tmp(key=value)000000_1_copy_2",
+        "_tmp(key=value)000000", "_tmp(key=value)", "(key=value)", "000000", "1", "2", null);
+
+    validate("000000", "000000", "", null, "000000", null, null, null);
+    validate("_tmp_000000", "_tmp_000000", "_tmp_", null, "000000", null, null, null);
+    validate("(key=value)000000", "(key=value)000000", "(key=value)", "(key=value)", "000000", null, null, null);
+    validate("000000_1", "000000", "", null, "000000", "1", null, null);
+    validate("000000_copy_2", "000000", "", null, "000000", null, "2", null);
+    validate("000000.tgz", "000000", "", null, "000000", null, null, ".tgz");
+
+    // test more combinations
+  }
+
+  private void validate(String input, String... expected) {
+    Matcher matcher = Utilities.HIVE_FILE_NAME_CONVENTION.matcher(input);
+    if (!matcher.matches()) {
+      throw new RuntimeException("Failed to parse " + input);
+    }
+    for (int i = 1; i <= matcher.groupCount(); i++) {
+      Assert.assertEquals(expected[i - 1], matcher.group(i));
+    }
+  }
 }
diff --git ql/src/test/queries/clientpositive/orc_merge_incompat1.q ql/src/test/queries/clientpositive/orc_merge_incompat1.q
index 0348948..2f16b3a 100644
--- ql/src/test/queries/clientpositive/orc_merge_incompat1.q
+++ ql/src/test/queries/clientpositive/orc_merge_incompat1.q
@@ -29,7 +29,7 @@ select * from orc_merge5b;
 set hive.merge.orcfile.stripe.level=true;
 alter table orc_merge5b concatenate;
 
--- 3 file after merging - all 0.12 format files will be merged and 0.11 files will be left behind
+-- 4 file after merging - all 0.12 format files will be merged and 0.11 files will be left behind
 analyze table orc_merge5b compute statistics noscan;
 dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_merge5b/;
 select * from orc_merge5b;
diff --git ql/src/test/results/clientpositive/tez/orc_merge_incompat1.q.out ql/src/test/results/clientpositive/tez/orc_merge_incompat1.q.out
index ea2dd5d..392dd6c 100644
--- ql/src/test/results/clientpositive/tez/orc_merge_incompat1.q.out
+++ ql/src/test/results/clientpositive/tez/orc_merge_incompat1.q.out
@@ -204,12 +204,12 @@ POSTHOOK: query: alter table orc_merge5b concatenate
 POSTHOOK: type: ALTER_TABLE_MERGE
 POSTHOOK: Input: default@orc_merge5b
 POSTHOOK: Output: default@orc_merge5b
-PREHOOK: query: -- 3 file after merging - all 0.12 format files will be merged and 0.11 files will be left behind
+PREHOOK: query: -- 4 file after merging - all 0.12 format files will be merged and 0.11 files will be left behind
 analyze table orc_merge5b compute statistics noscan
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_merge5b
 PREHOOK: Output: default@orc_merge5b
-POSTHOOK: query: -- 3 file after merging - all 0.12 format files will be merged and 0.11 files will be left behind
+POSTHOOK: query: -- 4 file after merging - all 0.12 format files will be merged and 0.11 files will be left behind
 analyze table orc_merge5b compute statistics noscan
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc_merge5b