diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 54e2b18..43f7864 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -801,18 +801,11 @@ "map-reduce job to merge the output files into bigger files. This is only done for map-only jobs \n" + "if hive.merge.mapfiles is true, and for map-reduce jobs if hive.merge.mapredfiles is true."), HIVEMERGERCFILEBLOCKLEVEL("hive.merge.rcfile.block.level", true, ""), - HIVEMERGEINPUTFORMATBLOCKLEVEL("hive.merge.input.format.block.level", - "org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat", ""), HIVEMERGEORCFILESTRIPELEVEL("hive.merge.orcfile.stripe.level", true, "When hive.merge.mapfiles or hive.merge.mapredfiles is enabled while writing a\n" + " table with ORC file format, enabling this config will do stripe level fast merge\n" + " for small ORC files. Note that enabling this config will not honor padding tolerance\n" + " config (hive.exec.orc.block.padding.tolerance)."), - HIVEMERGEINPUTFORMATSTRIPELEVEL("hive.merge.input.format.stripe.level", - "org.apache.hadoop.hive.ql.io.orc.OrcFileStripeMergeInputFormat", - "Input file format to use for ORC stripe level merging (for internal use only)"), - HIVEMERGECURRENTJOBHASDYNAMICPARTITIONS( - "hive.merge.current.job.has.dynamic.partitions", false, ""), HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true, "If this is set the header for RCFiles will simply be RCF. If this is not\n" + @@ -1670,17 +1663,6 @@ " it will now take 512 reducers, similarly if the max number of reducers is 511,\n" + " and a job was going to use this many, it will now use 256 reducers."), - /* The following section contains all configurations used for list bucketing feature.*/ - /* This is not for clients. but only for block merge task. */ - /* This is used by BlockMergeTask to send out flag to RCFileMergeMapper */ - /* about alter table...concatenate and list bucketing case. */ - HIVEMERGECURRENTJOBCONCATENATELISTBUCKETING( - "hive.merge.current.job.concatenate.list.bucketing", true, ""), - /* This is not for clients. but only for block merge task. */ - /* This is used by BlockMergeTask to send out flag to RCFileMergeMapper */ - /* about depth of list bucketing. */ - HIVEMERGECURRENTJOBCONCATENATELISTBUCKETINGDEPTH( - "hive.merge.current.job.concatenate.list.bucketing.depth", 0, ""), HIVEOPTLISTBUCKETING("hive.optimize.listbucketing", false, "Enable list bucketing optimizer. 
Default value is false so that we disable it by default."),
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 99049ca..b1ed918 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -95,6 +95,11 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   orc_merge2.q,\
   orc_merge3.q,\
   orc_merge4.q,\
+  orc_merge5.q,\
+  orc_merge6.q,\
+  orc_merge7.q,\
+  orc_merge_incompat1.q,\
+  orc_merge_incompat2.q,\
   ptf.q,\
   sample1.q,\
   script_env_var1.q,\
diff --git ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
index 6f23575..4e735f7 100644
--- ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
+++ ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
@@ -7,10 +7,6 @@
 package org.apache.hadoop.hive.ql.plan.api;

-import java.util.Map;
-import java.util.HashMap;
-import org.apache.thrift.TEnum;
-
 public enum OperatorType implements org.apache.thrift.TEnum {
   JOIN(0),
   MAPJOIN(1),
@@ -33,7 +29,9 @@
   PTF(18),
   MUX(19),
   DEMUX(20),
-  EVENT(21);
+  EVENT(21),
+  ORCFILEMERGE(22),
+  RCFILEMERGE(23);

   private final int value;
@@ -98,6 +96,10 @@ public static OperatorType findByValue(int value) {
       return DEMUX;
     case 21:
       return EVENT;
+    case 22:
+      return ORCFILEMERGE;
+    case 23:
+      return RCFILEMERGE;
     default:
       return null;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
new file mode 100644
index 0000000..6795109
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
@@ -0,0 +1,297 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
+import org.apache.hadoop.hive.ql.plan.FileMergeDesc;
+import org.apache.hadoop.mapred.JobConf;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Fast file merge operator for ORC and RCFile. This is an abstract class that
+ * does not process any rows. Refer to {@link org.apache.hadoop.hive.ql.exec.OrcFileMergeOperator}
+ * or {@link org.apache.hadoop.hive.ql.exec.RCFileMergeOperator} for more details.
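+ * Instead of deserializing and rewriting rows, compatible ORC stripes and
+ * RCFile blocks are copied into the output file as raw bytes.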
+ */
+public abstract class AbstractFileMergeOperator<T extends FileMergeDesc>
+    extends Operator<T> implements Serializable {
+
+  public static final String BACKUP_PREFIX = "_backup.";
+  public static final Log LOG = LogFactory
+      .getLog(AbstractFileMergeOperator.class);
+
+  protected JobConf jc;
+  protected FileSystem fs;
+  protected boolean autoDelete;
+  protected boolean exception;
+  protected Path outPath;
+  protected Path finalPath;
+  protected Path dpPath;
+  protected Path tmpPath;
+  protected Path taskTmpPath;
+  protected int listBucketingDepth;
+  protected boolean hasDynamicPartitions;
+  protected boolean isListBucketingAlterTableConcatenate;
+  protected boolean tmpPathFixedConcatenate;
+  protected boolean tmpPathFixed;
+  protected Set<Path> incompatFileSet;
+  protected transient DynamicPartitionCtx dpCtx;
+
+  @Override
+  public void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+    this.jc = new JobConf(hconf);
+    incompatFileSet = new HashSet<Path>();
+    autoDelete = false;
+    exception = false;
+    tmpPathFixed = false;
+    tmpPathFixedConcatenate = false;
+    outPath = null;
+    finalPath = null;
+    dpPath = null;
+    tmpPath = null;
+    taskTmpPath = null;
+    dpCtx = conf.getDpCtx();
+    hasDynamicPartitions = conf.hasDynamicPartitions();
+    isListBucketingAlterTableConcatenate = conf
+        .isListBucketingAlterTableConcatenate();
+    listBucketingDepth = conf.getListBucketingDepth();
+    Path specPath = conf.getOutputPath();
+    updatePaths(Utilities.toTempPath(specPath),
+        Utilities.toTaskTempPath(specPath));
+    try {
+      fs = specPath.getFileSystem(hconf);
+      autoDelete = fs.deleteOnExit(outPath);
+    } catch (IOException e) {
+      this.exception = true;
+      throw new HiveException("Failed to initialize AbstractFileMergeOperator",
+          e);
+    }
+  }
+
+  // sets up the temp and task temp paths
+  private void updatePaths(Path tp, Path ttp) {
+    String taskId = Utilities.getTaskId(jc);
+    tmpPath = tp;
+    taskTmpPath = ttp;
+    finalPath = new Path(tp, taskId);
+    outPath = new Path(ttp, Utilities.toTempPath(taskId));
+  }
+
+  /**
+   * Fixes tmpPath to point to the correct partition. Before this is called,
+   * tmpPath defaults to the root tmp table dir. fixTmpPath(..) works for
+   * DP + LB + multiple skewed values + merge because:
+   * 1. fixTmpPath(..) compares inputPath against tmpDepth, finds the path
+   *    difference, and puts it into newPath, which is then appended to the
+   *    existing this.tmpPath and this.taskTmpPath.
+   * 2. The path difference between inputPath and tmpDepth can be DP or DP+LB;
+   *    both are handled automatically.
+   * 3. For example, if inputPath is /-ext-10002/hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/
+   *    HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME and tmpPath is /_tmp.-ext-10000,
+   *    newPath will be hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME,
+   *    and this.tmpPath and this.taskTmpPath are then updated correctly.
+   * list_bucket_dml_6.q covers this case: DP + LB + multiple skewed values + merge.
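+   * (In other words, the directory levels found under the input path but
+   * missing from tmpPath are appended to both tmpPath and taskTmpPath via
+   * updatePaths(..).)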
+   *
+   * @param inputPath - input path
+   * @throws java.io.IOException
+   */
+  protected void fixTmpPath(Path inputPath) throws IOException {
+    dpPath = inputPath;
+    Path newPath = new Path(".");
+    int inputDepth = inputPath.depth();
+    int tmpDepth = tmpPath.depth();
+
+    // Build the path from the bottom up
+    while (inputPath != null && inputDepth > tmpDepth) {
+      newPath = new Path(inputPath.getName(), newPath);
+      inputDepth--;
+      inputPath = inputPath.getParent();
+    }
+
+    Path newTmpPath = new Path(tmpPath, newPath);
+    Path newTaskTmpPath = new Path(taskTmpPath, newPath);
+    if (!fs.exists(newTmpPath)) {
+      fs.mkdirs(newTmpPath);
+    }
+    updatePaths(newTmpPath, newTaskTmpPath);
+  }
+
+  /**
+   * Fixes tmpPath to point to the correct list bucketing sub-directories.
+   * Before this is called, tmpPath defaults to the root tmp table dir.
+   * This is a new method rather than a change to fixTmpPath() for two reasons.
+   * Reason 1: the logic differs slightly. fixTmpPath(..) needs two variables
+   * to decide the path delta stored in newPath: 1. inputPath.depth()
+   * 2. tmpPath.depth(). fixTmpPathConcatenate also needs two variables, but
+   * one of them is different: 1. inputPath.depth() 2. listBucketingDepth.
+   * Reason 2: less risk. The existing logic around map() and fixTmpPath() is
+   * not trivial. To keep the impact on the existing flow minimal, we avoid
+   * changing existing code and add new code for the new feature.
+   *
+   * @param inputPath - input path
+   * @throws IOException
+   */
+  protected void fixTmpPathConcatenate(Path inputPath) throws IOException {
+    dpPath = inputPath;
+    Path newPath = new Path(".");
+
+    int depth = listBucketingDepth;
+    // Build the path from the bottom up, picking up list bucketing subdirectories
+    while ((inputPath != null) && (depth > 0)) {
+      newPath = new Path(inputPath.getName(), newPath);
+      inputPath = inputPath.getParent();
+      depth--;
+    }
+
+    Path newTmpPath = new Path(tmpPath, newPath);
+    Path newTaskTmpPath = new Path(taskTmpPath, newPath);
+    if (!fs.exists(newTmpPath)) {
+      fs.mkdirs(newTmpPath);
+    }
+    updatePaths(newTmpPath, newTaskTmpPath);
+  }
+
+  /**
+   * Validates that each input path belongs to the same partition, since each
+   * mapper merges its input into a single output directory.
+   *
+   * @param inputPath - input path
+   */
+  protected void checkPartitionsMatch(Path inputPath) throws IOException {
+    if (!dpPath.equals(inputPath)) {
+      // Temp partition input path does not match the existing temp path
+      String msg = "Multiple partitions for one merge mapper: " + dpPath +
+          " NOT EQUAL TO " +
+          inputPath;
+      LOG.error(msg);
+      throw new IOException(msg);
+    }
+  }
+
+  protected void fixTmpPathAlterTable(Path path) throws IOException {
+
+    /**
+     * 1. isListBucketingAlterTableConcatenate will be true only for
+     *    alter table ... concatenate on a table stored as directories, so the
+     *    list bucketing alter table merge is handled in the if branch with
+     *    the help of fixTmpPathConcatenate.
+     * 2. For DML, isListBucketingAlterTableConcatenate will be false, so it
+     *    is handled by the else branch, which contains another check:
+     *    2.1 for DP or LB (both have sub-directories, which includes SP + LB),
+     *        fix the path with the help of fixTmpPath(..);
+     *    2.2 for SP only, without LB, we don't fix the path.
+     */
+
+    // Fix temp path for alter table ...
concatenate + if (isListBucketingAlterTableConcatenate) { + if (this.tmpPathFixedConcatenate) { + checkPartitionsMatch(path); + } else { + fixTmpPathConcatenate(path); + tmpPathFixedConcatenate = true; + } + } else { + if (hasDynamicPartitions || (listBucketingDepth > 0)) { + if (tmpPathFixed) { + checkPartitionsMatch(path); + } else { + // We haven't fixed the TMP path for this mapper yet + fixTmpPath(path); + tmpPathFixed = true; + } + } + } + } + + @Override + public void closeOp(boolean abort) throws HiveException { + try { + if (!exception) { + FileStatus fss = fs.getFileStatus(outPath); + if (!fs.rename(outPath, finalPath)) { + throw new IOException( + "Unable to rename " + outPath + " to " + finalPath); + } + LOG.info("renamed path " + outPath + " to " + finalPath + " . File" + + " size is " + + fss.getLen()); + + // move any incompatible files to final path + if (!incompatFileSet.isEmpty()) { + for (Path incompatFile : incompatFileSet) { + String fileName = incompatFile.getName(); + Path destFile = new Path(finalPath.getParent(), fileName); + try { + Utilities.renameOrMoveFiles(fs, incompatFile, destFile); + LOG.info("Moved incompatible file " + incompatFile + " to " + + destFile); + } catch (HiveException e) { + LOG.error("Unable to move " + incompatFile + " to " + destFile); + throw new IOException(e); + } + } + } + } else { + if (!autoDelete) { + fs.delete(outPath, true); + } + } + } catch (IOException e) { + throw new HiveException("Failed to close AbstractFileMergeOperator", e); + } + } + + @Override + public void jobCloseOp(Configuration hconf, boolean success) + throws HiveException { + try { + Path outputDir = conf.getOutputPath(); + FileSystem fs = outputDir.getFileSystem(hconf); + Path backupPath = backupOutputPath(fs, outputDir); + Utilities + .mvFileToFinalPath(outputDir, hconf, success, LOG, conf.getDpCtx(), + null, reporter); + if (success) { + LOG.info("jobCloseOp moved merged files to output dir: " + outputDir); + } + if (backupPath != null) { + fs.delete(backupPath, true); + } + } catch (IOException e) { + throw new HiveException("Failed jobCloseOp for AbstractFileMergeOperator", + e); + } + super.jobCloseOp(hconf, success); + } + + private Path backupOutputPath(FileSystem fs, Path outpath) + throws IOException, HiveException { + if (fs.exists(outpath)) { + Path backupPath = new Path(outpath.getParent(), + BACKUP_PREFIX + outpath.getName()); + Utilities.rename(fs, outpath, backupPath); + return backupPath; + } else { + return null; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index e076683..9fb239c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -18,32 +18,6 @@ package org.apache.hadoop.hive.ql.exec; -import static org.apache.commons.lang.StringUtils.join; -import static org.apache.hadoop.util.StringUtils.stringifyException; - -import java.io.BufferedWriter; -import java.io.DataOutputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Serializable; -import java.io.Writer; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; 
-import java.util.TreeMap; -import java.util.TreeSet; - import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; @@ -88,8 +62,9 @@ import org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; -import org.apache.hadoop.hive.ql.io.merge.MergeTask; -import org.apache.hadoop.hive.ql.io.merge.MergeWork; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.merge.MergeFileTask; +import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask; import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateWork; @@ -133,14 +108,19 @@ import org.apache.hadoop.hive.ql.plan.DropDatabaseDesc; import org.apache.hadoop.hive.ql.plan.DropIndexDesc; import org.apache.hadoop.hive.ql.plan.DropTableDesc; +import org.apache.hadoop.hive.ql.plan.FileMergeDesc; import org.apache.hadoop.hive.ql.plan.GrantDesc; import org.apache.hadoop.hive.ql.plan.GrantRevokeRoleDDL; +import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; import org.apache.hadoop.hive.ql.plan.LockDatabaseDesc; import org.apache.hadoop.hive.ql.plan.LockTableDesc; import org.apache.hadoop.hive.ql.plan.MsckDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc; import org.apache.hadoop.hive.ql.plan.PrincipalDesc; import org.apache.hadoop.hive.ql.plan.PrivilegeDesc; import org.apache.hadoop.hive.ql.plan.PrivilegeObjectDesc; +import org.apache.hadoop.hive.ql.plan.RCFileMergeDesc; import org.apache.hadoop.hive.ql.plan.RenamePartitionDesc; import org.apache.hadoop.hive.ql.plan.RevokeDesc; import org.apache.hadoop.hive.ql.plan.RoleDDLDesc; @@ -194,6 +174,33 @@ import org.apache.hive.common.util.AnnotationUtils; import org.stringtemplate.v4.ST; +import java.io.BufferedWriter; +import java.io.DataOutputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.io.Writer; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +import static org.apache.commons.lang.StringUtils.join; +import static org.apache.hadoop.util.StringUtils.stringifyException; + /** * DDLTask implementation. * @@ -546,15 +553,40 @@ private DataOutputStream getOutputStream(Path outputFile) throws Exception { */ private int mergeFiles(Hive db, AlterTablePartMergeFilesDesc mergeFilesDesc) throws HiveException { + ListBucketingCtx lbCtx = mergeFilesDesc.getLbCtx(); + boolean lbatc = lbCtx == null ? false : lbCtx.isSkewedStoredAsDir(); + int lbd = lbCtx == null ? 0 : lbCtx.calculateListBucketingLevel(); + // merge work only needs input and output. 
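    // (MergeFileWork is a MapWork variant: its single alias, set up below,
    // maps the input directory to the file merge operator that does the
    // actual block/stripe copying)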
-    MergeWork mergeWork = new MergeWork(mergeFilesDesc.getInputDir(),
-        mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass());
+    MergeFileWork mergeWork = new MergeFileWork(mergeFilesDesc.getInputDir(),
+        mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass().getName());
     mergeWork.setListBucketingCtx(mergeFilesDesc.getLbCtx());
     mergeWork.resolveConcatenateMerge(db.getConf());
     mergeWork.setMapperCannotSpanPartns(true);
-    mergeWork.setSourceTableInputFormat(mergeFilesDesc.getInputFormatClass());
+    mergeWork.setSourceTableInputFormat(mergeFilesDesc.getInputFormatClass().getName());
+    Operator<? extends OperatorDesc> mergeOp = null;
+    final FileMergeDesc fmd;
+    if (mergeFilesDesc.getInputFormatClass().equals(RCFileInputFormat.class)) {
+      fmd = new RCFileMergeDesc();
+    } else {
+      // safe to assume it is ORC otherwise, as the semantic analyzer checks for RC/ORC
+      fmd = new OrcFileMergeDesc();
+    }
+
+    fmd.setDpCtx(null);
+    fmd.setHasDynamicPartitions(false);
+    fmd.setListBucketingAlterTableConcatenate(lbatc);
+    fmd.setListBucketingDepth(lbd);
+    fmd.setOutputPath(mergeFilesDesc.getOutputDir());
+
+    mergeOp = OperatorFactory.get(fmd);
+
+    LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork =
+        new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
+    aliasToWork.put(mergeFilesDesc.getInputDir().toString(), mergeOp);
+    mergeWork.setAliasToWork(aliasToWork);
     DriverContext driverCxt = new DriverContext();
-    MergeTask taskExec = new MergeTask();
+    MergeFileTask taskExec = new MergeFileTask();
     taskExec.initialize(db.getConf(), null, driverCxt);
     taskExec.setWork(mergeWork);
     taskExec.setQueryPlan(this.getQueryPlan());
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index 7477199..e50129a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -36,7 +36,7 @@
 import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
-import org.apache.hadoop.hive.ql.io.merge.MergeTask;
+import org.apache.hadoop.hive.ql.io.merge.MergeFileTask;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
@@ -47,7 +47,13 @@
 import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol;
 import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
-import org.apache.hadoop.hive.ql.plan.*;
+import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
+import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
+import org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc;
+import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.MoveWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.util.StringUtils;
@@ -55,7 +61,12 @@
 import java.io.IOException;
 import java.io.Serializable;
 import java.security.AccessControlException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;

 /**
  * MoveTask implementation.
@@ -294,7 +305,7 @@ public int execute(DriverContext driverContext) { while (task.getParentTasks() != null && task.getParentTasks().size() == 1) { task = (Task)task.getParentTasks().get(0); // If it was a merge task or a local map reduce task, nothing can be inferred - if (task instanceof MergeTask || task instanceof MapredLocalTask) { + if (task instanceof MergeFileTask || task instanceof MapredLocalTask) { break; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index 8946221..ed8692d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -18,10 +18,6 @@ package org.apache.hadoop.hive.ql.exec; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator; @@ -53,7 +49,9 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MuxDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc; import org.apache.hadoop.hive.ql.plan.PTFDesc; +import org.apache.hadoop.hive.ql.plan.RCFileMergeDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; import org.apache.hadoop.hive.ql.plan.ScriptDesc; @@ -62,6 +60,10 @@ import org.apache.hadoop.hive.ql.plan.UDTFDesc; import org.apache.hadoop.hive.ql.plan.UnionDesc; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + /** * OperatorFactory. * @@ -108,6 +110,10 @@ AppMasterEventOperator.class)); opvec.add(new OpTuple(DynamicPruningEventDesc.class, AppMasterEventOperator.class)); + opvec.add(new OpTuple(RCFileMergeDesc.class, + RCFileMergeOperator.class)); + opvec.add(new OpTuple(OrcFileMergeDesc.class, + OrcFileMergeOperator.class)); } static { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java new file mode 100644 index 0000000..f302ce2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java @@ -0,0 +1,210 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile;
+import org.apache.hadoop.hive.ql.io.orc.OrcFileKeyWrapper;
+import org.apache.hadoop.hive.ql.io.orc.OrcFileValueWrapper;
+import org.apache.hadoop.hive.ql.io.orc.Reader;
+import org.apache.hadoop.hive.ql.io.orc.Writer;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.shims.CombineHiveKey;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Fast file merge operator for ORC files.
+ */
+public class OrcFileMergeOperator extends
+    AbstractFileMergeOperator<OrcFileMergeDesc> {
+  public final static Log LOG = LogFactory.getLog("OrcFileMergeOperator");
+
+  // These parameters must match for all ORC files involved in merging. If
+  // they do not match, the file is put into the incompatible file set and is
+  // not merged.
+  CompressionKind compression = null;
+  long compressBuffSize = 0;
+  List<Integer> version;
+  int columnCount = 0;
+  int rowIndexStride = 0;
+
+  Writer outWriter;
+  Path prevPath;
+  private Reader reader;
+  private FSDataInputStream fdis;
+
+  @Override
+  public void processOp(Object row, int tag) throws HiveException {
+    Object[] keyValue = (Object[]) row;
+    processKeyValuePairs(keyValue[0], keyValue[1]);
+  }
+
+  private void processKeyValuePairs(Object key, Object value)
+      throws HiveException {
+    try {
+      OrcFileValueWrapper v;
+      OrcFileKeyWrapper k;
+      if (key instanceof CombineHiveKey) {
+        k = (OrcFileKeyWrapper) ((CombineHiveKey) key).getKey();
+      } else {
+        k = (OrcFileKeyWrapper) key;
+      }
+
+      fixTmpPathAlterTable(k.getInputPath().getParent());
+
+      v = (OrcFileValueWrapper) value;
+
+      if (prevPath == null) {
+        prevPath = k.getInputPath();
+        reader = OrcFile.createReader(fs, k.getInputPath());
+        LOG.info("ORC merge file input path: " + k.getInputPath());
+      }
+
+      // Store the ORC configuration from the first file.
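+      // (compression kind, compression buffer size, writer version list,
+      // column count and row index stride; see checkCompatibility() below)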
+      // All other files must match this configuration before merging;
+      // otherwise they will not be merged.
+      if (outWriter == null) {
+        compression = k.getCompression();
+        compressBuffSize = k.getCompressBufferSize();
+        version = k.getVersionList();
+        columnCount = k.getTypes().get(0).getSubtypesCount();
+        rowIndexStride = k.getRowIndexStride();
+
+        // block size and stripe size will come from the config
+        outWriter = OrcFile.createWriter(outPath,
+            OrcFile.writerOptions(jc).compress(compression)
+                .inspector(reader.getObjectInspector()));
+        LOG.info("ORC merge file output path: " + outPath);
+      }
+
+      if (!checkCompatibility(k)) {
+        incompatFileSet.add(k.getInputPath());
+        return;
+      }
+
+      // next file in the path
+      if (!k.getInputPath().equals(prevPath)) {
+        reader = OrcFile.createReader(fs, k.getInputPath());
+      }
+
+      // initialize a buffer to read the entire stripe
+      byte[] buffer = new byte[(int) v.getStripeInformation().getLength()];
+      fdis = fs.open(k.getInputPath());
+      fdis.readFully(v.getStripeInformation().getOffset(), buffer, 0,
+          (int) v.getStripeInformation().getLength());
+
+      // append the stripe buffer to the new ORC file
+      outWriter.appendStripe(buffer, 0, buffer.length, v.getStripeInformation(),
+          v.getStripeStatistics());
+
+      LOG.info("Merged stripe from file " + k.getInputPath() + " [ offset : "
+          + v.getStripeInformation().getOffset() + " length: "
+          + v.getStripeInformation().getLength() + " ]");
+
+      // add user metadata to the footer, if there is any
+      if (v.isLastStripeInFile()) {
+        outWriter.appendUserMetadata(v.getUserMetadata());
+      }
+    } catch (Throwable e) {
+      this.exception = true;
+      closeOp(true);
+      throw new HiveException(e);
+    }
+  }
+
+  private boolean checkCompatibility(OrcFileKeyWrapper k) {
+    // check compatibility with subsequent files
+    if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) {
+      LOG.info("Incompatible ORC file merge! Column count does not match for "
+          + k.getInputPath());
+      return false;
+    }
+
+    if (!k.getCompression().equals(compression)) {
+      LOG.info("Incompatible ORC file merge! Compression codec does not match"
+          + " for " + k.getInputPath());
+      return false;
+    }
+
+    if (k.getCompressBufferSize() != compressBuffSize) {
+      LOG.info("Incompatible ORC file merge! Compression buffer size does not"
+          + " match for " + k.getInputPath());
+      return false;
+
+    }
+
+    if (!k.getVersionList().equals(version)) {
+      LOG.info("Incompatible ORC file merge! Version does not match for "
+          + k.getInputPath());
+      return false;
+    }
+
+    if (k.getRowIndexStride() != rowIndexStride) {
+      LOG.info("Incompatible ORC file merge! 
Row index stride does not match" + + " for " + k.getInputPath()); + return false; + } + + return true; + } + + @Override + public OperatorType getType() { + return OperatorType.ORCFILEMERGE; + } + + /** + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + static public String getOperatorName() { + return "OFM"; + } + + @Override + public void closeOp(boolean abort) throws HiveException { + // close writer + if (outWriter == null) { + return; + } + + try { + if (fdis != null) { + fdis.close(); + fdis = null; + } + + outWriter.close(); + outWriter = null; + } catch (IOException e) { + throw new HiveException("Unable to close OrcFileMergeOperator", e); + } + super.closeOp(abort); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/RCFileMergeOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/RCFileMergeOperator.java new file mode 100644 index 0000000..c135123 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/RCFileMergeOperator.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.io.RCFile; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileKeyBufferWrapper; +import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileValueBufferWrapper; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.RCFileMergeDesc; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.hive.shims.CombineHiveKey; + +import java.io.IOException; + +/** + * Fast file merge operator for RC files. 
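+ * Unlike the ORC variant, an input file whose codec or column count differs
+ * from the first file's fails the whole merge instead of being set aside as
+ * incompatible.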
+ */
+public class RCFileMergeOperator
+    extends AbstractFileMergeOperator<RCFileMergeDesc> {
+  public final static Log LOG = LogFactory.getLog("RCFileMergeMapper");
+
+  RCFile.Writer outWriter;
+  CompressionCodec codec = null;
+  int columnNumber = 0;
+
+  @Override
+  public void processOp(Object row, int tag) throws HiveException {
+    Object[] keyValue = (Object[]) row;
+    processKeyValuePairs(keyValue[0], keyValue[1]);
+  }
+
+  private void processKeyValuePairs(Object k, Object v)
+      throws HiveException {
+    try {
+
+      RCFileKeyBufferWrapper key;
+      if (k instanceof CombineHiveKey) {
+        key = (RCFileKeyBufferWrapper) ((CombineHiveKey) k).getKey();
+      } else {
+        key = (RCFileKeyBufferWrapper) k;
+      }
+      RCFileValueBufferWrapper value = (RCFileValueBufferWrapper) v;
+
+      fixTmpPathAlterTable(key.getInputPath().getParent());
+
+      if (outWriter == null) {
+        codec = key.getCodec();
+        columnNumber = key.getKeyBuffer().getColumnNumber();
+        RCFileOutputFormat.setColumnNumber(jc, columnNumber);
+        outWriter = new RCFile.Writer(fs, jc, outPath, null, codec);
+      }
+
+      boolean sameCodec = ((codec == key.getCodec()) || codec.getClass().equals(
+          key.getCodec().getClass()));
+
+      if ((key.getKeyBuffer().getColumnNumber() != columnNumber) ||
+          (!sameCodec)) {
+        throw new IOException("RCFileMerge failed because the input files"
+            + " use a different CompressionCodec or have a different column"
+            + " number setting.");
+      }
+
+      outWriter.flushBlock(key.getKeyBuffer(), value.getValueBuffer(),
+          key.getRecordLength(), key.getKeyLength(),
+          key.getCompressedKeyLength());
+    } catch (Throwable e) {
+      this.exception = true;
+      closeOp(true);
+      throw new HiveException(e);
+    }
+  }
+
+  @Override
+  public void closeOp(boolean abort) throws HiveException {
+    // close the writer
+    if (outWriter == null) {
+      return;
+    }
+
+    try {
+      outWriter.close();
+    } catch (IOException e) {
+      throw new HiveException("Unable to close RCFileMergeOperator", e);
+    }
+    outWriter = null;
+
+    super.closeOp(abort);
+  }
+
+  @Override
+  public OperatorType getType() {
+    return OperatorType.RCFILEMERGE;
+  }
+
+  /**
+   * @return the name of the operator
+   */
+  @Override
+  public String getName() {
+    return getOperatorName();
+  }
+
+  static public String getOperatorName() {
+    return "RFM";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java
index 3d74459..e3469a0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java
@@ -28,8 +28,8 @@
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
 import org.apache.hadoop.hive.ql.index.IndexMetadataChangeTask;
 import org.apache.hadoop.hive.ql.index.IndexMetadataChangeWork;
-import org.apache.hadoop.hive.ql.io.merge.MergeTask;
-import org.apache.hadoop.hive.ql.io.merge.MergeWork;
+import org.apache.hadoop.hive.ql.io.merge.MergeFileTask;
+import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
 import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanTask;
 import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork;
@@ -94,8 +94,8 @@ public TaskTuple(Class<T> workClass, Class<? extends Task<T>> taskClass) {
   taskvec.add(new TaskTuple(StatsNoJobWork.class, StatsNoJobTask.class));
   taskvec.add(new TaskTuple(ColumnStatsWork.class, ColumnStatsTask.class));
   taskvec.add(new TaskTuple(ColumnStatsUpdateWork.class, ColumnStatsUpdateTask.class));
-  taskvec.add(new TaskTuple(MergeWork.class,
-      MergeTask.class));
+  taskvec.add(new 
TaskTuple(MergeFileWork.class, + MergeFileTask.class)); taskvec.add(new TaskTuple(DependencyCollectionWork.class, DependencyCollectionTask.class)); taskvec.add(new TaskTuple(PartialScanWork.class, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 2d9b9c3..2d905ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -18,67 +18,11 @@ package org.apache.hadoop.hive.ql.exec; -import java.beans.DefaultPersistenceDelegate; -import java.beans.Encoder; -import java.beans.ExceptionListener; -import java.beans.Expression; -import java.beans.PersistenceDelegate; -import java.beans.Statement; -import java.beans.XMLDecoder; -import java.beans.XMLEncoder; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInput; -import java.io.EOFException; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.Serializable; -import java.io.UnsupportedEncodingException; -import java.net.URI; -import java.net.URL; -import java.net.URLClassLoader; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.SQLTransientException; -import java.sql.Timestamp; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Collection; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Random; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.zip.Deflater; -import java.util.zip.DeflaterOutputStream; -import java.util.zip.InflaterInputStream; - +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; +import com.esotericsoftware.kryo.serializers.FieldSerializer; +import com.esotericsoftware.shaded.org.objenesis.strategy.StdInstantiatorStrategy; import org.antlr.runtime.CommonToken; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; @@ -121,9 +65,8 @@ import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; import org.apache.hadoop.hive.ql.io.RCFile; import org.apache.hadoop.hive.ql.io.ReworkMapredInputFormat; -import org.apache.hadoop.hive.ql.io.merge.MergeWork; -import org.apache.hadoop.hive.ql.io.orc.OrcFileMergeMapper; -import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileMergeMapper; +import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper; +import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanMapper; import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork; import 
org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateMapper; @@ -179,11 +122,66 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Shell; -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Input; -import com.esotericsoftware.kryo.io.Output; -import com.esotericsoftware.kryo.serializers.FieldSerializer; -import com.esotericsoftware.shaded.org.objenesis.strategy.StdInstantiatorStrategy; +import java.beans.DefaultPersistenceDelegate; +import java.beans.Encoder; +import java.beans.ExceptionListener; +import java.beans.Expression; +import java.beans.PersistenceDelegate; +import java.beans.Statement; +import java.beans.XMLDecoder; +import java.beans.XMLEncoder; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInput; +import java.io.EOFException; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URL; +import java.net.URLClassLoader; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.SQLTransientException; +import java.sql.Timestamp; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; +import java.util.zip.InflaterInputStream; /** * Utilities. 
@@ -350,9 +348,8 @@ private static BaseWork getBaseWork(Configuration conf, String name) { if(MAP_PLAN_NAME.equals(name)){ if (ExecMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))){ gWork = deserializePlan(in, MapWork.class, conf); - } else if(RCFileMergeMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS)) || - OrcFileMergeMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) { - gWork = deserializePlan(in, MergeWork.class, conf); + } else if(MergeFileMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) { + gWork = deserializePlan(in, MergeFileWork.class, conf); } else if(ColumnTruncateMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) { gWork = deserializePlan(in, ColumnTruncateWork.class, conf); } else if(PartialScanMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 4ff568d1..b7d40c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -17,22 +17,9 @@ */ package org.apache.hadoop.hive.ql.exec.tez; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; - -import javax.security.auth.login.LoginException; - +import com.google.common.base.Function; +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; @@ -54,6 +41,9 @@ import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; +import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper; +import org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat; +import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -68,6 +58,7 @@ import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputFormat; @@ -113,9 +104,20 @@ import org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig; import org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput; -import com.google.common.base.Function; -import com.google.common.collect.Iterators; -import com.google.common.collect.Lists; +import javax.security.auth.login.LoginException; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; /** * DagUtils. 
DagUtils is a collection of helper methods to convert
@@ -212,6 +214,16 @@ private JobConf initializeVertexConf(JobConf baseConf, Context context, MapWork
     conf.set("mapred.mapper.class", ExecMapper.class.getName());
     conf.set("mapred.input.format.class", inpFormat);

+    if (mapWork instanceof MergeFileWork) {
+      MergeFileWork mfWork = (MergeFileWork) mapWork;
+      // This mapper class is used for serialization/deserialization of the
+      // merge file work.
+      conf.set("mapred.mapper.class", MergeFileMapper.class.getName());
+      conf.set("mapred.input.format.class", mfWork.getInputformat());
+      conf.setClass("mapred.output.format.class", MergeFileOutputFormat.class,
+          FileOutputFormat.class);
+    }
+
     return conf;
   }
@@ -486,6 +498,21 @@ private Vertex createVertex(JobConf conf, MapWork mapWork,
       }
     }

+    if (mapWork instanceof MergeFileWork) {
+      Path outputPath = ((MergeFileWork) mapWork).getOutputDir();
+      // prepare the tmp output directory. The output tmp directory should
+      // exist before jobClose (before renaming after job completion)
+      Path tempOutPath = Utilities.toTempPath(outputPath);
+      try {
+        if (!fs.exists(tempOutPath)) {
+          fs.mkdirs(tempOutPath);
+        }
+      } catch (IOException e) {
+        throw new RuntimeException(
+            "Can't make path " + outputPath + " : " + e.getMessage());
+      }
+    }
+
     if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)
         && !mapWork.isUseOneNullRowInputFormat()) {
@@ -515,9 +542,13 @@ private Vertex createVertex(JobConf conf, MapWork mapWork,
     }
     UserPayload serializedConf = TezUtils.createUserPayloadFromConf(conf);
-    map = Vertex.create(mapWork.getName(),
-        ProcessorDescriptor.create(MapTezProcessor.class.getName()).
-        setUserPayload(serializedConf), numTasks, getContainerResource(conf));
+    String procClassName = MapTezProcessor.class.getName();
+    if (mapWork instanceof MergeFileWork) {
+      procClassName = MergeFileTezProcessor.class.getName();
+    }
+    map = Vertex.create(mapWork.getName(), ProcessorDescriptor.create(procClassName)
+        .setUserPayload(serializedConf), numTasks, getContainerResource(conf));
+
     map.setTaskEnvironment(getContainerEnvironment(conf, true));
     map.setTaskLaunchCmdOpts(getContainerJavaOpts(conf));
@@ -784,7 +815,7 @@ public String getBaseName(LocalResource lr) {
   }

   /**
-   * @param path - the path from which we try to determine the resource base name
+   * @param path - the string from which we try to determine the resource base name
    * @return the name of the resource from a given path string.
    */
   public String getResourceBaseName(Path path) {
@@ -831,7 +862,8 @@ public LocalResource localizeResource(Path src, Path dest, Configuration conf)
     conf.getInt(HiveConf.ConfVars.HIVE_LOCALIZE_RESOURCE_NUM_WAIT_ATTEMPTS.varname,
         HiveConf.ConfVars.HIVE_LOCALIZE_RESOURCE_NUM_WAIT_ATTEMPTS.defaultIntVal);
     long sleepInterval = HiveConf.getTimeVar(
-        conf, HiveConf.ConfVars.HIVE_LOCALIZE_RESOURCE_WAIT_INTERVAL, TimeUnit.MILLISECONDS);
+        conf, HiveConf.ConfVars.HIVE_LOCALIZE_RESOURCE_WAIT_INTERVAL,
+        TimeUnit.MILLISECONDS);
     LOG.info("Number of wait attempts: " + waitAttempts + ". Wait interval: " + sleepInterval);
     boolean found = false;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java
new file mode 100644
index 0000000..3425bf6
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileRecordProcessor.java
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.tez;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
+import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
+import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.tez.mapreduce.input.MRInputLegacy;
+import org.apache.tez.mapreduce.processor.MRTaskReporter;
+import org.apache.tez.runtime.api.LogicalInput;
+import org.apache.tez.runtime.api.LogicalOutput;
+import org.apache.tez.runtime.api.ProcessorContext;
+import org.apache.tez.runtime.library.api.KeyValueReader;
+
+import java.util.Map;
+
+/**
+ * Record processor for fast merging of files.
+ */
+public class MergeFileRecordProcessor extends RecordProcessor {
+
+  public static final Log LOG = LogFactory
+      .getLog(MergeFileRecordProcessor.class);
+
+  protected Operator<? extends OperatorDesc> mergeOp;
+  private final ExecMapperContext execContext = new ExecMapperContext();
+  protected static final String MAP_PLAN_KEY = "__MAP_PLAN__";
+  private MergeFileWork mfWork;
+  private boolean abort = false;
+  private Object[] row = new Object[2];
+
+  @Override
+  void init(JobConf jconf, ProcessorContext processorContext,
+      MRTaskReporter mrReporter, Map<String, LogicalInput> inputs,
+      Map<String, LogicalOutput> outputs) throws Exception {
+    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
+    super.init(jconf, processorContext, mrReporter, inputs, outputs);
+
+    // Update the JobConf using MRInput; info like the file name comes via this
+    MRInputLegacy mrInput = TezProcessor.getMRInput(inputs);
+    Configuration updatedConf = mrInput.getConfigUpdates();
+    if (updatedConf != null) {
+      for (Map.Entry<String, String> entry : updatedConf) {
+        jconf.set(entry.getKey(), entry.getValue());
+      }
+    }
+
+    createOutputMap();
+    // Start all the Outputs.
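+    // The key-value collectors created in createOutputMap() are initialized
+    // here; the merge operator's children are wired to them further below
+    // via OperatorUtils.setChildrenCollector().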
+ for (Map.Entry outputEntry : outputs.entrySet()) { + outputEntry.getValue().start(); + ((TezProcessor.TezKVOutputCollector) outMap.get(outputEntry.getKey())) + .initialize(); + } + + org.apache.hadoop.hive.ql.exec.ObjectCache cache = ObjectCacheFactory + .getCache(jconf); + try { + execContext.setJc(jconf); + // create map and fetch operators + MapWork mapWork = (MapWork) cache.retrieve(MAP_PLAN_KEY); + if (mapWork == null) { + mapWork = Utilities.getMapWork(jconf); + if (mapWork instanceof MergeFileWork) { + mfWork = (MergeFileWork) mapWork; + } else { + throw new RuntimeException("MapWork should be an instance of" + + " MergeFileWork."); + } + cache.cache(MAP_PLAN_KEY, mapWork); + } else { + Utilities.setMapWork(jconf, mapWork); + } + + String alias = mfWork.getAliasToWork().keySet().iterator().next(); + mergeOp = mfWork.getAliasToWork().get(alias); + LOG.info(mergeOp.dump(0)); + + MapredContext.init(true, new JobConf(jconf)); + ((TezContext) MapredContext.get()).setInputs(inputs); + mergeOp.setExecContext(execContext); + mergeOp.initializeLocalWork(jconf); + mergeOp.initialize(jconf, null); + + OperatorUtils.setChildrenCollector(mergeOp.getChildOperators(), outMap); + mergeOp.setReporter(reporter); + MapredContext.get().setReporter(reporter); + } catch (Throwable e) { + if (e instanceof OutOfMemoryError) { + // will this be true here? + // Don't create a new object if we are already out of memory + throw (OutOfMemoryError) e; + } else { + throw new RuntimeException("Map operator initialization failed", e); + } + } + perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS); + } + + @Override + void run() throws Exception { + MRInputLegacy in = TezProcessor.getMRInput(inputs); + KeyValueReader reader = in.getReader(); + + //process records until done + while (reader.next()) { + boolean needMore = processRow(reader.getCurrentKey(), + reader.getCurrentValue()); + if (!needMore) { + break; + } + } + } + + @Override + void close() { + // check if there are IOExceptions + if (!abort) { + abort = execContext.getIoCxt().getIOExceptions(); + } + + // detecting failed executions by exceptions thrown by the operator tree + try { + if (mergeOp == null || mfWork == null) { + return; + } + mergeOp.close(abort); + + if (isLogInfoEnabled) { + logCloseInfo(); + } + ExecMapper.ReportStats rps = new ExecMapper.ReportStats(reporter); + mergeOp.preorderMap(rps); + } catch (Exception e) { + if (!abort) { + // signal new failure to map-reduce + l4j.error("Hit error while closing operators - failing tree"); + throw new RuntimeException("Hive Runtime Error while closing operators", + e); + } + } finally { + Utilities.clearWorkMap(); + MapredContext.close(); + } + } + + /** + * @param key key to process + * @param value value to process + * @return true if it is not done and can take more inputs + */ + private boolean processRow(Object key, Object value) { + // reset the execContext for each new row + execContext.resetRow(); + + try { + if (mergeOp.getDone()) { + return false; //done + } else { + row[0] = key; + row[1] = value; + mergeOp.processOp(row, 0); + if (isLogInfoEnabled) { + logProgress(); + } + } + } catch (Throwable e) { + abort = true; + if (e instanceof OutOfMemoryError) { + // Don't create a new object if we are already out of memory + throw (OutOfMemoryError) e; + } else { + l4j.fatal(StringUtils.stringifyException(e)); + throw new RuntimeException(e); + } + } + return true; //give me more + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileTezProcessor.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileTezProcessor.java
new file mode 100644
index 0000000..7fff28e
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MergeFileTezProcessor.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.tez;
+
+import org.apache.tez.mapreduce.input.MRInputLegacy;
+import org.apache.tez.runtime.api.LogicalInput;
+import org.apache.tez.runtime.api.LogicalOutput;
+import org.apache.tez.runtime.api.ProcessorContext;
+
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * Tez processor for fast file merging. This is the same as TezProcessor
+ * except that it uses a different record processor.
+ */
+public class MergeFileTezProcessor extends TezProcessor {
+
+  public MergeFileTezProcessor(ProcessorContext context) {
+    super(context);
+  }
+
+  @Override
+  public void run(Map<String, LogicalInput> inputs,
+      Map<String, LogicalOutput> outputs) throws Exception {
+    rproc = new MergeFileRecordProcessor();
+    MRInputLegacy mrInput = getMRInput(inputs);
+    try {
+      mrInput.init();
+    } catch (IOException e) {
+      throw new RuntimeException("Failed while initializing MRInput", e);
+    }
+    initializeAndRunProcessor(inputs, outputs);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java
index 994721f..a00d162 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/RecordProcessor.java
@@ -16,13 +16,8 @@
  * limitations under the License.
*/ package org.apache.hadoop.hive.ql.exec.tez; -import java.lang.management.ManagementFactory; -import java.lang.management.MemoryMXBean; -import java.net.URLClassLoader; -import java.util.Arrays; -import java.util.Map; -import java.util.Map.Entry; - +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector; @@ -34,8 +29,12 @@ import org.apache.tez.runtime.api.LogicalOutput; import org.apache.tez.runtime.api.ProcessorContext; -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.net.URLClassLoader; +import java.util.Arrays; +import java.util.Map; +import java.util.Map.Entry; /** * Process input from tez LogicalInput and write output @@ -66,7 +65,7 @@ /** * Common initialization code for RecordProcessors * @param jconf - * @param processorContext the {@link TezProcessorContext} + * @param processorContext the {@link ProcessorContext} * @param mrReporter * @param inputs map of Input names to {@link LogicalInput}s * @param outputs map of Output names to {@link LogicalOutput}s diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java index 831e6a5..1268086 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java @@ -17,12 +17,6 @@ */ package org.apache.hadoop.hive.ql.exec.tez; -import java.io.IOException; -import java.text.NumberFormat; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -40,6 +34,11 @@ import org.apache.tez.runtime.api.ProcessorContext; import org.apache.tez.runtime.library.api.KeyValueWriter; +import java.io.IOException; +import java.text.NumberFormat; +import java.util.List; +import java.util.Map; + /** * Hive processor for Tez that forms the vertices in Tez and processes the data. * Does what ExecMapper and ExecReducer does for hive in MR framework. 
@@ -51,13 +50,15 @@ private static final Log LOG = LogFactory.getLog(TezProcessor.class); protected boolean isMap = false; - RecordProcessor rproc = null; + protected RecordProcessor rproc = null; - private JobConf jobConf; + protected JobConf jobConf; private static final String CLASS_NAME = TezProcessor.class.getName(); private final PerfLogger perfLogger = PerfLogger.getPerfLogger(); + protected ProcessorContext processorContext; + protected static final NumberFormat taskIdFormat = NumberFormat.getInstance(); protected static final NumberFormat jobIdFormat = NumberFormat.getInstance(); static { @@ -121,9 +122,6 @@ private void setupMRLegacyConfigs(ProcessorContext processorContext) { public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception { - Throwable originalThrowable = null; - - try{ perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_PROCESSOR); // in case of broadcast-join read the broadcast edge inputs // (possibly asynchronously) @@ -142,14 +140,23 @@ public void run(Map inputs, Map out rproc = new ReduceRecordProcessor(); } + initializeAndRunProcessor(inputs, outputs); + } + + protected void initializeAndRunProcessor(Map<String, LogicalInput> inputs, + Map<String, LogicalOutput> outputs) + throws Exception { + Throwable originalThrowable = null; + try { TezCacheAccess cacheAccess = TezCacheAccess.createInstance(jobConf); // Start the actual Inputs. After MRInput initialization. - for (Entry<String, LogicalInput> inputEntry : inputs.entrySet()) { + for (Map.Entry<String, LogicalInput> inputEntry : inputs.entrySet()) { if (!cacheAccess.isInputCached(inputEntry.getKey())) { LOG.info("Input: " + inputEntry.getKey() + " is not cached"); inputEntry.getValue().start(); } else { - LOG.info("Input: " + inputEntry.getKey() + " is already cached. Skipping start"); + LOG.info("Input: " + inputEntry.getKey() + + " is already cached. Skipping start"); } } @@ -170,7 +177,7 @@ public void run(Map inputs, Map out } try { - if(rproc != null){ + if (rproc != null) { rproc.close(); } } catch (Throwable t) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileInputFormat.java new file mode 100644 index 0000000..92ddce5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileInputFormat.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.io.merge; + +import java.io.IOException; + +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; + +public abstract class MergeFileInputFormat extends FileInputFormat { + + @Override + public abstract RecordReader getRecordReader(InputSplit split, JobConf job, + Reporter reporter) throws IOException; + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileMapper.java ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileMapper.java new file mode 100644 index 0000000..39b3711 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileMapper.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.merge; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.AbstractFileMergeOperator; +import org.apache.hadoop.hive.ql.exec.ObjectCache; +import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MapReduceBase; +import org.apache.hadoop.mapred.Mapper; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; + +import java.io.IOException; + +/** + * Mapper for fast file merging of ORC and RC files. This is very similar to + * ExecMapper except that the root operator is an AbstractFileMergeOperator. + * This class name is used for serialization and deserialization of MergeFileWork.
+ */ +public class MergeFileMapper extends MapReduceBase implements Mapper { + public static final Log LOG = LogFactory.getLog("MergeFileMapper"); + private static final String PLAN_KEY = "__MAP_PLAN__"; + + private JobConf jc; + private Operator<? extends OperatorDesc> op; + private AbstractFileMergeOperator mergeOp; + private Object[] row; + private boolean abort; + + @Override + public void configure(JobConf job) { + jc = job; + ObjectCache cache = ObjectCacheFactory.getCache(job); + MapWork mapWork = (MapWork) cache.retrieve(PLAN_KEY); + + // if the map work is found in the object cache, reuse it; otherwise + // retrieve the plan from the filesystem and cache it + if (mapWork == null) { + mapWork = Utilities.getMapWork(job); + cache.cache(PLAN_KEY, mapWork); + } else { + Utilities.setMapWork(job, mapWork); + } + + try { + if (mapWork instanceof MergeFileWork) { + MergeFileWork mfWork = (MergeFileWork) mapWork; + String alias = mfWork.getAliasToWork().keySet().iterator().next(); + op = mfWork.getAliasToWork().get(alias); + if (op instanceof AbstractFileMergeOperator) { + mergeOp = (AbstractFileMergeOperator) op; + mergeOp.initializeOp(jc); + row = new Object[2]; + abort = false; + } else { + abort = true; + throw new RuntimeException( + "Merge file work's top operator should be an" + + " instance of AbstractFileMergeOperator"); + } + } else { + abort = true; + throw new RuntimeException("Map work should be a merge file work."); + } + } catch (HiveException e) { + abort = true; + throw new RuntimeException(e); + } + } + + @Override + public void close() throws IOException { + try { + mergeOp.closeOp(abort); + } catch (HiveException e) { + throw new IOException(e); + } + super.close(); + } + + @Override + public void map(Object key, Object value, OutputCollector output, + Reporter reporter) throws IOException { + + row[0] = key; + row[1] = value; + try { + mergeOp.processOp(row, 0); + } catch (HiveException e) { + abort = true; + throw new IOException(e); + } + } +}
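MergeFileMapper.configure() looks the plan up in a per-JVM ObjectCache before deserializing it from the filesystem, so reused task JVMs skip repeated plan deserialization. A minimal sketch of that retrieve-or-load pattern, under the assumption that a plain ConcurrentHashMap approximates the cache; PlanCacheSketch and retrieveOrLoad are hypothetical names, not Hive API:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Supplier;

public class PlanCacheSketch {
  // stand-in for the per-JVM object cache (the real ObjectCache is pluggable)
  private static final Map<String, Object> CACHE =
      new ConcurrentHashMap<String, Object>();

  @SuppressWarnings("unchecked")
  static <T> T retrieveOrLoad(String key, Supplier<T> loader) {
    // run the expensive load (plan deserialization) at most once per key;
    // later tasks in the same JVM get the cached instance
    return (T) CACHE.computeIfAbsent(key, k -> loader.get());
  }

  public static void main(String[] args) {
    String plan = retrieveOrLoad("__MAP_PLAN__", () -> "plan loaded from filesystem");
    System.out.println(plan);
  }
}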
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileOutputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileOutputFormat.java new file mode 100644 index 0000000..db1e01f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileOutputFormat.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.merge; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.util.Progressable; + +import java.io.IOException; + +public class MergeFileOutputFormat extends + FileOutputFormat { + + @Override + public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name, + Progressable progress) throws IOException { + return new RecordWriter() { + public void write(Object key, Object value) { + throw new RuntimeException("Should not be called"); + } + + public void close(Reporter reporter) { + } + }; + } + +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java new file mode 100644 index 0000000..4c2843c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java @@ -0,0 +1,231 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.merge; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper; +import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHook; +import org.apache.hadoop.hive.ql.exec.mr.Throttle; +import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.api.StageType; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.Counters; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RunningJob; + +import java.io.IOException; +import java.io.Serializable; + +/** + * Task for fast merging of ORC and RC files.
+ */ +public class MergeFileTask extends Task<MergeFileWork> implements Serializable, + HadoopJobExecHook { + + private transient JobConf job; + private HadoopJobExecHelper jobExecHelper; + private boolean success = true; + + @Override + public void initialize(HiveConf conf, QueryPlan queryPlan, + DriverContext driverContext) { + super.initialize(conf, queryPlan, driverContext); + job = new JobConf(conf, MergeFileTask.class); + jobExecHelper = new HadoopJobExecHelper(job, this.console, this, this); + } + + @Override + public boolean requireLock() { + return true; + } + + /** + * Start a new map-reduce job to do the merge, almost the same as ExecDriver. + */ + @Override + public int execute(DriverContext driverContext) { + + Context ctx = driverContext.getCtx(); + boolean ctxCreated = false; + RunningJob rj = null; + int returnVal = 0; + + try { + if (ctx == null) { + ctx = new Context(job); + ctxCreated = true; + } + + ShimLoader.getHadoopShims().prepareJobOutput(job); + job.setInputFormat(work.getInputformatClass()); + job.setOutputFormat(HiveOutputFormatImpl.class); + job.setMapperClass(MergeFileMapper.class); + job.setMapOutputKeyClass(NullWritable.class); + job.setMapOutputValueClass(NullWritable.class); + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(NullWritable.class); + job.setNumReduceTasks(0); + + // create the temp directories + Path outputPath = work.getOutputDir(); + Path tempOutPath = Utilities.toTempPath(outputPath); + FileSystem fs = tempOutPath.getFileSystem(job); + if (!fs.exists(tempOutPath)) { + fs.mkdirs(tempOutPath); + } + + // set job name + boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, + HiveConf.ConfVars.HADOOPJOBNAME)); + + String jobName = null; + if (noName && this.getQueryPlan() != null) { + int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH); + jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), + maxlen - 6); + } + + if (noName) { + // This is for a special case to ensure unit tests pass + HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, + jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt()); + } + + // add input path + addInputPaths(job, work); + + // serialize work + Utilities.setMapWork(job, work, ctx.getMRTmpPath(), true); + + // remove pwd from conf file so that the job tracker doesn't show it in logs + String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD); + if (pwd != null) { + HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE"); + } + + // submit the job + JobClient jc = new JobClient(job); + + String addedJars = Utilities.getResourceFiles(job, + SessionState.ResourceType.JAR); + if (!addedJars.isEmpty()) { + job.set("tmpjars", addedJars); + } + + // make this client wait if the job tracker is not behaving well. + Throttle.checkJobTracker(job, LOG); + + // Finally SUBMIT the JOB!
+ rj = jc.submitJob(job); + + returnVal = jobExecHelper.progress(rj, jc, null); + success = (returnVal == 0); + + } catch (Exception e) { + String mesg = " with exception '" + Utilities.getNameMessage(e) + "'"; + if (rj != null) { + mesg = "Ended Job = " + rj.getJobID() + mesg; + } else { + mesg = "Job Submission failed" + mesg; + } + + // Has to use full name to make sure it does not conflict with + // org.apache.commons.lang.StringUtils + console.printError(mesg, "\n" + + org.apache.hadoop.util.StringUtils.stringifyException(e)); + + success = false; + returnVal = 1; + } finally { + try { + if (ctxCreated) { + ctx.clear(); + } + if (rj != null) { + if (returnVal != 0) { + rj.killJob(); + } + HadoopJobExecHelper.runningJobs.remove(rj); + jobID = rj.getID().toString(); + } + // run jobClose on the operators (e.g. to finalize dynamic partition outputs) + if (rj != null) { + if (work.getAliasToWork() != null) { + for (Operator<? extends OperatorDesc> op : work.getAliasToWork() + .values()) { + op.jobClose(job, success); + } + } + } + } catch (Exception e) { + // jobClose needs to execute successfully, otherwise fail the task + if (success) { + success = false; + returnVal = 3; + String mesg = "Job Commit failed with exception '" + + Utilities.getNameMessage(e) + "'"; + console.printError(mesg, "\n" + + org.apache.hadoop.util.StringUtils.stringifyException(e)); + } + } + } + + return returnVal; + } + + private void addInputPaths(JobConf job, MergeFileWork work) { + for (Path path : work.getInputPaths()) { + FileInputFormat.addInputPath(job, path); + } + } + + @Override + public String getName() { + return "MergeFileTask"; + } + + @Override + public StageType getType() { + return StageType.MAPRED; + } + + @Override + public boolean checkFatalErrors(Counters ctrs, StringBuilder errMsg) { + return false; + } + + @Override + public void logPlanProgress(SessionState ss) throws IOException { + // no op + } +}
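MergeFileTask.execute() above shapes the merge as a map-only job: NullWritable keys and values end to end and zero reducers, with the merge operator writing the merged file itself as a side effect. A minimal sketch of just that job shape, using only the Hadoop JobConf calls that appear in the task; the class and method names here are illustrative:

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;

public class MergeJobShapeSketch {
  static JobConf shape(JobConf job) {
    // the mapper emits nothing through the MR framework; all output is
    // written directly by the merge operator as a side effect
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);  // map-only: each mapper merges one group of files
    return job;
  }
}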
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java new file mode 100644 index 0000000..e572338 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java @@ -0,0 +1,239 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.merge; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.HiveStatsUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcFileStripeMergeInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat; +import org.apache.hadoop.hive.ql.plan.Explain; +import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.mapred.InputFormat; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; + +@Explain(displayName = "Merge File Operator") +public class MergeFileWork extends MapWork { + + private static final Log LOG = LogFactory.getLog(MergeFileWork.class); + private List<Path> inputPaths; + private Path outputDir; + private boolean hasDynamicPartitions; + private boolean isListBucketingAlterTableConcatenate; + private ListBucketingCtx listBucketingCtx; + + // source table input format + private String srcTblInputFormat; + + // internal input format used by CombineHiveInputFormat + private Class<? extends InputFormat> internalInputFormat; + + public MergeFileWork(List<Path> inputPaths, Path outputDir, + String srcTblInputFormat) { + this(inputPaths, outputDir, false, srcTblInputFormat); + } + + public MergeFileWork(List<Path> inputPaths, Path outputDir, + boolean hasDynamicPartitions, + String srcTblInputFormat) { + this.inputPaths = inputPaths; + this.outputDir = outputDir; + this.hasDynamicPartitions = hasDynamicPartitions; + this.srcTblInputFormat = srcTblInputFormat; + PartitionDesc partDesc = new PartitionDesc(); + if (srcTblInputFormat.equals(OrcInputFormat.class.getName())) { + this.internalInputFormat = OrcFileStripeMergeInputFormat.class; + } else if (srcTblInputFormat.equals(RCFileInputFormat.class.getName())) { + this.internalInputFormat = RCFileBlockMergeInputFormat.class; + } + partDesc.setInputFileFormatClass(internalInputFormat); + if (this.getPathToPartitionInfo() == null) { + this.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>()); + } + for (Path path : this.inputPaths) { + this.getPathToPartitionInfo().put(path.toString(), partDesc); + } + this.isListBucketingAlterTableConcatenate = false; + } + + public List<Path> getInputPaths() { + return inputPaths; + } + + public void setInputPaths(List<Path> inputPaths) { + this.inputPaths = inputPaths; + } + + public Path getOutputDir() { + return outputDir; + } + + public void setOutputDir(Path outputDir) { + this.outputDir = outputDir; + } + + @Override + public Long getMinSplitSize() { + return null; + } + + @Override + public String getInputformat() { + return getInputformatClass().getName(); + } + + public Class<? extends InputFormat> getInputformatClass() { + return CombineHiveInputFormat.class; + } + + @Override + public boolean isGatheringStats() { + return false; + } + + public boolean hasDynamicPartitions() { + return this.hasDynamicPartitions; + } + + public void setHasDynamicPartitions(boolean hasDynamicPartitions) { + this.hasDynamicPartitions = hasDynamicPartitions; + } + + @Override + public void
resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf, + Path path, + TableDesc tblDesc, + ArrayList<String> aliases, + PartitionDesc partDesc) { + super.resolveDynamicPartitionStoredAsSubDirsMerge(conf, path, tblDesc, + aliases, partDesc); + // set internal input format for all partition descriptors + partDesc.setInputFileFormatClass(internalInputFormat); + // Add the DP path to the list of input paths + inputPaths.add(path); + } + + /** + * alter table ... concatenate + *

+ * If it is a skewed table, use the subdirectories in inputPaths. + */ + public void resolveConcatenateMerge(HiveConf conf) { + isListBucketingAlterTableConcatenate = + ((listBucketingCtx == null) ? false : listBucketingCtx + .isSkewedStoredAsDir()); + LOG.info("isListBucketingAlterTableConcatenate : " + + isListBucketingAlterTableConcatenate); + if (isListBucketingAlterTableConcatenate) { + // use the sub-directories as input paths. + assert ((this.inputPaths != null) && (this.inputPaths.size() == 1)) : + "alter table ... concatenate should only have one" + + " directory inside inputpaths"; + Path dirPath = inputPaths.get(0); + try { + FileSystem inpFs = dirPath.getFileSystem(conf); + FileStatus[] status = + HiveStatsUtils.getFileStatusRecurse(dirPath, listBucketingCtx + .getSkewedColNames().size(), inpFs); + List<Path> newInputPath = new ArrayList<Path>(); + boolean succeed = true; + for (int i = 0; i < status.length; ++i) { + if (status[i].isDir()) { + // Add the lb path to the list of input paths + newInputPath.add(status[i].getPath()); + } else { + // found a file instead of a dir; don't change the input paths + succeed = false; + } + } + assert (succeed || ((!succeed) && newInputPath.isEmpty())) : + "This partition has " + + " inconsistent file structure: " + + + "it is stored-as-subdir and expected all files in the same depth" + + " of subdirectories."; + if (succeed) { + inputPaths.clear(); + inputPaths.addAll(newInputPath); + } + } catch (IOException e) { + String msg = + "Failed to get filesystem for directory name : " + dirPath.toUri(); + throw new RuntimeException(msg, e); + } + + } + } + + /** + * @return the listBucketingCtx + */ + public ListBucketingCtx getListBucketingCtx() { + return listBucketingCtx; + } + + /** + * @param listBucketingCtx the listBucketingCtx to set + */ + public void setListBucketingCtx(ListBucketingCtx listBucketingCtx) { + this.listBucketingCtx = listBucketingCtx; + } + + /** + * @return the isListBucketingAlterTableConcatenate + */ + public boolean isListBucketingAlterTableConcatenate() { + return isListBucketingAlterTableConcatenate; + } + + @Explain(displayName = "input format") + public String getSourceTableInputFormat() { + return srcTblInputFormat; + } + + public void setSourceTableInputFormat(String srcTblInputFormat) { + this.srcTblInputFormat = srcTblInputFormat; + } + + @Explain(displayName = "merge level") + public String getMergeLevel() { + if (srcTblInputFormat != null) { + if (srcTblInputFormat.equals(OrcInputFormat.class.getName())) { + return "stripe"; + } else if (srcTblInputFormat.equals(RCFileInputFormat.class.getName())) { + return "block"; + } + } + return null; + } +}
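MergeFileWork keys both the internal merge input format and the EXPLAIN "merge level" label off the source table's InputFormat class name: ORC is merged a stripe at a time, RCFile a block at a time. A minimal sketch of that dispatch, with the class names inlined as strings to stay self-contained; MergeLevelSketch is a hypothetical stand-alone version of the method above:

public class MergeLevelSketch {
  static String mergeLevel(String srcTblInputFormat) {
    if ("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat".equals(srcTblInputFormat)) {
      return "stripe";  // ORC: concatenate existing stripes without re-encoding rows
    } else if ("org.apache.hadoop.hive.ql.io.RCFileInputFormat".equals(srcTblInputFormat)) {
      return "block";   // RCFile: concatenate existing blocks
    }
    return null;        // unknown source format: no fast merge level
  }

  public static void main(String[] args) {
    System.out.println(mergeLevel("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"));
  }
}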
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeInputFormat.java deleted file mode 100644 index 4651920..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeInputFormat.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.io.merge; - -import java.io.IOException; - -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; - -public abstract class MergeInputFormat extends FileInputFormat { - - @Override - public abstract RecordReader getRecordReader(InputSplit split, JobConf job, - Reporter reporter) throws IOException; - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeMapper.java ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeMapper.java deleted file mode 100644 index 6c691b1..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeMapper.java +++ /dev/null @@ -1,238 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.io.merge; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.MapReduceBase; - -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - -public class MergeMapper extends MapReduceBase { - protected JobConf jc; - protected Path finalPath; - protected FileSystem fs; - protected boolean exception = false; - protected boolean autoDelete = false; - protected Path outPath; - protected boolean hasDynamicPartitions = false; - protected boolean isListBucketingDML = false; - protected boolean isListBucketingAlterTableConcatenate = false; - //used as depth for dir-calculation and if it is list bucketing case.
- protected int listBucketingDepth; - protected boolean tmpPathFixedConcatenate = false; - protected boolean tmpPathFixed = false; - protected Path tmpPath; - protected Path taskTmpPath; - protected Path dpPath; - protected Set incompatFileSet; - - public final static Log LOG = LogFactory.getLog("MergeMapper"); - - @Override - public void configure(JobConf job) { - jc = job; - hasDynamicPartitions = HiveConf.getBoolVar(job, - HiveConf.ConfVars.HIVEMERGECURRENTJOBHASDYNAMICPARTITIONS); - isListBucketingAlterTableConcatenate = HiveConf.getBoolVar(job, - HiveConf.ConfVars.HIVEMERGECURRENTJOBCONCATENATELISTBUCKETING); - listBucketingDepth = HiveConf.getIntVar(job, - HiveConf.ConfVars.HIVEMERGECURRENTJOBCONCATENATELISTBUCKETINGDEPTH); - - Path specPath = MergeOutputFormat.getMergeOutputPath(job); - incompatFileSet = new HashSet(); - Path tmpPath = Utilities.toTempPath(specPath); - Path taskTmpPath = Utilities.toTaskTempPath(specPath); - updatePaths(tmpPath, taskTmpPath); - try { - fs = specPath.getFileSystem(job); - autoDelete = fs.deleteOnExit(outPath); - } catch (IOException e) { - this.exception = true; - throw new RuntimeException(e); - } - } - - private void updatePaths(Path tmpPath, Path taskTmpPath) { - String taskId = Utilities.getTaskId(jc); - this.tmpPath = tmpPath; - this.taskTmpPath = taskTmpPath; - finalPath = new Path(tmpPath, taskId); - outPath = new Path(taskTmpPath, Utilities.toTempPath(taskId)); - } - - - /** - * Validates that each input path belongs to the same partition since each mapper merges the input - * to a single output directory - * @param inputPath - * @throws HiveException - */ - protected void checkPartitionsMatch(Path inputPath) throws HiveException { - if (!dpPath.equals(inputPath)) { - // Temp partition input path does not match exist temp path - String msg = "Multiple partitions for one block merge mapper: " + dpPath + " NOT EQUAL TO " - + inputPath; - LOG.error(msg); - throw new HiveException(msg); - } - } - - /** - * Fixes tmpPath to point to the correct partition. Before this is called, tmpPath will default to - * the root tmp table dir fixTmpPath(..) works for DP + LB + multiple skewed values + merge. - * reason: 1. fixTmpPath(..) compares inputPath and tmpDepth, find out path difference and put it - * into newPath. Then add newpath to existing this.tmpPath and this.taskTmpPath. 2. The path - * difference between inputPath and tmpDepth can be DP or DP+LB. It will automatically handle it. - * 3. For example, if inputpath is /-ext-10002/hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/ - * HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME tmppath is /_tmp.-ext-10000 newpath will be - * hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME Then, - * this.tmpPath and this.taskTmpPath will be update correctly. We have list_bucket_dml_6.q cover - * this case: DP + LP + multiple skewed values + merge. 
- * @param inputPath - * @throws HiveException - * @throws IOException - */ - protected void fixTmpPath(Path inputPath) throws HiveException, IOException { - dpPath = inputPath; - Path newPath = new Path("."); - int inputDepth = inputPath.depth(); - int tmpDepth = tmpPath.depth(); - - // Build the path from bottom up - while (inputPath != null && inputDepth > tmpDepth) { - newPath = new Path(inputPath.getName(), newPath); - inputDepth--; - inputPath = inputPath.getParent(); - } - - Path newTmpPath = new Path(tmpPath, newPath); - Path newTaskTmpPath = new Path(taskTmpPath, newPath); - if (!fs.exists(newTmpPath)) { - fs.mkdirs(newTmpPath); - } - updatePaths(newTmpPath, newTaskTmpPath); - } - - /** - * Fixes tmpPath to point to the correct list bucketing sub-directories. Before this is called, - * tmpPath will default to the root tmp table dir Reason to add a new method instead of changing - * fixTmpPath() Reason 1: logic has slightly difference fixTmpPath(..) needs 2 variables in order - * to decide path delta which is in variable newPath. 1. inputPath.depth() 2. tmpPath.depth() - * fixTmpPathConcatenate needs 2 variables too but one of them is different from fixTmpPath(..) 1. - * inputPath.depth() 2. listBucketingDepth Reason 2: less risks The existing logic is a little not - * trivial around map() and fixTmpPath(). In order to ensure minimum impact on existing flow, we - * try to avoid change on existing code/flow but add new code for new feature. - * @param inputPath - * @throws HiveException - * @throws IOException - */ - protected void fixTmpPathConcatenate(Path inputPath) throws HiveException, IOException { - dpPath = inputPath; - Path newPath = new Path("."); - - int depth = listBucketingDepth; - // Build the path from bottom up. pick up list bucketing subdirectories - while ((inputPath != null) && (depth > 0)) { - newPath = new Path(inputPath.getName(), newPath); - inputPath = inputPath.getParent(); - depth--; - } - - Path newTmpPath = new Path(tmpPath, newPath); - Path newTaskTmpPath = new Path(taskTmpPath, newPath); - if (!fs.exists(newTmpPath)) { - fs.mkdirs(newTmpPath); - } - updatePaths(newTmpPath, newTaskTmpPath); - } - - @Override - public void close() throws IOException { - if (!exception) { - FileStatus fss = fs.getFileStatus(outPath); - LOG.info("renamed path " + outPath + " to " + finalPath + " . File size is " + fss.getLen()); - if (!fs.rename(outPath, finalPath)) { - throw new IOException("Unable to rename output to " + finalPath); - } - - // move any incompatible files to final path - if (!incompatFileSet.isEmpty()) { - for (Path incompatFile : incompatFileSet) { - String fileName = incompatFile.getName(); - Path destFile = new Path(finalPath.getParent(), fileName); - try { - Utilities.renameOrMoveFiles(fs, incompatFile, destFile); - LOG.info("Moved incompatible file " + incompatFile + " to " - + destFile); - } catch (HiveException e) { - LOG.error("Unable to move " + incompatFile + " to " + destFile); - throw new IOException(e); - } - } - } - - } else { - if (!autoDelete) { - fs.delete(outPath, true); - } - } - } - - protected void fixTmpPathAlterTable(Path path) throws IOException, HiveException { - /** - * 1. boolean isListBucketingAlterTableConcatenate will be true only if it is alter table ... - * concatenate on stored-as-dir so it will handle list bucketing alter table merge in the if - * cause with the help of fixTmpPathConcatenate 2. If it is DML, - * isListBucketingAlterTableConcatenate will be false so that it will be handled by else - * cause. 
In this else cause, we have another if check. 2.1 the if check will make sure DP or - * LB, we will fix path with the help of fixTmpPath(..). Since both has sub-directories. it - * includes SP + LB. 2.2 only SP without LB, we dont fix path. - */ - - // Fix temp path for alter table ... concatenate - if (isListBucketingAlterTableConcatenate) { - if (this.tmpPathFixedConcatenate) { - checkPartitionsMatch(path); - } else { - fixTmpPathConcatenate(path); - tmpPathFixedConcatenate = true; - } - } else { - if (hasDynamicPartitions || (listBucketingDepth > 0)) { - if (tmpPathFixed) { - checkPartitionsMatch(path); - } else { - // We haven't fixed the TMP path for this mapper yet - fixTmpPath(path); - tmpPathFixed = true; - } - } - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeOutputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeOutputFormat.java deleted file mode 100644 index a3ce699..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeOutputFormat.java +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.io.merge; - -import java.io.IOException; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.FileOutputFormat; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordWriter; -import org.apache.hadoop.util.Progressable; - -public abstract class MergeOutputFormat extends - FileOutputFormat { - - public static void setMergeOutputPath(JobConf job, Path path) { - job.set("hive.merge.output.dir", path.toString()); - } - - public static Path getMergeOutputPath(JobConf conf) { - String name = conf.get("hive.merge.output.dir"); - return name == null ? null: new Path(name); - } - - public abstract RecordWriter getRecordWriter( - FileSystem ignored, JobConf job, String name, Progressable progress) - throws IOException; -} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeTask.java ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeTask.java deleted file mode 100644 index c30476b..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeTask.java +++ /dev/null @@ -1,419 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.io.merge; - -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Enumeration; -import java.util.List; - -import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.DriverContext; -import org.apache.hadoop.hive.ql.QueryPlan; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper; -import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHook; -import org.apache.hadoop.hive.ql.exec.mr.Throttle; -import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; -import org.apache.hadoop.hive.ql.plan.api.StageType; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapred.Counters; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.mapred.JobClient; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.Mapper; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.RunningJob; -import org.apache.log4j.Appender; -import org.apache.log4j.FileAppender; -import org.apache.log4j.LogManager; - -public class MergeTask extends Task implements Serializable, - HadoopJobExecHook { - - private static final long serialVersionUID = 1L; - - public static String BACKUP_PREFIX = "_backup."; - - protected transient JobConf job; - protected HadoopJobExecHelper jobExecHelper; - - @Override - public void initialize(HiveConf conf, QueryPlan queryPlan, - DriverContext driverContext) { - super.initialize(conf, queryPlan, driverContext); - job = new JobConf(conf, MergeTask.class); - jobExecHelper = new HadoopJobExecHelper(job, this.console, this, this); - } - - @Override - public boolean requireLock() { - return true; - } - - boolean success = true; - - @Override - /** - * start a new map-reduce job to do the merge, almost the same as ExecDriver. 
- */ - public int execute(DriverContext driverContext) { - HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, - CombineHiveInputFormat.class.getName()); - success = true; - ShimLoader.getHadoopShims().prepareJobOutput(job); - job.setOutputFormat(HiveOutputFormatImpl.class); - Class mapperClass = work.getMapperClass(work.getSourceTableInputFormat()); - LOG.info("Using " + mapperClass.getCanonicalName() + " mapper class."); - job.setMapperClass(mapperClass); - - Context ctx = driverContext.getCtx(); - boolean ctxCreated = false; - try { - if (ctx == null) { - ctx = new Context(job); - ctxCreated = true; - } - }catch (IOException e) { - e.printStackTrace(); - console.printError("Error launching map-reduce job", "\n" - + org.apache.hadoop.util.StringUtils.stringifyException(e)); - return 5; - } - - job.setMapOutputKeyClass(NullWritable.class); - job.setMapOutputValueClass(NullWritable.class); - if(work.getNumMapTasks() != null) { - job.setNumMapTasks(work.getNumMapTasks()); - } - - // zero reducers - job.setNumReduceTasks(0); - - if (work.getMinSplitSize() != null) { - HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work - .getMinSplitSize().longValue()); - } - - if (work.getInputformat() != null) { - HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work - .getInputformat()); - } - - String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } - - LOG.info("Using " + inpFormat); - - try { - job.setInputFormat((Class) (Class - .forName(inpFormat))); - } catch (ClassNotFoundException e) { - throw new RuntimeException(e.getMessage()); - } - - Path outputPath = this.work.getOutputDir(); - Path tempOutPath = Utilities.toTempPath(outputPath); - try { - FileSystem fs = tempOutPath.getFileSystem(job); - if (!fs.exists(tempOutPath)) { - fs.mkdirs(tempOutPath); - } - } catch (IOException e) { - console.printError("Can't make path " + outputPath + " : " + e.getMessage()); - return 6; - } - - MergeOutputFormat.setMergeOutputPath(job, outputPath); - - job.setOutputKeyClass(NullWritable.class); - job.setOutputValueClass(NullWritable.class); - - HiveConf.setBoolVar(job, - HiveConf.ConfVars.HIVEMERGECURRENTJOBHASDYNAMICPARTITIONS, - work.hasDynamicPartitions()); - - HiveConf.setBoolVar(job, - HiveConf.ConfVars.HIVEMERGECURRENTJOBCONCATENATELISTBUCKETING, - work.isListBucketingAlterTableConcatenate()); - - HiveConf.setIntVar( - job, - HiveConf.ConfVars.HIVEMERGECURRENTJOBCONCATENATELISTBUCKETINGDEPTH, - ((work.getListBucketingCtx() == null) ? 0 : work.getListBucketingCtx() - .calculateListBucketingLevel())); - - int returnVal = 0; - RunningJob rj = null; - boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, - HiveConf.ConfVars.HADOOPJOBNAME)); - - String jobName = null; - if (noName && this.getQueryPlan() != null) { - int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH); - jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), - maxlen - 6); - } - - if (noName) { - // This is for a special case to ensure unit tests pass - HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, - jobName != null ? 
jobName : "JOB" + Utilities.randGen.nextInt()); - } - - try { - addInputPaths(job, work); - - Utilities.setMapWork(job, work, ctx.getMRTmpPath(), true); - - // remove the pwd from conf file so that job tracker doesn't show this - // logs - String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD); - if (pwd != null) { - HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE"); - } - JobClient jc = new JobClient(job); - - String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR); - if (!addedJars.isEmpty()) { - job.set("tmpjars", addedJars); - } - - // make this client wait if job trcker is not behaving well. - Throttle.checkJobTracker(job, LOG); - - // Finally SUBMIT the JOB! - rj = jc.submitJob(job); - - returnVal = jobExecHelper.progress(rj, jc, null); - success = (returnVal == 0); - - } catch (Exception e) { - e.printStackTrace(); - String mesg = " with exception '" + Utilities.getNameMessage(e) + "'"; - if (rj != null) { - mesg = "Ended Job = " + rj.getJobID() + mesg; - } else { - mesg = "Job Submission failed" + mesg; - } - - // Has to use full name to make sure it does not conflict with - // org.apache.commons.lang.StringUtils - console.printError(mesg, "\n" - + org.apache.hadoop.util.StringUtils.stringifyException(e)); - - success = false; - returnVal = 1; - } finally { - try { - if (ctxCreated) { - ctx.clear(); - } - if (rj != null) { - if (returnVal != 0) { - rj.killJob(); - } - HadoopJobExecHelper.runningJobs.remove(rj); - jobID = rj.getID().toString(); - } - closeJob(outputPath, success, job, console, work.getDynPartCtx(), null); - } catch (Exception e) { - } - } - - return (returnVal); - } - - private Path backupOutputPath(FileSystem fs, Path outpath, JobConf job) - throws IOException, HiveException { - if (fs.exists(outpath)) { - Path backupPath = new Path(outpath.getParent(), BACKUP_PREFIX - + outpath.getName()); - Utilities.rename(fs, outpath, backupPath); - return backupPath; - } else { - return null; - } - } - - private void closeJob(Path outputPath, boolean success, JobConf job, - LogHelper console, DynamicPartitionCtx dynPartCtx, Reporter reporter - ) throws HiveException, IOException { - FileSystem fs = outputPath.getFileSystem(job); - Path backupPath = backupOutputPath(fs, outputPath, job); - Utilities.mvFileToFinalPath(outputPath, job, success, LOG, dynPartCtx, null, - reporter); - fs.delete(backupPath, true); - } - - private void addInputPaths(JobConf job, MergeWork work) { - for (Path path : work.getInputPaths()) { - FileInputFormat.addInputPath(job, path); - } - } - - @Override - public String getName() { - return "MergeTask"; - } - - public static String INPUT_SEPERATOR = ":"; - - public static void main(String[] args) { - String inputPathStr = null; - String outputDir = null; - String jobConfFileName = null; - String format = null; - - try { - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-input")) { - inputPathStr = args[++i]; - } else if (args[i].equals("-jobconffile")) { - jobConfFileName = args[++i]; - } else if (args[i].equals("-outputDir")) { - outputDir = args[++i]; - } else if (args[i].equals("-format")) { - format = args[++i]; - } - } - } catch (IndexOutOfBoundsException e) { - System.err.println("Missing argument to option"); - printUsage(); - } - - if (inputPathStr == null || outputDir == null - || outputDir.trim().equals("")) { - printUsage(); - } - - List inputPaths = new ArrayList(); - String[] paths = inputPathStr.split(INPUT_SEPERATOR); - if (paths == null || paths.length == 0) { - printUsage(); 
- } - - FileSystem fs = null; - JobConf conf = new JobConf(MergeTask.class); - for (String path : paths) { - try { - Path pathObj = new Path(path); - if (fs == null) { - fs = FileSystem.get(pathObj.toUri(), conf); - } - FileStatus fstatus = fs.getFileStatus(pathObj); - if (fstatus.isDir()) { - FileStatus[] fileStatus = fs.listStatus(pathObj); - for (FileStatus st : fileStatus) { - inputPaths.add(st.getPath()); - } - } else { - inputPaths.add(fstatus.getPath()); - } - } catch (IOException e) { - e.printStackTrace(System.err); - } - } - - if (jobConfFileName != null) { - conf.addResource(new Path(jobConfFileName)); - } - HiveConf hiveConf = new HiveConf(conf, MergeTask.class); - - Log LOG = LogFactory.getLog(MergeTask.class.getName()); - boolean isSilent = HiveConf.getBoolVar(conf, - HiveConf.ConfVars.HIVESESSIONSILENT); - LogHelper console = new LogHelper(LOG, isSilent); - - // print out the location of the log file for the user so - // that it's easy to find reason for local mode execution failures - for (Appender appender : Collections - .list((Enumeration) LogManager.getRootLogger() - .getAllAppenders())) { - if (appender instanceof FileAppender) { - console.printInfo("Execution log at: " - + ((FileAppender) appender).getFile()); - } - } - - if (format == null || format.trim().equals("")) { - printUsage(); - } - - MergeWork mergeWork = null; - if (format.equals("rcfile")) { - mergeWork = new MergeWork(inputPaths, new Path(outputDir), RCFileInputFormat.class); - } else if (format.equals("orcfile")) { - mergeWork = new MergeWork(inputPaths, new Path(outputDir), OrcInputFormat.class); - } - - DriverContext driverCxt = new DriverContext(); - MergeTask taskExec = new MergeTask(); - taskExec.initialize(hiveConf, null, driverCxt); - taskExec.setWork(mergeWork); - int ret = taskExec.execute(driverCxt); - - if (ret != 0) { - System.exit(2); - } - - } - - private static void printUsage() { - System.err.println("MergeTask -format -input " - + "-outputDir outputDir [-jobconffile ] "); - System.exit(1); - } - - @Override - public StageType getType() { - return StageType.MAPRED; - } - - @Override - public boolean checkFatalErrors(Counters ctrs, StringBuilder errMsg) { - return false; - } - - @Override - public void logPlanProgress(SessionState ss) throws IOException { - // no op - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeWork.java ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeWork.java deleted file mode 100644 index 9efee3c..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeWork.java +++ /dev/null @@ -1,261 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.io.merge; - -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.HiveStatsUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcFileMergeMapper; -import org.apache.hadoop.hive.ql.io.orc.OrcFileStripeMergeInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat; -import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileMergeMapper; -import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; -import org.apache.hadoop.hive.ql.plan.Explain; -import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.mapred.Mapper; - -@Explain(displayName = "Merge Work") -public class MergeWork extends MapWork implements Serializable { - - private static final long serialVersionUID = 1L; - - private transient List inputPaths; - private transient Path outputDir; - private boolean hasDynamicPartitions; - private DynamicPartitionCtx dynPartCtx; - private boolean isListBucketingAlterTableConcatenate; - private ListBucketingCtx listBucketingCtx; - private Class srcTblInputFormat; - - public MergeWork() { - } - - public MergeWork(List inputPaths, Path outputDir, - Class srcTblInputFormat) { - this(inputPaths, outputDir, false, null, srcTblInputFormat); - } - - public MergeWork(List inputPaths, Path outputDir, - boolean hasDynamicPartitions, DynamicPartitionCtx dynPartCtx, - Class srcTblInputFormat) { - super(); - this.inputPaths = inputPaths; - this.outputDir = outputDir; - this.hasDynamicPartitions = hasDynamicPartitions; - this.dynPartCtx = dynPartCtx; - this.srcTblInputFormat = srcTblInputFormat; - PartitionDesc partDesc = new PartitionDesc(); - if(srcTblInputFormat.equals(OrcInputFormat.class)) { - partDesc.setInputFileFormatClass(OrcFileStripeMergeInputFormat.class); - } else if(srcTblInputFormat.equals(RCFileInputFormat.class)) { - partDesc.setInputFileFormatClass(RCFileBlockMergeInputFormat.class); - } - if(this.getPathToPartitionInfo() == null) { - this.setPathToPartitionInfo(new LinkedHashMap()); - } - for(Path path: this.inputPaths) { - this.getPathToPartitionInfo().put(path.toString(), partDesc); - } - } - - public List getInputPaths() { - return inputPaths; - } - - public void setInputPaths(List inputPaths) { - this.inputPaths = inputPaths; - } - - public Path getOutputDir() { - return outputDir; - } - - public void setOutputDir(Path outputDir) { - this.outputDir = outputDir; - } - - public Class getMapperClass(Class klass) { - if (klass.equals(RCFileInputFormat.class)) { - return RCFileMergeMapper.class; - } else if (klass.equals(OrcInputFormat.class)) { - return OrcFileMergeMapper.class; - } - return null; - } - - @Override - public Long getMinSplitSize() { - return null; - } - - @Override - public String getInputformat() { - return CombineHiveInputFormat.class.getName(); - } - - @Override - public boolean isGatheringStats() { - return false; - } - - public 
boolean hasDynamicPartitions() { - return this.hasDynamicPartitions; - } - - public void setHasDynamicPartitions(boolean hasDynamicPartitions) { - this.hasDynamicPartitions = hasDynamicPartitions; - } - - @Override - public void resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf, Path path, - TableDesc tblDesc, ArrayList aliases, PartitionDesc partDesc) { - - String inputFormatClass = null; - if (tblDesc.getInputFileFormatClass().equals(RCFileInputFormat.class)) { - inputFormatClass = conf.getVar(HiveConf.ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL); - } else if (tblDesc.getInputFileFormatClass().equals(OrcInputFormat.class)){ - inputFormatClass = conf.getVar(HiveConf.ConfVars.HIVEMERGEINPUTFORMATSTRIPELEVEL); - } - - try { - partDesc.setInputFileFormatClass((Class ) - Class.forName(inputFormatClass)); - } catch (ClassNotFoundException e) { - String msg = "Merge input format class not found"; - throw new RuntimeException(msg); - } - super.resolveDynamicPartitionStoredAsSubDirsMerge(conf, path, tblDesc, aliases, partDesc); - - // Add the DP path to the list of input paths - inputPaths.add(path); - } - - /** - * alter table ... concatenate - * - * If it is skewed table, use subdirectories in inputpaths. - */ - public void resolveConcatenateMerge(HiveConf conf) { - isListBucketingAlterTableConcatenate = ((listBucketingCtx == null) ? false : listBucketingCtx - .isSkewedStoredAsDir()); - if (isListBucketingAlterTableConcatenate) { - // use sub-dir as inputpath. - assert ((this.inputPaths != null) && (this.inputPaths.size() == 1)) : - "alter table ... concatenate should only have one directory inside inputpaths"; - Path dirPath = inputPaths.get(0); - try { - FileSystem inpFs = dirPath.getFileSystem(conf); - FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(dirPath, listBucketingCtx - .getSkewedColNames().size(), inpFs); - List newInputPath = new ArrayList(); - boolean succeed = true; - for (int i = 0; i < status.length; ++i) { - if (status[i].isDir()) { - // Add the lb path to the list of input paths - newInputPath.add(status[i].getPath()); - } else { - // find file instead of dir. 
dont change inputpath - succeed = false; - } - } - assert (succeed || ((!succeed) && newInputPath.isEmpty())) : "This partition has " - + " inconsistent file structure: " - + "it is stored-as-subdir and expected all files in the same depth of subdirectories."; - if (succeed) { - inputPaths.clear(); - inputPaths.addAll(newInputPath); - } - } catch (IOException e) { - String msg = "Fail to get filesystem for directory name : " + dirPath.toUri(); - throw new RuntimeException(msg, e); - } - - } - } - - public DynamicPartitionCtx getDynPartCtx() { - return dynPartCtx; - } - - public void setDynPartCtx(DynamicPartitionCtx dynPartCtx) { - this.dynPartCtx = dynPartCtx; - } - - /** - * @return the listBucketingCtx - */ - public ListBucketingCtx getListBucketingCtx() { - return listBucketingCtx; - } - - /** - * @param listBucketingCtx the listBucketingCtx to set - */ - public void setListBucketingCtx(ListBucketingCtx listBucketingCtx) { - this.listBucketingCtx = listBucketingCtx; - } - - /** - * @return the isListBucketingAlterTableConcatenate - */ - public boolean isListBucketingAlterTableConcatenate() { - return isListBucketingAlterTableConcatenate; - } - - public Class getSourceTableInputFormat() { - return srcTblInputFormat; - } - - @Explain(displayName = "input format") - public String getStringifiedInputFormat() { - return srcTblInputFormat.getCanonicalName(); - } - - @Explain(displayName = "merge level") - public String getMergeLevel() { - if (srcTblInputFormat != null) { - if (srcTblInputFormat.equals(OrcInputFormat.class)) { - return "stripe"; - } else if (srcTblInputFormat.equals(RCFileInputFormat.class)) { - return "block"; - } - } - return null; - } - - public void setSourceTableInputFormat(Class srcTblInputFormat) { - this.srcTblInputFormat = srcTblInputFormat; - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileMergeMapper.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileMergeMapper.java deleted file mode 100644 index 13ec642..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileMergeMapper.java +++ /dev/null @@ -1,192 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.io.orc; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.io.merge.MergeMapper; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.shims.CombineHiveKey; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.Mapper; -import org.apache.hadoop.mapred.OutputCollector; -import org.apache.hadoop.mapred.Reporter; - -import java.io.IOException; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -/** - * Map task fast merging of ORC files. - */ -public class OrcFileMergeMapper extends MergeMapper implements - Mapper { - - // These parameters must match for all orc files involved in merging - CompressionKind compression = null; - long compressBuffSize = 0; - List version; - int columnCount = 0; - int rowIndexStride = 0; - - Writer outWriter; - private byte[] buffer; - Path prevPath; - private Reader reader; - private FSDataInputStream fdis; - public final static Log LOG = LogFactory.getLog("OrcFileMergeMapper"); - - @Override - public void configure(JobConf job) { - super.configure(job); - - outWriter = null; - buffer = null; - prevPath = null; - reader = null; - fdis = null; - } - - @Override - public void map(Object key, OrcFileValueWrapper value, OutputCollector output, - Reporter reporter) throws IOException { - try { - - OrcFileKeyWrapper k = null; - if (key instanceof CombineHiveKey) { - k = (OrcFileKeyWrapper) ((CombineHiveKey) key).getKey(); - } else { - k = (OrcFileKeyWrapper) key; - } - - fixTmpPathAlterTable(k.getInputPath().getParent()); - - if (prevPath == null) { - prevPath = k.getInputPath(); - reader = OrcFile.createReader(fs, k.inputPath); - } - - // store the orc configuration from the first file. 
All other files should - // match this configuration before merging - if (outWriter == null) { - compression = k.getCompression(); - compressBuffSize = k.getCompressBufferSize(); - version = k.getVersionList(); - columnCount = k.getTypes().get(0).getSubtypesCount(); - rowIndexStride = k.getRowIndexStride(); - - // block size and stripe size will be from config - outWriter = OrcFile.createWriter(outPath, OrcFile.writerOptions(jc).compress(compression) - .inspector(reader.getObjectInspector())); - } - - if (!checkCompatibility(k, value)) { - incompatFileSet.add(k.getInputPath()); - return; - } - - // next file in the path - if (!k.getInputPath().equals(prevPath)) { - reader = OrcFile.createReader(fs, k.inputPath); - } - - // initialize buffer to read the entire stripe - buffer = new byte[(int) value.stripeInformation.getLength()]; - fdis = fs.open(k.inputPath); - fdis.readFully(value.stripeInformation.getOffset(), buffer, 0, - (int) value.stripeInformation.getLength()); - - // append the stripe buffer to the new ORC file - ((WriterImpl) outWriter).appendStripe(buffer, value.getStripeInformation(), - value.getStripeStatistics()); - - LOG.info("Merged stripe from file " + k.inputPath + " [ offset : " - + value.getStripeInformation().getOffset() + " length: " - + value.getStripeInformation().getLength() + " ]"); - - // add user metadata to footer in case of any - if (value.isLastStripeInFile()) { - ((WriterImpl) outWriter).appendUserMetadata(value.getUserMetadata()); - } - } catch (Throwable e) { - this.exception = true; - close(); - throw new IOException(e); - } - } - - private boolean checkCompatibility(OrcFileKeyWrapper k, - OrcFileValueWrapper value) { - // check compatibility with subsequent files - if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) { - LOG.info("Incompatible ORC file merge! Column counts does not match for " - + k.getInputPath()); - return false; - } - - if (!k.compression.equals(compression)) { - LOG.info("Incompatible ORC file merge! Compression codec does not match" + - " for " + k.getInputPath()); - return false; - } - - if (k.compressBufferSize != compressBuffSize) { - LOG.info("Incompatible ORC file merge! Compression buffer size does not" + - " match for " + k.getInputPath()); - return false; - - } - - if (!k.versionList.equals(version)) { - LOG.info("Incompatible ORC file merge! Version does not match for " - + k.getInputPath()); - return false; - } - - if (k.rowIndexStride != rowIndexStride) { - LOG.info("Incompatible ORC file merge! 
Row index stride does not match" + - " for " + k.getInputPath()); - return false; - } - - return true; - } - - @Override - public void close() throws IOException { - // close writer - if (outWriter == null) { - return; - } - - if (fdis != null) { - fdis.close(); - fdis = null; - } - - outWriter.close(); - outWriter = null; - - super.close(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeInputFormat.java index a6c92fb..ceaeaea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFileStripeMergeInputFormat.java @@ -18,16 +18,16 @@ package org.apache.hadoop.hive.ql.io.orc; -import java.io.IOException; - -import org.apache.hadoop.hive.ql.io.merge.MergeInputFormat; +import org.apache.hadoop.hive.ql.io.merge.MergeFileInputFormat; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; -public class OrcFileStripeMergeInputFormat extends MergeInputFormat { +import java.io.IOException; + +public class OrcFileStripeMergeInputFormat extends MergeFileInputFormat { @Override public RecordReader getRecordReader( diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java index c391b0e..6411e3f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.List; /** * The interface for writing ORC files. @@ -72,4 +73,30 @@ * @return the offset that would be a valid end location for an ORC file */ long writeIntermediateFooter() throws IOException; + + /** + * Fast stripe append to ORC file. This interface is used for fast ORC file + * merge with other ORC files. When merging, the file to be merged should pass + * stripe in binary form along with stripe information and stripe statistics. + * After appending last stripe of a file, use appendUserMetadata() to append + * any user metadata. + * @param stripe - stripe as byte array + * @param offset - offset within byte array + * @param length - length of stripe within byte array + * @param stripeInfo - stripe information + * @param stripeStatistics - stripe statistics (Protobuf objects can be + * merged directly) + * @throws IOException + */ + public void appendStripe(byte[] stripe, int offset, int length, + StripeInformation stripeInfo, + OrcProto.StripeStatistics stripeStatistics) throws IOException; + + /** + * When fast stripe append is used for merging ORC stripes, after appending + * the last stripe from a file, this interface must be used to merge any + * user metadata. 
+   * @param userMetadata - user metadata
+   */
+  public void appendUserMetadata(List userMetadata);
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index 195d60e..5ea3afb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -18,16 +18,10 @@ package org.apache.hadoop.hive.ql.io.orc;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.lang.management.ManagementFactory;
-import java.nio.ByteBuffer;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.CodedOutputStream;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -71,10 +65,17 @@ import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Lists;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.CodedOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.management.ManagementFactory;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import static com.google.common.base.Preconditions.checkArgument;
 
 /**
  * An ORC file writer. The file is divided into stripes, which is the natural
@@ -2277,17 +2278,19 @@ public synchronized long writeIntermediateFooter() throws IOException {
     return rawWriter.getPos();
   }
 
-  void appendStripe(byte[] stripe, StripeInformation stripeInfo,
-      OrcProto.StripeStatistics stripeStatistics) throws IOException {
-    appendStripe(stripe, 0, stripe.length, stripeInfo, stripeStatistics);
-  }
-
-  void appendStripe(byte[] stripe, int offset, int length,
+  @Override
+  public void appendStripe(byte[] stripe, int offset, int length,
       StripeInformation stripeInfo,
       OrcProto.StripeStatistics stripeStatistics) throws IOException {
+    checkArgument(stripe != null, "Stripe must not be null");
+    checkArgument(length <= stripe.length,
+        "Specified length must not be greater than the specified array length");
+    checkArgument(stripeInfo != null, "Stripe information must not be null");
+    checkArgument(stripeStatistics != null,
+        "Stripe statistics must not be null");
+
     getStream();
     long start = rawWriter.getPos();
-    long stripeLen = length;
     long availBlockSpace = blockSize - (start % blockSize);
@@ -2343,7 +2346,8 @@ private void updateFileStatistics(OrcProto.StripeStatistics stripeStatistics) {
     }
   }
 
-  void appendUserMetadata(List userMetadata) {
+  @Override
+  public void appendUserMetadata(List userMetadata) {
     if (userMetadata != null) {
       for (UserMetadataItem item : userMetadata) {
         this.userMetadata.put(item.getName(), item.getValue());
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileBlockMergeInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileBlockMergeInputFormat.java
index 6809c79..4df80ea 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileBlockMergeInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileBlockMergeInputFormat.java
@@ -20,14 +20,14 @@ import java.io.IOException;
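As a usage note on the new public Writer API above: a caller stitches whole stripes from a source file into an open writer without decoding any rows. The sketch below condenses the flow of the deleted OrcFileMergeMapper; the StripeAppendSketch class and its appendStripes helper are hypothetical names, and the per-file compatibility checks (column count, compression codec, compress buffer size, writer version, row index stride) are assumed to have passed already.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFileValueWrapper;
import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
import org.apache.hadoop.hive.ql.io.orc.Writer;

public class StripeAppendSketch {
  // Hypothetical helper, not part of this patch; mirrors the deleted
  // OrcFileMergeMapper: read each stripe's raw bytes, append them verbatim,
  // and merge user metadata after the source file's last stripe.
  static void appendStripes(FileSystem fs, Path src, Writer out,
      List<OrcFileValueWrapper> stripes) throws IOException {
    FSDataInputStream in = fs.open(src);
    try {
      for (OrcFileValueWrapper v : stripes) {
        StripeInformation si = v.getStripeInformation();
        byte[] buffer = new byte[(int) si.getLength()];
        // fast merge copies the stripe bytes untouched; no decode/re-encode
        in.readFully(si.getOffset(), buffer, 0, (int) si.getLength());
        out.appendStripe(buffer, 0, buffer.length, si,
            v.getStripeStatistics());
        if (v.isLastStripeInFile()) {
          // user metadata may only be merged after a file's last stripe
          out.appendUserMetadata(v.getUserMetadata());
        }
      }
    } finally {
      in.close();
    }
  }
}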
-import org.apache.hadoop.hive.ql.io.merge.MergeInputFormat; +import org.apache.hadoop.hive.ql.io.merge.MergeFileInputFormat; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; -public class RCFileBlockMergeInputFormat extends MergeInputFormat { +public class RCFileBlockMergeInputFormat extends MergeFileInputFormat { @Override public RecordReader diff --git ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java deleted file mode 100644 index dee6b1c..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.io.rcfile.merge; - -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.io.RCFile; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.ql.io.merge.MergeMapper; -import org.apache.hadoop.hive.shims.CombineHiveKey; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.mapred.Mapper; -import org.apache.hadoop.mapred.OutputCollector; -import org.apache.hadoop.mapred.Reporter; - -@SuppressWarnings("deprecation") -public class RCFileMergeMapper extends MergeMapper implements - Mapper { - - RCFile.Writer outWriter; - - CompressionCodec codec = null; - int columnNumber = 0; - public final static Log LOG = LogFactory.getLog("RCFileMergeMapper"); - - @Override - public void map(Object k, RCFileValueBufferWrapper value, - OutputCollector output, Reporter reporter) - throws IOException { - try { - - RCFileKeyBufferWrapper key = null; - if (k instanceof CombineHiveKey) { - key = (RCFileKeyBufferWrapper) ((CombineHiveKey) k).getKey(); - } else { - key = (RCFileKeyBufferWrapper) k; - } - - fixTmpPathAlterTable(key.inputPath.getParent()); - - if (outWriter == null) { - codec = key.codec; - columnNumber = key.keyBuffer.getColumnNumber(); - RCFileOutputFormat.setColumnNumber(jc, columnNumber); - outWriter = new RCFile.Writer(fs, jc, outPath, null, codec); - } - - boolean sameCodec = ((codec == key.codec) || codec.getClass().equals( - key.codec.getClass())); - - if ((key.keyBuffer.getColumnNumber() != columnNumber) || (!sameCodec)) { - throw new IOException( - "RCFileMerge failed because the input files use different CompressionCodec or have different column number setting."); - } - - outWriter.flushBlock(key.keyBuffer, value.valueBuffer, key.recordLength, - key.keyLength, 
key.compressedKeyLength); - } catch (Throwable e) { - this.exception = true; - close(); - throw new IOException(e); - } - } - - @Override - public void close() throws IOException { - // close writer - if (outWriter == null) { - return; - } - - outWriter.close(); - outWriter = null; - - super.close(); - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 7129ed8..2f36f04 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -18,20 +18,6 @@ package org.apache.hadoop.hive.ql.optimizer; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; -import java.util.Set; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; @@ -53,6 +39,8 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.exec.OrcFileMergeOperator; +import org.apache.hadoop.hive.ql.exec.RCFileMergeOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; @@ -65,8 +53,10 @@ import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.merge.MergeWork; +import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; +import org.apache.hadoop.hive.ql.io.orc.OrcFileStripeMergeInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx; @@ -88,6 +78,7 @@ import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FetchWork; +import org.apache.hadoop.hive.ql.plan.FileMergeDesc; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; @@ -96,8 +87,10 @@ import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.RCFileMergeDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.StatsWork; @@ -106,6 +99,22 @@ import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.stats.StatsFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.InputFormat; + +import 
java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; +import java.util.Set; /** * General utility common functions for the Processor to convert operator into @@ -1250,33 +1259,20 @@ public static void createMRWorkForMergingFiles (FileSinkOperator fsInput, (conf.getBoolVar(ConfVars.HIVEMERGEORCFILESTRIPELEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class))) { - // Check if InputFormatClass is valid - final String inputFormatClass; - if (fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) { - inputFormatClass = conf.getVar(ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL); + cplan = GenMapRedUtils.createMergeTask(fsInputDesc, finalName, + dpCtx != null && dpCtx.getNumDPCols() > 0); + if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { + work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID)); + cplan.setName("Tez Merge File Work"); + ((TezWork) work).add(cplan); } else { - inputFormatClass = conf.getVar(ConfVars.HIVEMERGEINPUTFORMATSTRIPELEVEL); - } - try { - Class c = Class.forName(inputFormatClass); - - if(fsInputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class)) { - LOG.info("OrcFile format - Using stripe level merge"); - } else { - LOG.info("RCFile format- Using block level merge"); - } - cplan = GenMapRedUtils.createMergeTask(fsInputDesc, finalName, - dpCtx != null && dpCtx.getNumDPCols() > 0); work = cplan; - } catch (ClassNotFoundException e) { - String msg = "Illegal input format class: " + inputFormatClass; - throw new SemanticException(msg); } } else { cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc); if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) { work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID)); - cplan.setName("Merge"); + cplan.setName("Tez Merge File Work"); ((TezWork)work).add(cplan); } else { work = new MapredWork(); @@ -1489,6 +1485,7 @@ private static MapWork createMRWorkForMergingFiles (HiveConf conf, * * @param fsInputDesc * @param finalName + * @param inputFormatClass * @return MergeWork if table is stored as RCFile or ORCFile, * null otherwise */ @@ -1498,38 +1495,62 @@ public static MapWork createMergeTask(FileSinkDesc fsInputDesc, Path inputDir = fsInputDesc.getFinalDirName(); TableDesc tblDesc = fsInputDesc.getTableInfo(); - if (tblDesc.getInputFileFormatClass().equals(RCFileInputFormat.class) || - tblDesc.getInputFileFormatClass().equals(OrcInputFormat.class)) { - ArrayList inputDirs = new ArrayList(1); - ArrayList inputDirstr = new ArrayList(1); - if (!hasDynamicPartitions - && !GenMapRedUtils.isSkewedStoredAsDirs(fsInputDesc)) { - inputDirs.add(inputDir); - inputDirstr.add(inputDir.toString()); - } - - MergeWork work = new MergeWork(inputDirs, finalName, - hasDynamicPartitions, fsInputDesc.getDynPartCtx(), - tblDesc.getInputFileFormatClass()); - LinkedHashMap> pathToAliases = - new LinkedHashMap>(); - pathToAliases.put(inputDir.toString(), (ArrayList) inputDirstr.clone()); - work.setMapperCannotSpanPartns(true); - work.setPathToAliases(pathToAliases); - work.setAliasToWork( - new LinkedHashMap>()); - if (hasDynamicPartitions - || GenMapRedUtils.isSkewedStoredAsDirs(fsInputDesc)) { - 
work.getPathToPartitionInfo().put(inputDir.toString(), - new PartitionDesc(tblDesc, null)); - } - work.setListBucketingCtx(fsInputDesc.getLbCtx()); + List inputDirs = new ArrayList(1); + ArrayList inputDirstr = new ArrayList(1); + // this will be populated by MergeFileWork.resolveDynamicPartitionStoredAsSubDirsMerge + // in case of dynamic partitioning and list bucketing + if (!hasDynamicPartitions && + !GenMapRedUtils.isSkewedStoredAsDirs(fsInputDesc)) { + inputDirs.add(inputDir); + } + inputDirstr.add(inputDir.toString()); + + // internal input format class for CombineHiveInputFormat + final Class internalIFClass; + if (tblDesc.getInputFileFormatClass().equals(RCFileInputFormat.class)) { + internalIFClass = RCFileBlockMergeInputFormat.class; + } else if (tblDesc.getInputFileFormatClass().equals(OrcInputFormat.class)) { + internalIFClass = OrcFileStripeMergeInputFormat.class; + } else { + throw new SemanticException("createMergeTask called on a table with file" + + " format other than RCFile or ORCFile"); + } - return work; + // create the merge file work + MergeFileWork work = new MergeFileWork(inputDirs, finalName, + hasDynamicPartitions, tblDesc.getInputFileFormatClass().getName()); + LinkedHashMap> pathToAliases = + new LinkedHashMap>(); + pathToAliases.put(inputDir.toString(), inputDirstr); + work.setMapperCannotSpanPartns(true); + work.setPathToAliases(pathToAliases); + PartitionDesc pDesc = new PartitionDesc(tblDesc, null); + pDesc.setInputFileFormatClass(internalIFClass); + work.getPathToPartitionInfo().put(inputDir.toString(), pDesc); + work.setListBucketingCtx(fsInputDesc.getLbCtx()); + + // create alias to work which contains the merge operator + LinkedHashMap> aliasToWork = + new LinkedHashMap>(); + Operator mergeOp = null; + final FileMergeDesc fmd; + if (tblDesc.getInputFileFormatClass().equals(RCFileInputFormat.class)) { + fmd = new RCFileMergeDesc(); + } else { + fmd = new OrcFileMergeDesc(); } + fmd.setDpCtx(fsInputDesc.getDynPartCtx()); + fmd.setOutputPath(finalName); + fmd.setHasDynamicPartitions(work.hasDynamicPartitions()); + fmd.setListBucketingAlterTableConcatenate(work.isListBucketingAlterTableConcatenate()); + int lbLevel = work.getListBucketingCtx() == null ? 0 : + work.getListBucketingCtx().calculateListBucketingLevel(); + fmd.setListBucketingDepth(lbLevel); + mergeOp = OperatorFactory.get(fmd); + aliasToWork.put(inputDir.toString(), mergeOp); + work.setAliasToWork(aliasToWork); - throw new SemanticException("createMergeTask called on a table with file" - + " format other than RCFile or ORCFile"); + return work; } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 11a9419..15b369b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -404,6 +404,9 @@ protected void setInputFormat(Task task) { } private void setInputFormat(MapWork work, Operator op) { + if (op == null) { + return; + } if (op.isUseBucketizedHiveInputFormat()) { work.setUseBucketizedHiveInputFormat(true); return; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FileMergeDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FileMergeDesc.java new file mode 100644 index 0000000..7ec1bdd --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FileMergeDesc.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.fs.Path; + +/** + * + */ +public class FileMergeDesc extends AbstractOperatorDesc { + private DynamicPartitionCtx dpCtx; + private Path outputPath; + private int listBucketingDepth; + private boolean hasDynamicPartitions; + private boolean isListBucketingAlterTableConcatenate; + + public FileMergeDesc(DynamicPartitionCtx dynPartCtx, Path outputDir) { + this.dpCtx = dynPartCtx; + this.outputPath = outputDir; + } + + public DynamicPartitionCtx getDpCtx() { + return dpCtx; + } + + public void setDpCtx(DynamicPartitionCtx dpCtx) { + this.dpCtx = dpCtx; + } + + public Path getOutputPath() { + return outputPath; + } + + public void setOutputPath(Path outputPath) { + this.outputPath = outputPath; + } + + public int getListBucketingDepth() { + return listBucketingDepth; + } + + public void setListBucketingDepth(int listBucketingDepth) { + this.listBucketingDepth = listBucketingDepth; + } + + public boolean hasDynamicPartitions() { + return hasDynamicPartitions; + } + + public void setHasDynamicPartitions(boolean hasDynamicPartitions) { + this.hasDynamicPartitions = hasDynamicPartitions; + } + + public boolean isListBucketingAlterTableConcatenate() { + return isListBucketingAlterTableConcatenate; + } + + public void setListBucketingAlterTableConcatenate(boolean isListBucketingAlterTableConcatenate) { + this.isListBucketingAlterTableConcatenate = isListBucketingAlterTableConcatenate; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/OrcFileMergeDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/OrcFileMergeDesc.java new file mode 100644 index 0000000..7d0ab0c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OrcFileMergeDesc.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.fs.Path; + +/** + * ORC fast file merge operator descriptor. 
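+ * Carries the output path, dynamic partition context and list bucketing
+ * depth from {@link FileMergeDesc} into the ORC fast merge operator.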
+ */ +@Explain(displayName = "ORC File Merge Operator") +public class OrcFileMergeDesc extends FileMergeDesc { + + public OrcFileMergeDesc() { + this(null, null); + } + + public OrcFileMergeDesc(DynamicPartitionCtx dpCtx, Path outPath) { + super(dpCtx, outPath); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/RCFileMergeDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/RCFileMergeDesc.java new file mode 100644 index 0000000..476aa46 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/RCFileMergeDesc.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.fs.Path; + +/** + * Descriptor for Fast file merge RC file operator. + */ +@Explain(displayName = "RCFile Merge Operator") +public class RCFileMergeDesc extends FileMergeDesc { + + public RCFileMergeDesc() { + this(null, null); + } + + public RCFileMergeDesc(DynamicPartitionCtx dpCtx, Path outPath) { + super(dpCtx, outPath); + } + +} diff --git ql/src/test/queries/clientpositive/list_bucket_dml_8.q ql/src/test/queries/clientpositive/list_bucket_dml_8.q index 9e81b8d..26103f9 100644 --- ql/src/test/queries/clientpositive/list_bucket_dml_8.q +++ ql/src/test/queries/clientpositive/list_bucket_dml_8.q @@ -69,7 +69,6 @@ show partitions list_bucketing_dynamic_part; desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1'); desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1'); -set hive.merge.current.job.concatenate.list.bucketing=true; -- concatenate the partition and it will merge files alter table list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') concatenate; diff --git ql/src/test/queries/clientpositive/orc_merge1.q ql/src/test/queries/clientpositive/orc_merge1.q index ee65b98..f9914b4 100644 --- ql/src/test/queries/clientpositive/orc_merge1.q +++ ql/src/test/queries/clientpositive/orc_merge1.q @@ -1,51 +1,89 @@ set hive.merge.orcfile.stripe.level=false; set hive.exec.dynamic.partition=true; set hive.exec.dynamic.partition.mode=nonstrict; +set hive.optimize.sort.dynamic.partition=false; +set mapred.min.split.size=1000; +set mapred.max.split.size=1000; +set tez.grouping.min-size=1000; +set tez.grouping.max-size=1000; +set hive.merge.tezfiles=false; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; DROP TABLE orcfile_merge1; DROP TABLE orcfile_merge1b; +DROP TABLE orcfile_merge1c; CREATE TABLE orcfile_merge1 (key INT, value STRING) PARTITIONED BY (ds STRING, part STRING) STORED AS ORC; CREATE TABLE orcfile_merge1b (key INT, value STRING) PARTITIONED BY (ds STRING, part STRING) STORED AS ORC; +CREATE TABLE orcfile_merge1c (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC; --- 
Use non stipe-level merge +-- merge disabled EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src; INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src; -DESC FORMATTED orcfile_merge1 partition (ds='1', part='50'); +DESC FORMATTED orcfile_merge1 partition (ds='1', part='0'); +DESC FORMATTED orcfile_merge1 partition (ds='1', part='1'); -set hive.merge.orcfile.stripe.level=true; +set hive.merge.tezfiles=true; +set hive.merge.mapfiles=true; +set hive.merge.mapredfiles=true; +-- merge disabled EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src; INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src; + +DESC FORMATTED orcfile_merge1b partition (ds='1', part='0'); +DESC FORMATTED orcfile_merge1b partition (ds='1', part='1'); + +set hive.merge.orcfile.stripe.level=true; +set mapred.min.split.size=1000; +set mapred.max.split.size=1000000; +set tez.grouping.min-size=1000; +set tez.grouping.max-size=1000000; +EXPLAIN + INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part + FROM src; + +INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part) + SELECT key, value, PMOD(HASH(key), 2) as part FROM src; -DESC FORMATTED orcfile_merge1 partition (ds='1', part='50'); +DESC FORMATTED orcfile_merge1c partition (ds='1', part='0'); +DESC FORMATTED orcfile_merge1c partition (ds='1', part='1'); +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; -- Verify SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) FROM orcfile_merge1 WHERE ds='1' ) t; -set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; - SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) FROM orcfile_merge1b WHERE ds='1' ) t; +SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1c WHERE ds='1' +) t; + DROP TABLE orcfile_merge1; DROP TABLE orcfile_merge1b; +DROP TABLE orcfile_merge1c; diff --git ql/src/test/queries/clientpositive/orc_merge5.q ql/src/test/queries/clientpositive/orc_merge5.q new file mode 100644 index 0000000..1d6f338 --- /dev/null +++ ql/src/test/queries/clientpositive/orc_merge5.q @@ -0,0 +1,61 @@ +-- SORT_QUERY_RESULTS + +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; +create table orc_merge5b (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; + +load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=50000; +SET hive.optimize.index.filter=true; +set hive.merge.orcfile.stripe.level=false; +set hive.merge.tezfiles=false; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; +set hive.compute.splits.in.am=true; +set tez.grouping.min-size=1000; +set tez.grouping.max-size=50000; + +-- 3 mappers +explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where 
userid<=13; +insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; + +-- 3 files total +analyze table orc_merge5b compute statistics noscan; +desc formatted orc_merge5b; +select * from orc_merge5b; + +set hive.merge.orcfile.stripe.level=true; +set hive.merge.tezfiles=true; +set hive.merge.mapfiles=true; +set hive.merge.mapredfiles=true; + +-- 3 mappers +explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; +insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; + +-- 1 file after merging +analyze table orc_merge5b compute statistics noscan; +desc formatted orc_merge5b; +select * from orc_merge5b; + +set hive.merge.orcfile.stripe.level=false; +set hive.merge.tezfiles=false; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; +analyze table orc_merge5b compute statistics noscan; +desc formatted orc_merge5b; +select * from orc_merge5b; + +set hive.merge.orcfile.stripe.level=true; +explain alter table orc_merge5b concatenate; +alter table orc_merge5b concatenate; + +-- 1 file after merging +analyze table orc_merge5b compute statistics noscan; +desc formatted orc_merge5b; +select * from orc_merge5b; + diff --git ql/src/test/queries/clientpositive/orc_merge6.q ql/src/test/queries/clientpositive/orc_merge6.q new file mode 100644 index 0000000..0475eee --- /dev/null +++ ql/src/test/queries/clientpositive/orc_merge6.q @@ -0,0 +1,78 @@ +-- SORT_QUERY_RESULTS + +-- orc file merge tests for static partitions +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; +create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (year string, hour int) stored as orc; + +load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=50000; +SET hive.optimize.index.filter=true; +set hive.merge.orcfile.stripe.level=false; +set hive.merge.tezfiles=false; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; +set hive.compute.splits.in.am=true; +set tez.grouping.min-size=1000; +set tez.grouping.max-size=50000; + +-- 3 mappers +explain insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; +insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; +insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; + +-- 3 files total +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan; +analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan; +desc formatted orc_merge5a partition(year="2000",hour=24); +desc formatted orc_merge5a partition(year="2001",hour=24); +show partitions orc_merge5a; +select * from orc_merge5a; + +set hive.merge.orcfile.stripe.level=true; +set hive.merge.tezfiles=true; +set hive.merge.mapfiles=true; +set hive.merge.mapredfiles=true; + +-- 3 mappers +explain insert overwrite table orc_merge5a partition 
(year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; +insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; +insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; + +-- 1 file after merging +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan; +analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan; +desc formatted orc_merge5a partition(year="2000",hour=24); +desc formatted orc_merge5a partition(year="2001",hour=24); +show partitions orc_merge5a; +select * from orc_merge5a; + +set hive.merge.orcfile.stripe.level=false; +set hive.merge.tezfiles=false; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; +insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan; +analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan; +desc formatted orc_merge5a partition(year="2000",hour=24); +desc formatted orc_merge5a partition(year="2001",hour=24); +show partitions orc_merge5a; +select * from orc_merge5a; + +set hive.merge.orcfile.stripe.level=true; +explain alter table orc_merge5a partition(year="2000",hour=24) concatenate; +alter table orc_merge5a partition(year="2000",hour=24) concatenate; +alter table orc_merge5a partition(year="2001",hour=24) concatenate; + +-- 1 file after merging +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan; +analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan; +desc formatted orc_merge5a partition(year="2000",hour=24); +desc formatted orc_merge5a partition(year="2001",hour=24); +show partitions orc_merge5a; +select * from orc_merge5a; + diff --git ql/src/test/queries/clientpositive/orc_merge7.q ql/src/test/queries/clientpositive/orc_merge7.q new file mode 100644 index 0000000..ec61c1e --- /dev/null +++ ql/src/test/queries/clientpositive/orc_merge7.q @@ -0,0 +1,82 @@ +-- SORT_QUERY_RESULTS + +-- orc merge file tests for dynamic partition case + +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; +create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (st double) stored as orc; + +load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=50000; +SET hive.optimize.index.filter=true; +set hive.merge.orcfile.stripe.level=false; +set hive.merge.tezfiles=false; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; +set hive.compute.splits.in.am=true; +set tez.grouping.min-size=1000; +set tez.grouping.max-size=50000; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.optimize.sort.dynamic.partition=false; + +-- 3 mappers +explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5; 
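+-- dynamic partition case: the trailing subtype column feeds the (st)
+-- partition key, so merge inputs are resolved per partition directory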
+insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5; +insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5; + +-- 3 files total +analyze table orc_merge5a partition(st=80.0) compute statistics noscan; +analyze table orc_merge5a partition(st=0.8) compute statistics noscan; +desc formatted orc_merge5a partition(st=80.0); +desc formatted orc_merge5a partition(st=0.8); +show partitions orc_merge5a; +select * from orc_merge5a where userid<=13; + +set hive.merge.orcfile.stripe.level=true; +set hive.merge.tezfiles=true; +set hive.merge.mapfiles=true; +set hive.merge.mapredfiles=true; + +-- 3 mappers +explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5; +insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5; +insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5; + +-- 1 file after merging +analyze table orc_merge5a partition(st=80.0) compute statistics noscan; +analyze table orc_merge5a partition(st=0.8) compute statistics noscan; +desc formatted orc_merge5a partition(st=80.0); +desc formatted orc_merge5a partition(st=0.8); +show partitions orc_merge5a; +select * from orc_merge5a where userid<=13; + +set hive.merge.orcfile.stripe.level=false; +set hive.merge.tezfiles=false; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5; +insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5; +analyze table orc_merge5a partition(st=80.0) compute statistics noscan; +analyze table orc_merge5a partition(st=0.8) compute statistics noscan; +desc formatted orc_merge5a partition(st=80.0); +desc formatted orc_merge5a partition(st=0.8); +show partitions orc_merge5a; +select * from orc_merge5a where userid<=13; + +set hive.merge.orcfile.stripe.level=true; +explain alter table orc_merge5a partition(st=80.0) concatenate; +alter table orc_merge5a partition(st=80.0) concatenate; +alter table orc_merge5a partition(st=0.8) concatenate; + +-- 1 file after merging +analyze table orc_merge5a partition(st=80.0) compute statistics noscan; +analyze table orc_merge5a partition(st=0.8) compute statistics noscan; +desc formatted orc_merge5a partition(st=80.0); +desc formatted orc_merge5a partition(st=0.8); +show partitions orc_merge5a; +select * from orc_merge5a where userid<=13; + diff --git ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out index ea37c36..1e4c8bf 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out @@ -566,12 +566,16 @@ STAGE PLANS: Stats-Aggr Operator Stage: Stage-4 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat Stage: Stage-6 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat diff --git ql/src/test/results/clientpositive/list_bucket_dml_10.q.out ql/src/test/results/clientpositive/list_bucket_dml_10.q.out 
index e9367ac..94970ce 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_10.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_10.q.out @@ -220,14 +220,16 @@ STAGE PLANS: #### A masked pattern was here #### Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat properties: bucket_count -1 @@ -265,14 +267,16 @@ STAGE PLANS: #### A masked pattern was here #### Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat properties: bucket_count -1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_4.q.out ql/src/test/results/clientpositive/list_bucket_dml_4.q.out index 99496d5..baa3c5d 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_4.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_4.q.out @@ -575,14 +575,16 @@ STAGE PLANS: #### A masked pattern was here #### Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat properties: bucket_count -1 @@ -620,14 +622,16 @@ STAGE PLANS: #### A masked pattern was here #### Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat properties: bucket_count -1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_6.q.out ql/src/test/results/clientpositive/list_bucket_dml_6.q.out index d5deadb..e8c24f4 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_6.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_6.q.out @@ -692,14 +692,16 @@ STAGE PLANS: #### A masked pattern was here #### Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat properties: bucket_count -1 @@ -739,14 +741,16 @@ STAGE PLANS: #### A masked pattern was here #### Stage: Stage-5 - Merge Work + Merge File Operator 
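All of these expected-output hunks follow one pattern: the opaque "Merge Work" stage becomes a "Merge File Operator" stage whose map operator tree holds an ORC or RCFile merge operator, and the partition input format switches to the internal block- or stripe-merge input format. As a rough sketch of where that operator comes from, the hypothetical helper below condenses the descriptor wiring from GenMapRedUtils.createMergeTask earlier in this patch; the MergeFileWork path and alias setup is omitted.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
import org.apache.hadoop.hive.ql.plan.*;

public class MergeOperatorSketch {
  // Hypothetical helper condensed from GenMapRedUtils.createMergeTask.
  static Operator<? extends OperatorDesc> createMergeOperator(
      FileSinkDesc fsInputDesc, MergeFileWork work, Path finalName,
      TableDesc tblDesc) {
    final FileMergeDesc fmd;
    if (tblDesc.getInputFileFormatClass().equals(RCFileInputFormat.class)) {
      fmd = new RCFileMergeDesc();   // explains as "RCFile Merge Operator"
    } else {
      fmd = new OrcFileMergeDesc();  // explains as "ORC File Merge Operator"
    }
    fmd.setDpCtx(fsInputDesc.getDynPartCtx());
    fmd.setOutputPath(finalName);
    fmd.setHasDynamicPartitions(work.hasDynamicPartitions());
    fmd.setListBucketingAlterTableConcatenate(
        work.isListBucketingAlterTableConcatenate());
    int lbLevel = work.getListBucketingCtx() == null ? 0
        : work.getListBucketingCtx().calculateListBucketingLevel();
    fmd.setListBucketingDepth(lbLevel);
    // the descriptor's @Explain annotation supplies the stage names above
    return OperatorFactory.get(fmd);
  }
}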
+ Map Operator Tree: + RCFile Merge Operator merge level: block Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat properties: bucket_count -1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_7.q.out ql/src/test/results/clientpositive/list_bucket_dml_7.q.out index 4aea4db..14bcb9d 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_7.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_7.q.out @@ -640,14 +640,16 @@ STAGE PLANS: #### A masked pattern was here #### Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat properties: bucket_count -1 @@ -687,14 +689,16 @@ STAGE PLANS: #### A masked pattern was here #### Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat properties: bucket_count -1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_9.q.out ql/src/test/results/clientpositive/list_bucket_dml_9.q.out index f94a3cc..009e5a4 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_9.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_9.q.out @@ -575,14 +575,16 @@ STAGE PLANS: #### A masked pattern was here #### Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat properties: bucket_count -1 @@ -620,14 +622,16 @@ STAGE PLANS: #### A masked pattern was here #### Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat properties: bucket_count -1 diff --git ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out index 0899648..f14d6ca 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out @@ -202,12 +202,16 @@ STAGE PLANS: Stats-Aggr Operator Stage: Stage-3 - Merge Work + 
Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat diff --git ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index 0653469..8a786c8 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -176,12 +176,16 @@ STAGE PLANS: Stats-Aggr Operator Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat diff --git ql/src/test/results/clientpositive/orc_createas1.q.out ql/src/test/results/clientpositive/orc_createas1.q.out index b0c58dd..a272112 100644 --- ql/src/test/results/clientpositive/orc_createas1.q.out +++ ql/src/test/results/clientpositive/orc_createas1.q.out @@ -111,12 +111,16 @@ STAGE PLANS: Stats-Aggr Operator Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + ORC File Merge Operator merge level: stripe input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + ORC File Merge Operator merge level: stripe input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat @@ -270,12 +274,16 @@ STAGE PLANS: Stats-Aggr Operator Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + ORC File Merge Operator merge level: stripe input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + ORC File Merge Operator merge level: stripe input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/orc_merge1.q.out ql/src/test/results/clientpositive/orc_merge1.q.out index fc3e206..b3a0352 100644 --- ql/src/test/results/clientpositive/orc_merge1.q.out +++ ql/src/test/results/clientpositive/orc_merge1.q.out @@ -6,6 +6,10 @@ PREHOOK: query: DROP TABLE orcfile_merge1b PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE orcfile_merge1b POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE orcfile_merge1c +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orcfile_merge1c +POSTHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING) PARTITIONED BY (ds STRING, part STRING) STORED AS ORC PREHOOK: type: CREATETABLE @@ -26,16 +30,26 @@ POSTHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@orcfile_merge1b -PREHOOK: query: -- Use non stipe-level merge +PREHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcfile_merge1c +POSTHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcfile_merge1c +PREHOOK: 
query: -- merge disabled EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src PREHOOK: type: QUERY -POSTHOOK: query: -- Use non stipe-level merge +POSTHOOK: query: -- merge disabled EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -51,26 +65,17 @@ STAGE PLANS: alias: src Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 100) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) + File Output Operator + compressed: false Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) - Reduce Operator Tree: - Extract - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge1 + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1 Stage: Stage-0 Move Operator @@ -89,317 +94,26 @@ STAGE PLANS: Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@orcfile_merge1@ds=1 POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@orcfile_merge1@ds=1/part=0 POSTHOOK: Output: default@orcfile_merge1@ds=1/part=1 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=10 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=11 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=12 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=13 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=14 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=15 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=16 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=17 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=18 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=19 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=2 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=20 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=21 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=22 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=23 -POSTHOOK: 
Output: default@orcfile_merge1@ds=1/part=24 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=25 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=26 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=27 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=28 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=29 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=3 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=30 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=31 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=32 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=33 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=34 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=35 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=36 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=37 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=38 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=39 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=4 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=40 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=41 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=42 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=43 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=44 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=45 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=46 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=47 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=48 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=49 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=5 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=50 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=51 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=52 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=53 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=54 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=55 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=56 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=57 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=58 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=59 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=6 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=60 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=61 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=62 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=63 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=64 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=65 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=66 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=67 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=68 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=69 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=7 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=70 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=71 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=72 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=73 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=74 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=75 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=76 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=77 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=78 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=79 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=8 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=80 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=82 -POSTHOOK: Output: 
default@orcfile_merge1@ds=1/part=83 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=84 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=85 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=86 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=87 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=88 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=89 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=9 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=90 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=91 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=92 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=93 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=94 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=95 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=96 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=97 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=98 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=99 POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=13).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=14).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=14).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=15).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=16).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=16).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=17).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=17).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=18).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=18).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=19).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=19).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=20).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=20).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=21).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=21).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=22).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=22).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=23).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=23).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=24).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=24).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=25).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=25).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=26).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=26).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=27).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=27).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=28).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=28).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=29).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=29).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: 
Lineage: orcfile_merge1 PARTITION(ds=1,part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=30).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=30).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=31).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=31).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=32).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=32).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=33).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=33).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=34).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=34).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=35).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=35).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=36).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=36).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=37).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=37).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=38).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=38).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=39).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=39).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=40).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=40).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=41).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=41).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=42).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=42).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=43).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=43).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=44).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=44).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=45).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=45).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=46).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=46).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=47).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=47).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=48).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=48).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=49).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=49).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=4).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=50).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=50).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=51).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=51).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=52).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: 
Lineage: orcfile_merge1 PARTITION(ds=1,part=52).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=53).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=53).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=54).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=54).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=55).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=55).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=56).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=56).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=57).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=57).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=58).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=58).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=59).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=59).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=5).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=60).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=60).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=61).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=61).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=62).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=62).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=63).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=63).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=64).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=64).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=65).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=65).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=66).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=66).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=67).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=67).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=68).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=68).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=69).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=69).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=6).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=6).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=70).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=70).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=71).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=71).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=72).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=72).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=73).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=73).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=74).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=74).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=75).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=75).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: 
Lineage: orcfile_merge1 PARTITION(ds=1,part=76).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=76).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=77).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=77).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=78).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=78).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=79).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=79).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=7).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=7).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=80).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=80).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=82).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=82).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=83).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=83).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=84).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=84).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=85).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=85).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=86).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=86).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=87).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=87).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=88).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=88).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=89).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=89).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=8).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=90).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=90).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=91).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=91).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=92).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=92).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=93).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=93).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=94).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=94).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=95).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=95).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=96).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=96).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=97).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=97).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=98).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=98).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=99).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=99).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: 
Lineage: orcfile_merge1 PARTITION(ds=1,part=9).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='50') +PREHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='0') PREHOOK: type: DESCTABLE PREHOOK: Input: default@orcfile_merge1 -POSTHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='50') +POSTHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='0') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@orcfile_merge1 # col_name data_type comment @@ -414,7 +128,7 @@ ds string part string # Detailed Partition Information -Partition Value: [1, 50] +Partition Value: [1, 0] Database: default Table: orcfile_merge1 #### A masked pattern was here #### @@ -422,10 +136,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 1 - numRows 3 - rawDataSize 282 - totalSize 312 + numFiles 5 + numRows 242 + rawDataSize 22748 + totalSize 2708 #### A masked pattern was here #### # Storage Information @@ -438,20 +152,69 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: EXPLAIN +PREHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orcfile_merge1 +POSTHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orcfile_merge1 +# col_name data_type comment + +key int +value string + +# Partition Information +# col_name data_type comment + +ds string +part string + +# Detailed Partition Information +Partition Value: [1, 1] +Database: default +Table: orcfile_merge1 +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 5 + numRows 258 + rawDataSize 24252 + totalSize 2799 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- merge disabled +EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- merge disabled +EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -461,26 +224,26 @@ STAGE PLANS: alias: src Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 100) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, 
_col2 Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) + File Output Operator + compressed: false Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) - Reduce Operator Tree: - Extract - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge1b + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1b + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-0 Move Operator @@ -498,320 +261,59 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1b + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1b + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@orcfile_merge1b@ds=1 POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=0 POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=1 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=10 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=11 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=12 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=13 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=14 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=15 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=16 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=17 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=18 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=19 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=2 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=20 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=21 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=22 -POSTHOOK: Output: 
default@orcfile_merge1b@ds=1/part=23 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=24 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=25 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=26 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=27 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=28 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=29 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=3 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=30 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=31 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=32 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=33 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=34 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=35 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=36 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=37 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=38 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=39 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=4 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=40 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=41 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=42 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=43 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=44 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=45 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=46 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=47 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=48 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=49 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=5 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=50 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=51 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=52 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=53 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=54 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=55 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=56 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=57 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=58 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=59 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=6 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=60 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=61 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=62 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=63 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=64 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=65 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=66 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=67 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=68 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=69 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=7 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=70 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=71 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=72 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=73 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=74 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=75 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=76 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=77 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=78 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=79 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=8 -POSTHOOK: Output: 
default@orcfile_merge1b@ds=1/part=80 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=82 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=83 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=84 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=85 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=86 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=87 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=88 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=89 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=9 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=90 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=91 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=92 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=93 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=94 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=95 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=96 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=97 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=98 -POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=99 POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=13).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=14).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=14).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=15).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=16).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=16).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=17).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=17).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=18).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=18).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=19).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=19).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=20).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=20).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=21).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=21).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=22).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=22).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=23).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=23).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=24).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=24).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=25).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=25).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=26).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=26).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=27).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=27).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=28).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=28).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=29).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=29).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=30).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=30).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=31).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=31).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=32).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=32).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=33).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=33).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=34).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=34).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=35).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=35).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=36).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=36).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=37).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=37).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=38).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=38).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=39).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=39).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=40).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=40).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=41).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=41).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=42).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=42).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=43).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=43).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=44).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=44).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=45).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=45).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=46).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=46).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=47).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=47).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=48).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=48).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=49).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=49).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=4).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=50).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=50).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=51).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=51).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=52).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=52).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=53).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=53).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=54).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=54).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=55).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=55).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=56).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=56).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=57).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=57).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=58).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=58).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=59).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=59).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=5).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=60).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=60).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=61).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=61).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=62).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=62).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=63).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=63).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=64).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=64).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=65).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=65).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=66).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=66).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=67).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=67).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=68).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=68).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=69).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=69).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=6).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=6).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=70).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=70).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=71).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=71).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=72).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=72).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=73).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=73).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=74).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=74).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=75).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=75).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=76).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=76).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=77).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=77).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=78).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=78).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=79).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=79).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=7).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=7).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=80).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=80).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=82).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=82).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=83).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=83).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=84).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=84).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=85).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=85).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=86).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=86).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=87).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=87).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=88).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=88).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=89).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=89).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=8).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=90).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=90).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=91).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=91).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=92).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=92).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=93).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=93).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=94).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=94).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=95).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=95).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=96).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=96).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=97).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=97).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=98).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=98).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=99).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=99).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=9).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='50')
+PREHOOK: query: DESC FORMATTED orcfile_merge1b partition (ds='1', part='0')
 PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@orcfile_merge1
-POSTHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='50')
+PREHOOK: Input: default@orcfile_merge1b
+POSTHOOK: query: DESC FORMATTED orcfile_merge1b partition (ds='1', part='0')
 POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@orcfile_merge1
+POSTHOOK: Input: default@orcfile_merge1b
 # col_name data_type comment
 key int
@@ -824,18 +326,246 @@ ds string
 part string
 # Detailed Partition Information
-Partition Value: [1, 50]
+Partition Value: [1, 0]
 Database: default
-Table: orcfile_merge1
+Table: orcfile_merge1b
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 242
+ rawDataSize 22748
+ totalSize 1300
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESC FORMATTED orcfile_merge1b partition (ds='1', part='1')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orcfile_merge1b
+POSTHOOK: query: DESC FORMATTED orcfile_merge1b partition (ds='1', part='1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orcfile_merge1b
+# col_name data_type comment
+
+key int
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+part string
+
+# Detailed Partition Information
+Partition Value: [1, 1]
+Database: default
+Table: orcfile_merge1b
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 258
+ rawDataSize 24252
+ totalSize 1321
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1c
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ part
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1c
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-5
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge1c@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=1
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESC FORMATTED orcfile_merge1c partition (ds='1', part='0')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orcfile_merge1c
+POSTHOOK: query: DESC FORMATTED orcfile_merge1c partition (ds='1', part='0')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orcfile_merge1c
+# col_name data_type comment
+
+key int
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+part string
+
+# Detailed Partition Information
+Partition Value: [1, 0]
+Database: default
+Table: orcfile_merge1c
 #### A masked pattern was here ####
 Protect Mode: None
 #### A masked pattern was here ####
 Partition Parameters:
 COLUMN_STATS_ACCURATE true
 numFiles 1
- numRows 3
- rawDataSize 282
- totalSize 312
+ numRows 242
+ rawDataSize 22748
+ totalSize 1299
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESC FORMATTED orcfile_merge1c partition (ds='1', part='1')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orcfile_merge1c
+POSTHOOK: query: DESC FORMATTED orcfile_merge1c partition (ds='1', part='1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orcfile_merge1c
+# col_name data_type comment
+
+key int
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+part string
+
+# Detailed Partition Information
+Partition Value: [1, 1]
+Database: default
+Table: orcfile_merge1c
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 258
+ rawDataSize 24252
+ totalSize 1320
 #### A masked pattern was here ####
 # Storage Information
@@ -857,103 +587,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orcfile_merge1
 PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
 PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=10
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=11
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=12
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=13
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=14
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=15
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=16
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=17
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=18
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=19
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=2
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=20
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=21
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=22
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=23
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=24
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=25
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=26
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=27
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=28
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=29
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=3
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=30
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=31
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=32
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=33
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=34
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=35
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=36
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=37
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=38
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=39
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=4
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=40
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=41
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=42
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=43
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=44
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=45
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=46
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=47
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=48
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=49
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=5
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=50
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=51
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=52
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=53
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=54
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=55
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=56
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=57
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=58
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=59
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=6
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=60
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=61
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=62
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=63
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=64
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=65
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=66
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=67
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=68
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=69
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=7
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=70
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=71
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=72
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=73
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=74
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=75
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=76
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=77
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=78
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=79
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=8
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=80
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=82
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=83
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=84
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=85
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=86
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=87
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=88
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=89
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=9
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=90
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=91
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=92
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=93
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=94
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=95
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=96
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=97
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=98
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=99
 #### A masked pattern was here ####
 POSTHOOK: query: -- Verify
 SELECT SUM(HASH(c)) FROM (
@@ -964,105 +597,8 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orcfile_merge1
 POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
 POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=10
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=11
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=12
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=13
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=14
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=15
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=16
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=17
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=18
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=19
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=2
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=20
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=21
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=22
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=23
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=24
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=25
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=26
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=27
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=28
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=29
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=3
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=30
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=31
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=32
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=33
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=34
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=35
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=36
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=37
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=38
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=39
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=4
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=40
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=41
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=42
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=43
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=44
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=45
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=46
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=47
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=48
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=49
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=5
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=50
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=51
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=52
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=53
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=54
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=55
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=56
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=57
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=58
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=59
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=6
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=60
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=61
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=62
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=63
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=64
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=65
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=66
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=67
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=68
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=69
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=7
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=70
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=71
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=72
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=73
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=74
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=75
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=76
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=77
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=78
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=79
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=8
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=80
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=82
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=83
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=84
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=85
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=86
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=87
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=88
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=89
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=9
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=90
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=91
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=92
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=93
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=94
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=95
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=96
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=97
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=98
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=99
 #### A masked pattern was here ####
-59521204047
+-21975308766
 PREHOOK: query: SELECT SUM(HASH(c)) FROM (
 SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
 FROM orcfile_merge1b WHERE ds='1'
@@ -1071,103 +607,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orcfile_merge1b
 PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
 PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=10
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=11
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=12
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=13
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=14
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=15
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=16
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=17
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=18
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=19
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=2
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=20
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=21
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=22
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=23
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=24
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=25
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=26
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=27
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=28
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=29
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=3
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=30
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=31
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=32
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=33
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=34
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=35
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=36
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=37
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=38
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=39
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=4
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=40
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=41
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=42
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=43
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=44
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=45
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=46
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=47
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=48
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=49
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=5
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=50
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=51
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=52
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=53
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=54
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=55
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=56
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=57
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=58
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=59
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=6
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=60
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=61
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=62
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=63
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=64
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=65
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=66
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=67
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=68
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=69
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=7
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=70
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=71
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=72
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=73
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=74
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=75
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=76
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=77
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=78
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=79
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=8
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=80
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=82
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=83
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=84
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=85
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=86
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=87
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=88
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=89
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=9
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=90
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=91
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=92
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=93
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=94
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=95
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=96
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=97
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=98
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=99
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
 SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
@@ -1177,105 +616,27 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orcfile_merge1b
 POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
 POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=10
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=11
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=12
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=13
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=14
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=15
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=16
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=17
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=18
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=19
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=2
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=20
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=21
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=22
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=23
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=24
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=25
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=26
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=27
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=28
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=29
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=3
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=30
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=31
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=32
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=33
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=34
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=35
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=36
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=37
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=38
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=39
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=4
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=40
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=41
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=42
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=43
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=44
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=45
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=46
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=47
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=48
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=49
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=5
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=50
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=51
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=52
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=53
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=54
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=55
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=56
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=57
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=58
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=59
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=6
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=60
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=61
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=62
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=63
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=64
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=65
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=66
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=67
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=68
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=69
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=7
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=70
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=71
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=72
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=73
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=74
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=75
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=76
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=77
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=78
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=79
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=8
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=80
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=82
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=83
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=84
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=85
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=86
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=87
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=88
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=89
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=9
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=90
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=91
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=92
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=93
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=94
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=95
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=96
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=97
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=98
-POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=99
 #### A masked pattern was here ####
-59521204047
+-21975308766
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1c WHERE ds='1'
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1c WHERE ds='1'
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+-21975308766
 PREHOOK: query: DROP TABLE orcfile_merge1
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@orcfile_merge1
@@ -1292,3 +653,11 @@ POSTHOOK: query: DROP TABLE orcfile_merge1b
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@orcfile_merge1b
 POSTHOOK: Output: default@orcfile_merge1b
+PREHOOK: query: DROP TABLE orcfile_merge1c
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Output: default@orcfile_merge1c
+POSTHOOK: query: DROP TABLE orcfile_merge1c
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Output: default@orcfile_merge1c
diff --git ql/src/test/results/clientpositive/orc_merge3.q.out ql/src/test/results/clientpositive/orc_merge3.q.out
index 258f538..d711aad 100644
--- ql/src/test/results/clientpositive/orc_merge3.q.out
+++ ql/src/test/results/clientpositive/orc_merge3.q.out
@@ -107,12 +107,16 @@ STAGE PLANS:
 Stats-Aggr Operator
 Stage: Stage-3
- Merge Work
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
 merge level: stripe
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 Stage: Stage-5
- Merge Work
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
 merge level: stripe
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
diff --git ql/src/test/results/clientpositive/orc_merge5.q.out ql/src/test/results/clientpositive/orc_merge5.q.out
new file mode 100644
index 0000000..a71edce
--- /dev/null
+++ ql/src/test/results/clientpositive/orc_merge5.q.out
@@ -0,0 +1,460 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_merge5
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_merge5
+PREHOOK: query: create table orc_merge5b (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: create table orc_merge5b (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_merge5
+POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_merge5
+PREHOOK: query: -- 3 mappers
+explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+POSTHOOK: query: -- 3 mappers
+explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orc_merge5
+ filterExpr: (userid <= 13) (type: boolean)
+ Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (userid <= 13) (type: boolean)
+ Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orc_merge5b
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orc_merge5b
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5b
+POSTHOOK: Lineage: orc_merge5b.decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: -- 3 files total
+analyze table orc_merge5b compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: -- 3 files total
+analyze table orc_merge5b compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: desc formatted orc_merge5b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5b
+POSTHOOK: query: desc formatted orc_merge5b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5b
+# col_name data_type comment
+
+userid bigint
+string1 string
+subtype double
+decimal1 decimal(10,0)
+ts timestamp
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 3
+ numRows 3
+ rawDataSize 765
+ totalSize 1141
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from orc_merge5b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_merge5b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+13 bar 80.0 2 1969-12-31 16:00:05
+2 foo 0.8 1 1969-12-31 16:00:00
+5 eat 0.8 6 1969-12-31 16:00:20
+PREHOOK: query: -- 3 mappers
+explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+POSTHOOK: query: -- 3 mappers
+explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orc_merge5
+ filterExpr: (userid <= 13) (type: boolean)
+ Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (userid <= 13) (type: boolean)
+ Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orc_merge5b
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orc_merge5b
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-5
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5b
+POSTHOOK: Lineage: orc_merge5b.decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: -- 1 file after merging
+analyze table orc_merge5b compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: -- 1 file after merging
+analyze table orc_merge5b compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: desc formatted orc_merge5b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5b
+POSTHOOK: query: desc formatted orc_merge5b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5b
+# col_name data_type comment
+
+userid bigint
+string1 string
+subtype double
+decimal1 decimal(10,0)
+ts timestamp
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 3
+ rawDataSize 765
+ totalSize 907
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from orc_merge5b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_merge5b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+13 bar 80.0 2 1969-12-31 16:00:05
+2 foo 0.8 1 1969-12-31 16:00:00
+5 eat 0.8 6 1969-12-31 16:00:20
+PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5b
+POSTHOOK: Lineage: orc_merge5b.decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: analyze table orc_merge5b compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: analyze table orc_merge5b compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: desc formatted orc_merge5b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5b
+POSTHOOK: query: desc formatted orc_merge5b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5b
+# col_name data_type comment
+
+userid bigint
+string1 string
+subtype double
+decimal1 decimal(10,0)
+ts timestamp
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 3
+ numRows 3
+ rawDataSize 765
+ totalSize 1141
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from orc_merge5b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_merge5b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+13 bar 80.0 2 1969-12-31 16:00:05
+2 foo 0.8 1 1969-12-31 16:00:00
+5 eat 0.8 6 1969-12-31 16:00:20
+PREHOOK: query: explain alter table orc_merge5b concatenate
+PREHOOK: type: ALTER_TABLE_MERGE
+POSTHOOK: query: explain alter table orc_merge5b concatenate
+POSTHOOK: type: ALTER_TABLE_MERGE
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+ Stage-1 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-0
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orc_merge5b
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: alter table orc_merge5b concatenate
+PREHOOK: type: ALTER_TABLE_MERGE
+PREHOOK: Input: default@orc_merge5b
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: alter table orc_merge5b concatenate
+POSTHOOK: type: ALTER_TABLE_MERGE
+POSTHOOK: Input: default@orc_merge5b
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: -- 1 file after merging
+analyze table orc_merge5b compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: -- 1 file after merging
+analyze table orc_merge5b compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: desc formatted orc_merge5b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5b
+POSTHOOK: query: desc formatted orc_merge5b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5b
+# col_name data_type comment
+
+userid bigint
+string1 string
+subtype double
+decimal1 decimal(10,0)
+ts timestamp
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 3
+ rawDataSize 765
+ totalSize 907
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from orc_merge5b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_merge5b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+13 bar 80.0 2 1969-12-31 16:00:05
+2 foo 0.8 1 1969-12-31 16:00:00
+5 eat 0.8 6 1969-12-31 16:00:20
diff --git ql/src/test/results/clientpositive/orc_merge6.q.out ql/src/test/results/clientpositive/orc_merge6.q.out
new file mode 100644
index 0000000..69cf6f4
--- /dev/null
+++ ql/src/test/results/clientpositive/orc_merge6.q.out
@@ -0,0 +1,822 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+-- orc file merge
tests for static partitions +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_merge5 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- orc file merge tests for static partitions +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_merge5 +PREHOOK: query: create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (year string, hour int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (year string, hour int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_merge5a +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_merge5 +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_merge5 +PREHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orc_merge5 + filterExpr: (userid <= 13) (type: boolean) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (userid <= 13) (type: boolean) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-0 + Move Operator + tables: + partition: + hour 24 + year 2000 + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from 
orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: -- 3 files total +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: -- 3 files total +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +PREHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +PREHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: 
default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2000, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 3 + rawDataSize 765 + totalSize 1141 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2001, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 3 + rawDataSize 765 + totalSize 1141 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +year=2000/hour=24 +year=2001/hour=24 +PREHOOK: query: select * from orc_merge5a +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@year=2000/hour=24 +PREHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 2000 24 +13 bar 80.0 2 1969-12-31 16:00:05 2001 24 +2 foo 0.8 1 1969-12-31 16:00:00 2000 24 +2 foo 0.8 1 1969-12-31 16:00:00 2001 24 +5 eat 0.8 6 1969-12-31 16:00:20 2000 24 +5 eat 0.8 6 1969-12-31 16:00:20 2001 24 +PREHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (year="2000",hour=24) select 
userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orc_merge5 + filterExpr: (userid <= 13) (type: boolean) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (userid <= 13) (type: boolean) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + hour 24 + year 2000 + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: 
orc_merge5a PARTITION(year=2000,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +PREHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +PREHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2000, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 3 + rawDataSize 765 + totalSize 907 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: 
desc formatted orc_merge5a partition(year="2001",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2001, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 3 + rawDataSize 765 + totalSize 907 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +year=2000/hour=24 +year=2001/hour=24 +PREHOOK: query: select * from orc_merge5a +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@year=2000/hour=24 +PREHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 2000 24 +13 bar 80.0 2 1969-12-31 16:00:05 2001 24 +2 foo 0.8 1 1969-12-31 16:00:00 2000 24 +2 foo 0.8 1 1969-12-31 16:00:00 2001 24 +5 eat 0.8 6 1969-12-31 16:00:20 2000 24 +5 eat 0.8 6 1969-12-31 16:00:20 2001 24 +PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table orc_merge5a partition 
(year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +PREHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +PREHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2000, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 3 + rawDataSize 765 + totalSize 1141 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: 
default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2001, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 3 + rawDataSize 765 + totalSize 1141 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +year=2000/hour=24 +year=2001/hour=24 +PREHOOK: query: select * from orc_merge5a +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@year=2000/hour=24 +PREHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 2000 24 +13 bar 80.0 2 1969-12-31 16:00:05 2001 24 +2 foo 0.8 1 1969-12-31 16:00:00 2000 24 +2 foo 0.8 1 1969-12-31 16:00:00 2001 24 +5 eat 0.8 6 1969-12-31 16:00:20 2000 24 +5 eat 0.8 6 1969-12-31 16:00:20 2001 24 +PREHOOK: query: explain alter table orc_merge5a partition(year="2000",hour=24) concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: query: explain alter table orc_merge5a partition(year="2000",hour=24) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-0 + + Stage: Stage-1 + Move Operator + tables: + partition: + hour 24 + year 2000 + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: alter table orc_merge5a partition(year="2000",hour=24) concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: alter table orc_merge5a partition(year="2000",hour=24) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +PREHOOK: query: alter table orc_merge5a partition(year="2001",hour=24) concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: alter table orc_merge5a partition(year="2001",hour=24) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 
+PREHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +PREHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +PREHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2000, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 3 + rawDataSize 765 + totalSize 907 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2001, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 3 + rawDataSize 765 + totalSize 907 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +year=2000/hour=24 +year=2001/hour=24 +PREHOOK: query: select * from orc_merge5a +PREHOOK: type: 
QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@year=2000/hour=24 +PREHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 2000 24 +13 bar 80.0 2 1969-12-31 16:00:05 2001 24 +2 foo 0.8 1 1969-12-31 16:00:00 2000 24 +2 foo 0.8 1 1969-12-31 16:00:00 2001 24 +5 eat 0.8 6 1969-12-31 16:00:20 2000 24 +5 eat 0.8 6 1969-12-31 16:00:20 2001 24 diff --git ql/src/test/results/clientpositive/orc_merge7.q.out ql/src/test/results/clientpositive/orc_merge7.q.out new file mode 100644 index 0000000..f6058fe --- /dev/null +++ ql/src/test/results/clientpositive/orc_merge7.q.out @@ -0,0 +1,925 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- orc merge file tests for dynamic partition case + +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_merge5 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- orc merge file tests for dynamic partition case + +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_merge5 +PREHOOK: query: create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (st double) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (st double) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_merge5a +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_merge5 +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_merge5 +PREHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orc_merge5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp), subtype (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num 
rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-0 + Move Operator + tables: + partition: + st + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), 
comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: 
orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: -- 3 files total +analyze table orc_merge5a partition(st=80.0) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: query: -- 3 files total +analyze table orc_merge5a partition(st=80.0) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=80.0 +PREHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=0.8 +PREHOOK: query: desc formatted orc_merge5a partition(st=80.0) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=80.0) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [80.0] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1 + rawDataSize 255 + totalSize 521 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(st=0.8) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=0.8) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [0.8] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 2 + numRows 2 + rawDataSize 510 + totalSize 1058 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage 
Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +st=0.8 +st=1.8 +st=8.0 +st=80.0 +PREHOOK: query: select * from orc_merge5a where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@st=0.8 +PREHOOK: Input: default@orc_merge5a@st=1.8 +PREHOOK: Input: default@orc_merge5a@st=8.0 +PREHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@st=0.8 +POSTHOOK: Input: default@orc_merge5a@st=1.8 +POSTHOOK: Input: default@orc_merge5a@st=8.0 +POSTHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 80.0 +2 foo 0.8 1 1969-12-31 16:00:00 0.8 +5 eat 0.8 6 1969-12-31 16:00:20 0.8 +PREHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: orc_merge5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp), subtype (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + st + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert 
overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table orc_merge5a partition 
(st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(st=80.0) compute 
statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(st=80.0) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=80.0 +PREHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=0.8 +PREHOOK: query: desc formatted orc_merge5a partition(st=80.0) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=80.0) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [80.0] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1 + rawDataSize 255 + totalSize 521 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(st=0.8) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=0.8) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [0.8] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 2 + rawDataSize 510 + totalSize 852 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +st=0.8 +st=1.8 +st=8.0 +st=80.0 +PREHOOK: query: select * from orc_merge5a where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@st=0.8 +PREHOOK: Input: default@orc_merge5a@st=1.8 +PREHOOK: Input: default@orc_merge5a@st=8.0 +PREHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### 
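(Aside for readers tracing this golden output: the orc_merge5a sequence around this point verifies ORC stripe-level fast merge end to end. Each dynamic-partition insert initially lands several small ORC files per partition, the conditional merge stage — the "Merge File Operator" with merge level: stripe in the plan above — concatenates them, and ANALYZE ... NOSCAN afterwards reports numFiles 1 for the merged partition. A minimal sketch of the same flow, assuming a build with this patch applied; the table names are the orc_merge5.q test fixtures and the settings shown are illustrative of what the test relies on:

    -- enable small-file merging plus ORC stripe-level fast merge
    SET hive.merge.mapfiles=true;
    SET hive.merge.mapredfiles=true;
    SET hive.merge.orcfile.stripe.level=true;
    SET hive.exec.dynamic.partition.mode=nonstrict;

    -- dynamic-partition insert; small files in each partition are
    -- merged stripe by stripe, without decompressing row data
    INSERT OVERWRITE TABLE orc_merge5a PARTITION (st)
        SELECT userid, string1, subtype, decimal1, ts, subtype FROM orc_merge5;

    -- refresh stats from file metadata (no row scan), then inspect
    -- numFiles/totalSize in the partition parameters
    ANALYZE TABLE orc_merge5a PARTITION (st=80.0) COMPUTE STATISTICS NOSCAN;
    DESC FORMATTED orc_merge5a PARTITION (st=80.0);
)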
+POSTHOOK: query: select * from orc_merge5a where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@st=0.8 +POSTHOOK: Input: default@orc_merge5a@st=1.8 +POSTHOOK: Input: default@orc_merge5a@st=8.0 +POSTHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 80.0 +2 foo 0.8 1 1969-12-31 16:00:00 0.8 +5 eat 0.8 6 1969-12-31 16:00:20 0.8 +PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: 
Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a 
PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: analyze table orc_merge5a partition(st=80.0) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: query: analyze table orc_merge5a partition(st=80.0) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=80.0 +PREHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=0.8 +PREHOOK: query: desc formatted orc_merge5a partition(st=80.0) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=80.0) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [80.0] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1 + rawDataSize 255 + totalSize 521 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(st=0.8) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=0.8) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [0.8] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 2 + numRows 2 + rawDataSize 510 + totalSize 1058 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions 
orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +st=0.8 +st=1.8 +st=8.0 +st=80.0 +PREHOOK: query: select * from orc_merge5a where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@st=0.8 +PREHOOK: Input: default@orc_merge5a@st=1.8 +PREHOOK: Input: default@orc_merge5a@st=8.0 +PREHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@st=0.8 +POSTHOOK: Input: default@orc_merge5a@st=1.8 +POSTHOOK: Input: default@orc_merge5a@st=8.0 +POSTHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 80.0 +2 foo 0.8 1 1969-12-31 16:00:00 0.8 +5 eat 0.8 6 1969-12-31 16:00:20 0.8 +PREHOOK: query: explain alter table orc_merge5a partition(st=80.0) concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: query: explain alter table orc_merge5a partition(st=80.0) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-0 + + Stage: Stage-1 + Move Operator + tables: + partition: + st 80.0 + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: alter table orc_merge5a partition(st=80.0) concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: query: alter table orc_merge5a partition(st=80.0) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=80.0 +PREHOOK: query: alter table orc_merge5a partition(st=0.8) concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: query: alter table orc_merge5a partition(st=0.8) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=0.8 +PREHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(st=80.0) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(st=80.0) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=80.0 +PREHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=0.8 +PREHOOK: query: desc formatted orc_merge5a partition(st=80.0) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=80.0) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint 
+string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [80.0] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1 + rawDataSize 255 + totalSize 521 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(st=0.8) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=0.8) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [0.8] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 2 + rawDataSize 510 + totalSize 852 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +st=0.8 +st=1.8 +st=8.0 +st=80.0 +PREHOOK: query: select * from orc_merge5a where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@st=0.8 +PREHOOK: Input: default@orc_merge5a@st=1.8 +PREHOOK: Input: default@orc_merge5a@st=8.0 +PREHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@st=0.8 +POSTHOOK: Input: default@orc_merge5a@st=1.8 +POSTHOOK: Input: default@orc_merge5a@st=8.0 +POSTHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 80.0 +2 foo 0.8 1 1969-12-31 16:00:00 0.8 +5 eat 0.8 6 1969-12-31 16:00:20 0.8 diff --git ql/src/test/results/clientpositive/rcfile_createas1.q.out ql/src/test/results/clientpositive/rcfile_createas1.q.out index cdfa036..97eaa1a 100644 --- ql/src/test/results/clientpositive/rcfile_createas1.q.out +++ ql/src/test/results/clientpositive/rcfile_createas1.q.out @@ -111,12 +111,16 @@ STAGE PLANS: Stats-Aggr Operator Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + 
RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat diff --git ql/src/test/results/clientpositive/rcfile_merge1.q.out ql/src/test/results/clientpositive/rcfile_merge1.q.out index ac6a2bd..0795630 100644 --- ql/src/test/results/clientpositive/rcfile_merge1.q.out +++ ql/src/test/results/clientpositive/rcfile_merge1.q.out @@ -497,12 +497,16 @@ STAGE PLANS: Stats-Aggr Operator Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat diff --git ql/src/test/results/clientpositive/rcfile_merge2.q.out ql/src/test/results/clientpositive/rcfile_merge2.q.out index d8a61f3..db603cc 100644 --- ql/src/test/results/clientpositive/rcfile_merge2.q.out +++ ql/src/test/results/clientpositive/rcfile_merge2.q.out @@ -81,12 +81,16 @@ STAGE PLANS: Stats-Aggr Operator Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat diff --git ql/src/test/results/clientpositive/rcfile_merge3.q.out ql/src/test/results/clientpositive/rcfile_merge3.q.out index 5c717b5..1566ec3 100644 --- ql/src/test/results/clientpositive/rcfile_merge3.q.out +++ ql/src/test/results/clientpositive/rcfile_merge3.q.out @@ -107,12 +107,16 @@ STAGE PLANS: Stats-Aggr Operator Stage: Stage-3 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat Stage: Stage-5 - Merge Work + Merge File Operator + Map Operator Tree: + RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat diff --git ql/src/test/results/clientpositive/tez/orc_merge1.q.out ql/src/test/results/clientpositive/tez/orc_merge1.q.out index f79f878..4be7528 100644 --- ql/src/test/results/clientpositive/tez/orc_merge1.q.out +++ ql/src/test/results/clientpositive/tez/orc_merge1.q.out @@ -6,6 +6,10 @@ PREHOOK: query: DROP TABLE orcfile_merge1b PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE orcfile_merge1b POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE orcfile_merge1c +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orcfile_merge1c +POSTHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING) PARTITIONED BY (ds STRING, part STRING) STORED AS ORC PREHOOK: type: CREATETABLE @@ -26,16 +30,26 @@ POSTHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@orcfile_merge1b -PREHOOK: query: -- Use non stipe-level merge +PREHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcfile_merge1c +POSTHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING) + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcfile_merge1c 
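(Aside: the rcfile_*.q.out hunks above capture the plan-rendering side of this patch — the formerly opaque "Merge Work" stage now prints as a "Merge File Operator" whose map operator tree contains an explicit "RCFile Merge Operator" at merge level: block. A quick way to reproduce such a plan, sketched under the assumption of a patched build; rcfile_demo is an illustrative name, not part of the test suite:

    -- block-level RCFile merge defaults to on (hive.merge.rcfile.block.level=true);
    -- map-only merging must be enabled for the conditional merge stages to appear
    SET hive.merge.mapfiles=true;

    EXPLAIN
    CREATE TABLE rcfile_demo STORED AS RCFILE AS
    SELECT key, value FROM src;

ALTER TABLE ... CONCATENATE on an existing RCFile or ORC partition exercises the same merge operator, as the orc_merge5a concatenate output earlier in this patch shows.)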
+PREHOOK: query: -- merge disabled EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src PREHOOK: type: QUERY -POSTHOOK: query: -- Use non stipe-level merge +POSTHOOK: query: -- merge disabled EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -47,8 +61,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -57,27 +69,17 @@ STAGE PLANS: alias: src Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 100) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) + File Output Operator + compressed: false Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) - Reducer 2 - Reduce Operator Tree: - Extract - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orcfile_merge1 + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orcfile_merge1 Stage: Stage-2 Dependency Collection @@ -99,317 +101,26 @@ STAGE PLANS: Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@orcfile_merge1@ds=1 POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) - SELECT key, value, PMOD(HASH(key), 100) as part + SELECT key, value, PMOD(HASH(key), 2) as part FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@orcfile_merge1@ds=1/part=0 POSTHOOK: Output: default@orcfile_merge1@ds=1/part=1 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=10 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=11 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=12 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=13 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=14 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=15 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=16 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=17 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=18 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=19 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=2 -POSTHOOK: Output: 
default@orcfile_merge1@ds=1/part=20 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=21 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=22 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=23 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=24 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=25 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=26 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=27 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=28 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=29 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=3 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=30 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=31 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=32 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=33 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=34 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=35 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=36 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=37 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=38 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=39 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=4 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=40 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=41 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=42 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=43 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=44 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=45 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=46 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=47 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=48 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=49 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=5 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=50 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=51 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=52 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=53 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=54 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=55 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=56 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=57 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=58 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=59 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=6 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=60 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=61 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=62 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=63 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=64 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=65 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=66 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=67 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=68 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=69 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=7 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=70 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=71 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=72 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=73 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=74 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=75 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=76 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=77 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=78 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=79 
-POSTHOOK: Output: default@orcfile_merge1@ds=1/part=8 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=80 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=82 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=83 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=84 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=85 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=86 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=87 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=88 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=89 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=9 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=90 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=91 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=92 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=93 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=94 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=95 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=96 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=97 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=98 -POSTHOOK: Output: default@orcfile_merge1@ds=1/part=99 POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=13).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=14).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=14).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=15).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=16).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=16).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=17).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 
PARTITION(ds=1,part=17).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=18).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=18).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=19).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=19).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=20).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=20).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=21).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=21).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=22).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=22).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=23).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=23).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=24).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=24).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=25).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=25).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=26).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=26).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=27).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=27).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=28).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=28).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=29).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=29).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=30).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=30).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=31).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=31).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=32).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=32).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=33).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=33).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=34).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=34).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=35).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=35).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=36).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=36).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=37).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=37).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=38).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=38).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=39).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=39).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=40).key 
EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=40).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=41).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=41).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=42).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=42).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=43).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=43).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=44).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=44).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=45).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=45).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=46).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=46).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=47).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=47).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=48).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=48).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=49).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=49).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=4).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=50).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=50).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=51).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=51).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=52).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=52).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=53).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=53).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=54).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=54).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=55).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=55).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=56).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=56).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=57).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=57).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=58).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=58).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=59).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=59).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=5).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=60).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=60).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=61).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=61).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=62).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=62).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=63).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=63).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=64).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=64).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=65).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=65).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=66).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=66).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=67).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=67).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=68).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=68).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=69).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=69).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=6).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=6).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=70).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=70).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=71).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=71).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=72).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=72).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=73).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=73).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=74).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=74).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=75).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=75).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=76).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=76).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=77).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=77).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=78).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=78).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=79).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=79).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=7).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=7).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=80).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=80).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=82).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=82).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=83).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=83).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=84).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=84).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=85).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=85).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=86).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=86).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=87).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=87).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=88).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=88).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=89).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=89).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=8).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=90).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=90).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=91).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=91).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=92).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=92).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=93).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=93).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=94).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=94).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=95).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=95).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=96).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=96).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=97).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=97).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=98).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=98).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=99).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=99).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=9).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='50')
+PREHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='0')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@orcfile_merge1
-POSTHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='50')
+POSTHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='0')
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@orcfile_merge1
# col_name data_type comment
@@ -424,7 +135,7 @@ ds string
part string
# Detailed Partition Information
-Partition Value: [1, 50]
+Partition Value: [1, 0]
Database: default
Table: orcfile_merge1
#### A masked pattern was here ####
@@ -432,10 +143,10 @@ Protect Mode: None
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE true
- numFiles 1
- numRows 3
- rawDataSize 282
- totalSize 312
+ numFiles 6
+ numRows 242
+ rawDataSize 22748
+ totalSize 3030
#### A masked pattern was here ####
# Storage Information
@@ -448,27 +159,74 @@ Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
-PREHOOK: query: EXPLAIN
+PREHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='1')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orcfile_merge1
+POSTHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orcfile_merge1
+# col_name data_type comment
+
+key int
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+part string
+
+# Detailed Partition Information
+Partition Value: [1, 1]
+Database: default
+Table: orcfile_merge1
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 6
+ numRows 258
+ rawDataSize 24252
+ totalSize 3138
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: -- merge disabled
+EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
- SELECT key, value, PMOD(HASH(key), 100) as part
+ SELECT key, value, PMOD(HASH(key), 2) as part
FROM src
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: -- merge disabled
+EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
- SELECT key, value, PMOD(HASH(key), 100) as part
+ SELECT key, value, PMOD(HASH(key), 2) as part
FROM src
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
Tez
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -477,27 +235,26 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 100) (type: int)
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col2 (type: int)
- sort order: +
- Map-reduce partition columns: _col2 (type: int)
+ File Output Operator
+ compressed: false
Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int)
- Reducer 2
- Reduce Operator Tree:
- Extract
- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.orcfile_merge1b
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
Stage: Stage-2
Dependency Collection
@@ -518,320 +275,65 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Tez Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-6
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Tez Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
- SELECT key, value, PMOD(HASH(key), 100) as part
+ SELECT key, value, PMOD(HASH(key), 2) as part
FROM src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@orcfile_merge1b@ds=1
POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
- SELECT key, value, PMOD(HASH(key), 100) as part
+ SELECT key, value, PMOD(HASH(key), 2) as part
FROM src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=0
POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=1
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=10
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=11
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=12
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=13
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=14
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=15
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=16
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=17
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=18
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=19
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=2
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=20
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=21
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=22
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=23
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=24
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=25
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=26
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=27
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=28
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=29
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=3
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=30
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=31
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=32
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=33
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=34
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=35
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=36
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=37
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=38
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=39
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=4
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=40
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=41
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=42
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=43
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=44
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=45
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=46
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=47
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=48
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=49
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=5
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=50
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=51
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=52
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=53
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=54
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=55
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=56
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=57
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=58
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=59
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=6
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=60
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=61
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=62
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=63
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=64
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=65
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=66
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=67
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=68
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=69
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=7
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=70
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=71
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=72
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=73
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=74
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=75
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=76
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=77
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=78
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=79
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=8
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=80
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=82
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=83
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=84
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=85
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=86
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=87
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=88
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=89
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=9
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=90
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=91
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=92
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=93
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=94
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=95
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=96
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=97
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=98
-POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=99
POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=13).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=14).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=14).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=15).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=16).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=16).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=17).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=17).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=18).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=18).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=19).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=19).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=20).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=20).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=21).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=21).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=22).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=22).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=23).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=23).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=24).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=24).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=25).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=25).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=26).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=26).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=27).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=27).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=28).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=28).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=29).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=29).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=30).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=30).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=31).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=31).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=32).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=32).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=33).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=33).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=34).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=34).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=35).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=35).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=36).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=36).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=37).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=37).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=38).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=38).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=39).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=39).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=40).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=40).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=41).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=41).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=42).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=42).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=43).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=43).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=44).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=44).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=45).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=45).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=46).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=46).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=47).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=47).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=48).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=48).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=49).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=49).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=4).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=50).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=50).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=51).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=51).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=52).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=52).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=53).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=53).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=54).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=54).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=55).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=55).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=56).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=56).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=57).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=57).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=58).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=58).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=59).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=59).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=5).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=60).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=60).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=61).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=61).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=62).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=62).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=63).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=63).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=64).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=64).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=65).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=65).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=66).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=66).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=67).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=67).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=68).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=68).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=69).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=69).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=6).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=6).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=70).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=70).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=71).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=71).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=72).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=72).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=73).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=73).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=74).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=74).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=75).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=75).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=76).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=76).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=77).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=77).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=78).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=78).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=79).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=79).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=7).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=7).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=80).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=80).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=82).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=82).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=83).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=83).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=84).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=84).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=85).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=85).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=86).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=86).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=87).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=87).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=88).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=88).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=89).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=89).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=8).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=90).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=90).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=91).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=91).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=92).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=92).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=93).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=93).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=94).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=94).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=95).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=95).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=96).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=96).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=97).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=97).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=98).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=98).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=99).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=99).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=9).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='50')
+PREHOOK: query: DESC FORMATTED orcfile_merge1b partition (ds='1', part='0')
PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@orcfile_merge1
-POSTHOOK: query: DESC FORMATTED orcfile_merge1 partition (ds='1', part='50')
+PREHOOK: Input: default@orcfile_merge1b
+POSTHOOK: query: DESC FORMATTED orcfile_merge1b partition (ds='1', part='0')
POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@orcfile_merge1
+POSTHOOK: Input: default@orcfile_merge1b
# col_name data_type comment
key int
@@ -844,18 +346,261 @@ ds string
part string
# Detailed Partition Information
-Partition Value: [1, 50]
+Partition Value: [1, 0]
Database: default
-Table: orcfile_merge1
+Table: orcfile_merge1b
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 242
+ rawDataSize 22748
+ totalSize 1298
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESC FORMATTED orcfile_merge1b partition (ds='1', part='1')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orcfile_merge1b
+POSTHOOK: query: DESC FORMATTED orcfile_merge1b partition (ds='1', part='1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orcfile_merge1b
+# col_name data_type comment
+
+key int
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+part string
+
+# Detailed Partition Information
+Partition Value: [1, 1]
+Database: default
+Table: orcfile_merge1b
#### A masked pattern was here ####
Protect Mode: None
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
- numRows 3
- rawDataSize 282
- totalSize 312
+ numRows 258
+ rawDataSize 24252
+ totalSize 1322
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1c
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ part 
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1c
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Tez Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-6
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Tez Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge1c@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=1
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DESC FORMATTED orcfile_merge1c partition (ds='1', part='0')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orcfile_merge1c
+POSTHOOK: query: DESC FORMATTED orcfile_merge1c partition (ds='1', part='0')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orcfile_merge1c
+# col_name data_type comment
+
+key int
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+part string
+
+# Detailed Partition Information
+Partition Value: [1, 0]
+Database: default
+Table: orcfile_merge1c
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 242
+ rawDataSize 22748
+ totalSize 2384
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: DESC FORMATTED orcfile_merge1c partition (ds='1', part='1')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orcfile_merge1c
+POSTHOOK: query: DESC FORMATTED orcfile_merge1c partition (ds='1', part='1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orcfile_merge1c
+# col_name data_type comment
+
+key int
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+part string
+
+# Detailed Partition Information
+Partition Value: [1, 1]
+Database: default
+Table: orcfile_merge1c
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 258
+ rawDataSize 24252
+ totalSize 2472
#### A masked pattern was here ####
# Storage Information
@@ -877,103 +622,6 @@ PREHOOK: type: QUERY
PREHOOK: Input: default@orcfile_merge1
PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=10
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=11
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=12
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=13
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=14
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=15
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=16
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=17
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=18
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=19
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=2
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=20
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=21
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=22
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=23
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=24
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=25
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=26
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=27
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=28
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=29
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=3
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=30
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=31
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=32
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=33
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=34
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=35
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=36
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=37
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=38
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=39
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=4
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=40
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=41
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=42
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=43
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=44
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=45
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=46
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=47
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=48
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=49
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=5
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=50
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=51
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=52
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=53
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=54
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=55
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=56
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=57
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=58
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=59
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=6
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=60
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=61
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=62
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=63
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=64
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=65
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=66
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=67
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=68
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=69
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=7
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=70
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=71
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=72
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=73
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=74
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=75
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=76
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=77
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=78
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=79
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=8
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=80
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=82
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=83
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=84
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=85
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=86
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=87
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=88
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=89
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=9
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=90
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=91
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=92
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=93
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=94
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=95
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=96
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=97
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=98
-PREHOOK: Input: default@orcfile_merge1@ds=1/part=99
#### A masked pattern was here ####
POSTHOOK: query: -- Verify
SELECT SUM(HASH(c)) FROM (
@@ -984,105 +632,8 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@orcfile_merge1
POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=10
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=11
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=12
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=13
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=14
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=15
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=16
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=17
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=18
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=19
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=2
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=20
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=21
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=22
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=23
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=24
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=25
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=26
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=27
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=28
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=29
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=3
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=30
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=31
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=32
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=33
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=34
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=35
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=36
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=37
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=38
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=39
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=4
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=40
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=41
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=42
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=43
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=44
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=45
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=46
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=47
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=48
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=49
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=5
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=50
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=51
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=52
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=53
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=54
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=55
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=56
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=57
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=58
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=59
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=6
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=60
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=61
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=62
-POSTHOOK: Input: default@orcfile_merge1@ds=1/part=63 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=64 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=65 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=66 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=67 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=68 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=69 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=7 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=70 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=71 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=72 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=73 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=74 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=75 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=76 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=77 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=78 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=79 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=8 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=80 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=82 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=83 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=84 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=85 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=86 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=87 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=88 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=89 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=9 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=90 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=91 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=92 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=93 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=94 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=95 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=96 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=97 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=98 -POSTHOOK: Input: default@orcfile_merge1@ds=1/part=99 #### A masked pattern was here #### -59521204047 +-21975308766 PREHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) FROM orcfile_merge1b WHERE ds='1' @@ -1091,103 +642,6 @@ PREHOOK: type: QUERY PREHOOK: Input: default@orcfile_merge1b PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0 PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=10 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=11 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=12 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=13 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=14 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=15 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=16 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=17 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=18 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=19 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=2 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=20 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=21 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=22 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=23 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=24 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=25 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=26 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=27 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=28 
-PREHOOK: Input: default@orcfile_merge1b@ds=1/part=29 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=3 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=30 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=31 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=32 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=33 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=34 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=35 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=36 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=37 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=38 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=39 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=4 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=40 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=41 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=42 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=43 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=44 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=45 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=46 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=47 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=48 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=49 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=5 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=50 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=51 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=52 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=53 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=54 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=55 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=56 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=57 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=58 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=59 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=6 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=60 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=61 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=62 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=63 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=64 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=65 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=66 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=67 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=68 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=69 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=7 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=70 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=71 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=72 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=73 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=74 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=75 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=76 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=77 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=78 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=79 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=8 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=80 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=82 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=83 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=84 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=85 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=86 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=87 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=88 -PREHOOK: Input: 
default@orcfile_merge1b@ds=1/part=89 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=9 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=90 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=91 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=92 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=93 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=94 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=95 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=96 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=97 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=98 -PREHOOK: Input: default@orcfile_merge1b@ds=1/part=99 #### A masked pattern was here #### POSTHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) @@ -1197,105 +651,27 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orcfile_merge1b POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0 POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=10 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=11 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=12 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=13 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=14 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=15 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=16 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=17 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=18 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=19 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=2 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=20 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=21 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=22 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=23 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=24 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=25 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=26 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=27 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=28 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=29 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=3 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=30 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=31 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=32 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=33 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=34 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=35 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=36 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=37 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=38 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=39 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=4 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=40 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=41 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=42 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=43 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=44 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=45 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=46 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=47 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=48 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=49 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=5 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=50 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=51 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=52 -POSTHOOK: Input: 
default@orcfile_merge1b@ds=1/part=53 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=54 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=55 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=56 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=57 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=58 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=59 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=6 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=60 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=61 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=62 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=63 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=64 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=65 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=66 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=67 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=68 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=69 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=7 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=70 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=71 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=72 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=73 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=74 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=75 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=76 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=77 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=78 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=79 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=8 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=80 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=82 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=83 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=84 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=85 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=86 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=87 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=88 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=89 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=9 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=90 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=91 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=92 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=93 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=94 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=95 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=96 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=97 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=98 -POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=99 #### A masked pattern was here #### -59521204047 +-21975308766 +PREHOOK: query: SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1c WHERE ds='1' +) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orcfile_merge1c +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0 +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(c)) FROM ( + SELECT TRANSFORM(*) USING 'tr \t _' AS (c) + FROM orcfile_merge1c WHERE ds='1' +) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcfile_merge1c +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0 +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1 +#### A masked pattern was here #### +-21975308766 PREHOOK: query: DROP TABLE orcfile_merge1 PREHOOK: type: DROPTABLE 
 PREHOOK: Input: default@orcfile_merge1
@@ -1312,3 +688,11 @@ POSTHOOK: query: DROP TABLE orcfile_merge1b
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@orcfile_merge1b
 POSTHOOK: Output: default@orcfile_merge1b
+PREHOOK: query: DROP TABLE orcfile_merge1c
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Output: default@orcfile_merge1c
+POSTHOOK: query: DROP TABLE orcfile_merge1c
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Output: default@orcfile_merge1c
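The next golden file is the new Tez output for orc_merge5.q, which exercises the ORC stripe-level fast merge end to end: an insert that writes several small files, the conditional merge task that collapses them, and alter table ... concatenate. The .q file itself is not part of this hunk; as a rough sketch only, a session along the following lines would produce the plans and file counts recorded below (the config keys are real HiveConf/Tez keys, but the split-size value is a hypothetical choice to force several mappers):

    -- illustrative sketch, not taken from this patch
    set hive.merge.tezfiles=true;              -- merge small files produced by a Tez DAG
    set hive.merge.orcfile.stripe.level=true;  -- fast merge: move whole ORC stripes, no decode/re-encode
    set tez.grouping.max-size=1000;            -- hypothetical: keep splits tiny so 3 mappers write 3 files
    insert overwrite table orc_merge5b
      select userid, string1, subtype, decimal1, ts
      from orc_merge5 where userid <= 13;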
diff --git ql/src/test/results/clientpositive/tez/orc_merge5.q.out ql/src/test/results/clientpositive/tez/orc_merge5.q.out
new file mode 100644
index 0000000..27e3b31
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/orc_merge5.q.out
@@ -0,0 +1,482 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_merge5
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_merge5
+PREHOOK: query: create table orc_merge5b (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: create table orc_merge5b (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_merge5
+POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_merge5
+PREHOOK: query: -- 3 mappers
+explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+POSTHOOK: query: -- 3 mappers
+explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: orc_merge5
+                  filterExpr: (userid <= 13) (type: boolean)
+                  Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (userid <= 13) (type: boolean)
+                    Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                            name: default.orc_merge5b
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.orc_merge5b
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5b
+POSTHOOK: Lineage: orc_merge5b.decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: -- 3 files total
+analyze table orc_merge5b compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: -- 3 files total
+analyze table orc_merge5b compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: desc formatted orc_merge5b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5b
+POSTHOOK: query: desc formatted orc_merge5b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5b
+# col_name data_type comment
+
+userid bigint
+string1 string
+subtype double
+decimal1 decimal(10,0)
+ts timestamp
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	true
+	numFiles	3
+	numRows	3
+	rawDataSize	765
+	totalSize	1141
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: select * from orc_merge5b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_merge5b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+13 bar 80.0 2 1969-12-31 16:00:05
+2 foo 0.8 1 1969-12-31 16:00:00
+5 eat 0.8 6 1969-12-31 16:00:20
+PREHOOK: query: -- 3 mappers
+explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+POSTHOOK: query: -- 3 mappers
+explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
+  Stage-5
+  Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4
+  Stage-6
+  Stage-7 depends on stages: Stage-6
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: orc_merge5
+                  filterExpr: (userid <= 13) (type: boolean)
+                  Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (userid <= 13) (type: boolean)
+                    Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                            name: default.orc_merge5b
+
+  Stage: Stage-8
+    Conditional Operator
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.orc_merge5b
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+  Stage: Stage-4
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Tez Merge File Work
+            Merge File Operator
+              Map Operator Tree:
+                  ORC File Merge Operator
+              merge level: stripe
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+  Stage: Stage-6
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Tez Merge File Work
+            Merge File Operator
+              Map Operator Tree:
+                  ORC File Merge Operator
+              merge level: stripe
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+  Stage: Stage-7
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
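Compared with the first plan above, enabling merge adds a Conditional Operator (Stage-8) that at run time picks between a plain move (Stage-5), a full merge (Stage-4), and a partial merge plus move (Stage-6/Stage-7); the merge vertices carry the new ORC File Merge Operator at stripe level. The noscan statistics are how the test observes the effect; a sketch of the same check, using the statements visible in this output:

    -- expect numFiles to drop from 3 to 1 once the merge task has run
    analyze table orc_merge5b compute statistics noscan;
    desc formatted orc_merge5b;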
+PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5b
+POSTHOOK: Lineage: orc_merge5b.decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: -- 1 file after merging
+analyze table orc_merge5b compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: -- 1 file after merging
+analyze table orc_merge5b compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: desc formatted orc_merge5b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5b
+POSTHOOK: query: desc formatted orc_merge5b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5b
+# col_name data_type comment
+
+userid bigint
+string1 string
+subtype double
+decimal1 decimal(10,0)
+ts timestamp
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	true
+	numFiles	1
+	numRows	3
+	rawDataSize	765
+	totalSize	907
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: select * from orc_merge5b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_merge5b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+13 bar 80.0 2 1969-12-31 16:00:05
+2 foo 0.8 1 1969-12-31 16:00:00
+5 eat 0.8 6 1969-12-31 16:00:20
+PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5b
+POSTHOOK: Lineage: orc_merge5b.decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: analyze table orc_merge5b compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: analyze table orc_merge5b compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: desc formatted orc_merge5b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5b
+POSTHOOK: query: desc formatted orc_merge5b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5b
+# col_name data_type comment
+
+userid bigint
+string1 string
+subtype double
+decimal1 decimal(10,0)
+ts timestamp
+
+# Detailed Table Information
+Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 3 + rawDataSize 765 + totalSize 1141 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from orc_merge5b +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5b +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5b +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 +2 foo 0.8 1 1969-12-31 16:00:00 +5 eat 0.8 6 1969-12-31 16:00:20 +PREHOOK: query: explain alter table orc_merge5b concatenate +PREHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: query: explain alter table orc_merge5b concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-0 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5b + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: alter table orc_merge5b concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@orc_merge5b +PREHOOK: Output: default@orc_merge5b +POSTHOOK: query: alter table orc_merge5b concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@orc_merge5b +POSTHOOK: Output: default@orc_merge5b +PREHOOK: query: -- 1 file after merging +analyze table orc_merge5b compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5b +POSTHOOK: query: -- 1 file after merging +analyze table orc_merge5b compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5b +PREHOOK: query: desc formatted orc_merge5b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5b +POSTHOOK: query: desc formatted orc_merge5b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5b +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 3 + rawDataSize 765 + totalSize 907 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select * from orc_merge5b +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5b +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5b 
+#### A masked pattern was here ####
+13 bar 80.0 2 1969-12-31 16:00:05
+2 foo 0.8 1 1969-12-31 16:00:00
+5 eat 0.8 6 1969-12-31 16:00:20
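orc_merge6.q.out repeats the same flow for static partitions: each insert targets an explicit (year, hour) partition spec, the merge runs once per partition, and concatenate is likewise issued per partition. A sketch of the shape of the statements verified below, taken from the queries visible in this output:

    insert overwrite table orc_merge5a partition (year="2000", hour=24)
      select userid, string1, subtype, decimal1, ts
      from orc_merge5 where userid <= 13;
    alter table orc_merge5a partition (year="2000", hour=24) concatenate;
    analyze table orc_merge5a partition (year="2000", hour=24) compute statistics noscan;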
diff --git ql/src/test/results/clientpositive/tez/orc_merge6.q.out ql/src/test/results/clientpositive/tez/orc_merge6.q.out
new file mode 100644
index 0000000..575564e
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/orc_merge6.q.out
@@ -0,0 +1,844 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+-- orc file merge tests for static partitions
+create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_merge5
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+-- orc file merge tests for static partitions
+create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_merge5
+PREHOOK: query: create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (year string, hour int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_merge5a
+POSTHOOK: query: create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (year string, hour int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_merge5a
+PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_merge5
+POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_merge5
+PREHOOK: query: -- 3 mappers
+explain insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+POSTHOOK: query: -- 3 mappers
+explain insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: orc_merge5
+                  filterExpr: (userid <= 13) (type: boolean)
+                  Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (userid <= 13) (type: boolean)
+                    Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                            name: default.orc_merge5a
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            hour 24
+            year 2000
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.orc_merge5a
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5a@year=2000/hour=24
+POSTHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24
+POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5a@year=2001/hour=24
+POSTHOOK: query: insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24
+POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: -- 3 files total
+analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5a
+PREHOOK: Output: default@orc_merge5a@year=2000/hour=24
+POSTHOOK: query: -- 3 files total
+analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5a
+POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +PREHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +PREHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2000, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 3 + rawDataSize 765 + totalSize 1141 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2001, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 3 + rawDataSize 765 + totalSize 1141 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +year=2000/hour=24 +year=2001/hour=24 +PREHOOK: query: select * from orc_merge5a +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@year=2000/hour=24 +PREHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern 
was here #### +13 bar 80.0 2 1969-12-31 16:00:05 2000 24 +13 bar 80.0 2 1969-12-31 16:00:05 2001 24 +2 foo 0.8 1 1969-12-31 16:00:00 2000 24 +2 foo 0.8 1 1969-12-31 16:00:00 2001 24 +5 eat 0.8 6 1969-12-31 16:00:20 2000 24 +5 eat 0.8 6 1969-12-31 16:00:20 2001 24 +PREHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_merge5 + filterExpr: (userid <= 13) (type: boolean) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (userid <= 13) (type: boolean) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-5 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + hour 24 + year 2000 + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Tez +#### A masked pattern was here #### + Vertices: + Tez Merge File Work + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-6 + Tez +#### A masked pattern was here #### + Vertices: + Tez Merge File Work + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: insert overwrite table orc_merge5a partition 
(year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +PREHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +PREHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment 
+ +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2000, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 3 + rawDataSize 765 + totalSize 907 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2001, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 3 + rawDataSize 765 + totalSize 907 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +year=2000/hour=24 +year=2001/hour=24 +PREHOOK: query: select * from orc_merge5a +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@year=2000/hour=24 +PREHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 2000 24 +13 bar 80.0 2 1969-12-31 16:00:05 2001 24 +2 foo 0.8 1 1969-12-31 16:00:00 2000 24 +2 foo 0.8 1 1969-12-31 16:00:00 2001 24 +5 eat 0.8 6 1969-12-31 16:00:20 2000 24 +5 eat 0.8 6 1969-12-31 16:00:20 2001 24 +PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: 
default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2000,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: insert overwrite table orc_merge5a partition (year="2001",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(year=2001,hour=24).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +PREHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +PREHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: 
[2000, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 3 + rawDataSize 765 + totalSize 1141 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2001, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 3 + numRows 3 + rawDataSize 765 + totalSize 1141 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +year=2000/hour=24 +year=2001/hour=24 +PREHOOK: query: select * from orc_merge5a +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@year=2000/hour=24 +PREHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 2000 24 +13 bar 80.0 2 1969-12-31 16:00:05 2001 24 +2 foo 0.8 1 1969-12-31 16:00:00 2000 24 +2 foo 0.8 1 1969-12-31 16:00:00 2001 24 +5 eat 0.8 6 1969-12-31 16:00:20 2000 24 +5 eat 0.8 6 1969-12-31 16:00:20 2001 24 +PREHOOK: query: explain alter table orc_merge5a partition(year="2000",hour=24) concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: query: explain alter table orc_merge5a partition(year="2000",hour=24) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-0 + + Stage: Stage-1 + Move Operator + tables: + partition: + hour 24 + year 2000 + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-2 + Stats-Aggr 
Operator + +PREHOOK: query: alter table orc_merge5a partition(year="2000",hour=24) concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: alter table orc_merge5a partition(year="2000",hour=24) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +PREHOOK: query: alter table orc_merge5a partition(year="2001",hour=24) concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: alter table orc_merge5a partition(year="2001",hour=24) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +PREHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(year="2000",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2000/hour=24 +PREHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@year=2001/hour=24 +POSTHOOK: query: analyze table orc_merge5a partition(year="2001",hour=24) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@year=2001/hour=24 +PREHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2000",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2000, 24] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 3 + rawDataSize 765 + totalSize 907 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(year="2001",hour=24) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +year string +hour int + +# Detailed Partition Information +Partition Value: [2001, 24] +Database: default +Table: orc_merge5a +#### A masked 
pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 3 + rawDataSize 765 + totalSize 907 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +year=2000/hour=24 +year=2001/hour=24 +PREHOOK: query: select * from orc_merge5a +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@year=2000/hour=24 +PREHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@year=2000/hour=24 +POSTHOOK: Input: default@orc_merge5a@year=2001/hour=24 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 2000 24 +13 bar 80.0 2 1969-12-31 16:00:05 2001 24 +2 foo 0.8 1 1969-12-31 16:00:00 2000 24 +2 foo 0.8 1 1969-12-31 16:00:00 2001 24 +5 eat 0.8 6 1969-12-31 16:00:20 2000 24 +5 eat 0.8 6 1969-12-31 16:00:20 2001 24 diff --git ql/src/test/results/clientpositive/tez/orc_merge7.q.out ql/src/test/results/clientpositive/tez/orc_merge7.q.out new file mode 100644 index 0000000..011f279 --- /dev/null +++ ql/src/test/results/clientpositive/tez/orc_merge7.q.out @@ -0,0 +1,947 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- orc merge file tests for dynamic partition case + +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_merge5 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- orc merge file tests for dynamic partition case + +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_merge5 +PREHOOK: query: create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (st double) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (st double) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_merge5a +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_merge5 +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_merge5 +PREHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: 
type: QUERY +POSTHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_merge5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp), subtype (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + st + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, 
comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: 
orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: -- 3 files total +analyze table orc_merge5a partition(st=80.0) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: query: -- 3 files total +analyze table orc_merge5a partition(st=80.0) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=80.0 +PREHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=0.8 +PREHOOK: query: desc formatted orc_merge5a partition(st=80.0) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=80.0) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [80.0] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1 + rawDataSize 255 + totalSize 521 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a 
partition(st=0.8) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=0.8) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [0.8] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 2 + numRows 2 + rawDataSize 510 + totalSize 1058 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +st=0.8 +st=1.8 +st=8.0 +st=80.0 +PREHOOK: query: select * from orc_merge5a where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@st=0.8 +PREHOOK: Input: default@orc_merge5a@st=1.8 +PREHOOK: Input: default@orc_merge5a@st=8.0 +PREHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@st=0.8 +POSTHOOK: Input: default@orc_merge5a@st=1.8 +POSTHOOK: Input: default@orc_merge5a@st=8.0 +POSTHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 80.0 +2 foo 0.8 1 1969-12-31 16:00:00 0.8 +5 eat 0.8 6 1969-12-31 16:00:20 0.8 +PREHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_merge5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp), subtype (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-5 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + st + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Tez +#### A masked pattern was here #### + Vertices: + Tez Merge File Work + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-6 + Tez +#### A masked pattern was here #### + Vertices: + Tez Merge File Work + Merge File Operator + Map Operator Tree: + ORC File Merge Operator + merge level: stripe + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: 
Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a 
PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(st=80.0) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(st=80.0) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=80.0 +PREHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=0.8 +PREHOOK: query: desc formatted orc_merge5a partition(st=80.0) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=80.0) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [80.0] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1 + rawDataSize 255 + totalSize 521 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(st=0.8) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=0.8) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 
decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [0.8] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 2 + rawDataSize 510 + totalSize 852 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +st=0.8 +st=1.8 +st=8.0 +st=80.0 +PREHOOK: query: select * from orc_merge5a where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@st=0.8 +PREHOOK: Input: default@orc_merge5a@st=1.8 +PREHOOK: Input: default@orc_merge5a@st=8.0 +PREHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@st=0.8 +POSTHOOK: Input: default@orc_merge5a@st=1.8 +POSTHOOK: Input: default@orc_merge5a@st=8.0 +POSTHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 80.0 +2 foo 0.8 1 1969-12-31 16:00:00 0.8 +5 eat 0.8 6 1969-12-31 16:00:20 0.8 +PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE 
[(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5a +POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: Output: default@orc_merge5a@st=1.8 +POSTHOOK: Output: default@orc_merge5a@st=8.0 +POSTHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE 
[(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: analyze table orc_merge5a partition(st=80.0) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: query: analyze table orc_merge5a partition(st=80.0) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=80.0 +PREHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=0.8 +PREHOOK: query: desc formatted orc_merge5a partition(st=80.0) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=80.0) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [80.0] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1 + rawDataSize 255 + totalSize 521 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(st=0.8) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=0.8) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [0.8] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 2 + numRows 2 + rawDataSize 510 + totalSize 1058 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@orc_merge5a +st=0.8 +st=1.8 +st=8.0 +st=80.0 +PREHOOK: query: select * from orc_merge5a where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@st=0.8 +PREHOOK: Input: default@orc_merge5a@st=1.8 +PREHOOK: Input: default@orc_merge5a@st=8.0 +PREHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@st=0.8 +POSTHOOK: Input: default@orc_merge5a@st=1.8 +POSTHOOK: Input: default@orc_merge5a@st=8.0 +POSTHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 80.0 +2 foo 0.8 1 1969-12-31 16:00:00 0.8 +5 eat 0.8 6 1969-12-31 16:00:20 0.8 +PREHOOK: query: explain alter table orc_merge5a partition(st=80.0) concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: query: explain alter table orc_merge5a partition(st=80.0) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-0 + + Stage: Stage-1 + Move Operator + tables: + partition: + st 80.0 + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5a + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: alter table orc_merge5a partition(st=80.0) concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: query: alter table orc_merge5a partition(st=80.0) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=80.0 +PREHOOK: query: alter table orc_merge5a partition(st=0.8) concatenate +PREHOOK: type: 
ALTER_PARTITION_MERGE +PREHOOK: Input: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: query: alter table orc_merge5a partition(st=0.8) concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=0.8 +PREHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(st=80.0) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=80.0 +POSTHOOK: query: -- 1 file after merging +analyze table orc_merge5a partition(st=80.0) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=80.0 +PREHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Output: default@orc_merge5a +PREHOOK: Output: default@orc_merge5a@st=0.8 +POSTHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Output: default@orc_merge5a +POSTHOOK: Output: default@orc_merge5a@st=0.8 +PREHOOK: query: desc formatted orc_merge5a partition(st=80.0) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=80.0) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [80.0] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 1 + rawDataSize 255 + totalSize 521 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted orc_merge5a partition(st=0.8) +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: desc formatted orc_merge5a partition(st=0.8) +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@orc_merge5a +# col_name data_type comment + +userid bigint +string1 string +subtype double +decimal1 decimal(10,0) +ts timestamp + +# Partition Information +# col_name data_type comment + +st double + +# Detailed Partition Information +Partition Value: [0.8] +Database: default +Table: orc_merge5a +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows 2 + rawDataSize 510 + totalSize 852 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show partitions orc_merge5a +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@orc_merge5a +POSTHOOK: query: show partitions orc_merge5a +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: 
Input: default@orc_merge5a +st=0.8 +st=1.8 +st=8.0 +st=80.0 +PREHOOK: query: select * from orc_merge5a where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5a +PREHOOK: Input: default@orc_merge5a@st=0.8 +PREHOOK: Input: default@orc_merge5a@st=1.8 +PREHOOK: Input: default@orc_merge5a@st=8.0 +PREHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_merge5a where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5a +POSTHOOK: Input: default@orc_merge5a@st=0.8 +POSTHOOK: Input: default@orc_merge5a@st=1.8 +POSTHOOK: Input: default@orc_merge5a@st=8.0 +POSTHOOK: Input: default@orc_merge5a@st=80.0 +#### A masked pattern was here #### +13 bar 80.0 2 1969-12-31 16:00:05 80.0 +2 foo 0.8 1 1969-12-31 16:00:00 0.8 +5 eat 0.8 6 1969-12-31 16:00:20 0.8 diff --git ql/src/test/results/clientpositive/tez/orc_merge_incompat1.q.out ql/src/test/results/clientpositive/tez/orc_merge_incompat1.q.out new file mode 100644 index 0000000..2bf4634 --- /dev/null +++ ql/src/test/results/clientpositive/tez/orc_merge_incompat1.q.out @@ -0,0 +1,295 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_merge5 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_merge5 +PREHOOK: query: create table orc_merge5b (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_merge5b +POSTHOOK: query: create table orc_merge5b (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_merge5b +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_merge5 +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_merge5 +PREHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 mappers +explain insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_merge5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (userid <= 13) (type: boolean) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts 
(type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5b + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_merge5b + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5b +POSTHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5b +POSTHOOK: Lineage: orc_merge5b.decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5b.string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5b.subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5b.ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5b.userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert into table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5b +POSTHOOK: query: insert into table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5b +POSTHOOK: Lineage: orc_merge5b.decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: orc_merge5b.string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_merge5b.subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ] +POSTHOOK: Lineage: orc_merge5b.ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_merge5b.userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ] +PREHOOK: query: insert into table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_merge5 +PREHOOK: Output: default@orc_merge5b +POSTHOOK: query: insert into table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_merge5 +POSTHOOK: Output: default@orc_merge5b +POSTHOOK: Lineage: orc_merge5b.decimal1 SIMPLE 
[(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: insert into table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: insert into table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5b
+POSTHOOK: Lineage: orc_merge5b.decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: insert into table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: insert into table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5b
+POSTHOOK: Lineage: orc_merge5b.decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5b.userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: -- 5 files total
+analyze table orc_merge5b compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: -- 5 files total
+analyze table orc_merge5b compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: desc formatted orc_merge5b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5b
+POSTHOOK: query: desc formatted orc_merge5b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5b
+# col_name	data_type	comment
+
+userid	bigint
+string1	string
+subtype	double
+decimal1	decimal(10,0)
+ts	timestamp
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Protect Mode:	None
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	true
+	numFiles	5
+	numRows	15
+	rawDataSize	3825
+	totalSize	2862
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: select * from orc_merge5b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_merge5b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+13	bar	80.0	2	1969-12-31 16:00:05
+13	bar	80.0	2	1969-12-31 16:00:05
+13	bar	80.0	2	1969-12-31 16:00:05
+13	bar	80.0	2	1969-12-31 16:00:05
+13	bar	80.0	2	1969-12-31 16:00:05
+2	foo	0.8	1	1969-12-31 16:00:00
+2	foo	0.8	1	1969-12-31 16:00:00
+2	foo	0.8	1	1969-12-31 16:00:00
+2	foo	0.8	1	1969-12-31 16:00:00
+2	foo	0.8	1	1969-12-31 16:00:00
+5	eat	0.8	6	1969-12-31 16:00:20
+5	eat	0.8	6	1969-12-31 16:00:20
+5	eat	0.8	6	1969-12-31 16:00:20
+5	eat	0.8	6	1969-12-31 16:00:20
+5	eat	0.8	6	1969-12-31 16:00:20
+PREHOOK: query: alter table orc_merge5b concatenate
+PREHOOK: type: ALTER_TABLE_MERGE
+PREHOOK: Input: default@orc_merge5b
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: alter table orc_merge5b concatenate
+POSTHOOK: type: ALTER_TABLE_MERGE
+POSTHOOK: Input: default@orc_merge5b
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: -- 3 file after merging - all 0.12 format files will be merged and 0.11 files will be left behind
+analyze table orc_merge5b compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5b
+POSTHOOK: query: -- 3 file after merging - all 0.12 format files will be merged and 0.11 files will be left behind
+analyze table orc_merge5b compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5b
+PREHOOK: query: desc formatted orc_merge5b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5b
+POSTHOOK: query: desc formatted orc_merge5b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5b
+# col_name	data_type	comment
+
+userid	bigint
+string1	string
+subtype	double
+decimal1	decimal(10,0)
+ts	timestamp
+
+# Detailed Table Information
+Database:	default
+#### A masked pattern was here ####
+Protect Mode:	None
+Retention:	0
+#### A masked pattern was here ####
+Table Type:	MANAGED_TABLE
+Table Parameters:
+	COLUMN_STATS_ACCURATE	true
+	numFiles	3
+	numRows	15
+	rawDataSize	3825
+	totalSize	2325
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: select * from orc_merge5b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_merge5b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5b
+#### A masked pattern was here ####
+13	bar	80.0	2	1969-12-31 16:00:05
+13	bar	80.0	2	1969-12-31 16:00:05
+13	bar	80.0	2	1969-12-31 16:00:05
+13	bar	80.0	2	1969-12-31 16:00:05
+13	bar	80.0	2	1969-12-31 16:00:05
+2	foo	0.8	1	1969-12-31 16:00:00
+2	foo	0.8	1	1969-12-31 16:00:00
+2	foo	0.8	1	1969-12-31 16:00:00
+2	foo	0.8	1	1969-12-31 16:00:00
+2	foo	0.8	1	1969-12-31 16:00:00
+5	eat	0.8	6	1969-12-31 16:00:20
+5	eat	0.8	6	1969-12-31 16:00:20
+5	eat	0.8	6	1969-12-31 16:00:20
+5	eat	0.8	6	1969-12-31 16:00:20
+5	eat	0.8	6	1969-12-31 16:00:20
diff --git ql/src/test/results/clientpositive/tez/orc_merge_incompat2.q.out ql/src/test/results/clientpositive/tez/orc_merge_incompat2.q.out
new file mode 100644
index 0000000..4d21749
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/orc_merge_incompat2.q.out
@@ -0,0 +1,534 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+-- orc merge file tests for dynamic partition case
+
+create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_merge5
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+-- orc merge file tests for dynamic partition case
+
+create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_merge5
+PREHOOK: query: create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (st double) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_merge5a
+POSTHOOK: query: create table orc_merge5a (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (st double) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_merge5a
+PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_merge5
+POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_merge5
+PREHOOK: query: explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: orc_merge5
+                  Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(10,0)), ts (type: timestamp), subtype (type: double)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                          name: default.orc_merge5a
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            st 
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.orc_merge5a
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5a
+POSTHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5a@st=0.8
+POSTHOOK: Output: default@orc_merge5a@st=1.8
+POSTHOOK: Output: default@orc_merge5a@st=8.0
+POSTHOOK: Output: default@orc_merge5a@st=80.0
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: insert into table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5a
+POSTHOOK: query: insert into table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5a@st=0.8
+POSTHOOK: Output: default@orc_merge5a@st=1.8
+POSTHOOK: Output: default@orc_merge5a@st=8.0
+POSTHOOK: Output: default@orc_merge5a@st=80.0
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: insert into table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5a
+POSTHOOK: query: insert into table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5a@st=0.8
+POSTHOOK: Output: default@orc_merge5a@st=1.8
+POSTHOOK: Output: default@orc_merge5a@st=8.0
+POSTHOOK: Output: default@orc_merge5a@st=80.0
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: insert into table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5
+PREHOOK: Output: default@orc_merge5a
+POSTHOOK: query: insert into table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5
+POSTHOOK: Output: default@orc_merge5a@st=0.8
+POSTHOOK: Output: default@orc_merge5a@st=1.8
+POSTHOOK: Output: default@orc_merge5a@st=8.0
+POSTHOOK: Output: default@orc_merge5a@st=80.0
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=0.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=1.8).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=8.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).decimal1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:decimal1, type:decimal(10,0), comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).string1 SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).subtype SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:subtype, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).ts SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: orc_merge5a PARTITION(st=80.0).userid SIMPLE [(orc_merge5)orc_merge5.FieldSchema(name:userid, type:bigint, comment:null), ]
+PREHOOK: query: analyze table orc_merge5a partition(st=80.0) compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5a
+PREHOOK: Output: default@orc_merge5a@st=80.0
+POSTHOOK: query: analyze table orc_merge5a partition(st=80.0) compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5a
+POSTHOOK: Output: default@orc_merge5a@st=80.0
+PREHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5a
+PREHOOK: Output: default@orc_merge5a@st=0.8
+POSTHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5a
+POSTHOOK: Output: default@orc_merge5a@st=0.8
+PREHOOK: query: desc formatted orc_merge5a partition(st=80.0)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5a
+POSTHOOK: query: desc formatted orc_merge5a partition(st=80.0)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5a
+# col_name	data_type	comment
+
+userid	bigint
+string1	string
+subtype	double
+decimal1	decimal(10,0)
+ts	timestamp
+
+# Partition Information
+# col_name	data_type	comment
+
+st	double
+
+# Detailed Partition Information
+Partition Value:	[80.0]
+Database:	default
+Table:	orc_merge5a
+#### A masked pattern was here ####
+Protect Mode:	None
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	true
+	numFiles	4
+	numRows	4
+	rawDataSize	1020
+	totalSize	2092
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: desc formatted orc_merge5a partition(st=0.8)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5a
+POSTHOOK: query: desc formatted orc_merge5a partition(st=0.8)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5a
+# col_name	data_type	comment
+
+userid	bigint
+string1	string
+subtype	double
+decimal1	decimal(10,0)
+ts	timestamp
+
+# Partition Information
+# col_name	data_type	comment
+
+st	double
+
+# Detailed Partition Information
+Partition Value:	[0.8]
+Database:	default
+Table:	orc_merge5a
+#### A masked pattern was here ####
+Protect Mode:	None
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	true
+	numFiles	4
+	numRows	8
+	rawDataSize	2040
+	totalSize	2204
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: show partitions orc_merge5a
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@orc_merge5a
+POSTHOOK: query: show partitions orc_merge5a
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@orc_merge5a
+st=0.8
+st=1.8
+st=8.0
+st=80.0
+PREHOOK: query: select * from orc_merge5a where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5a
+PREHOOK: Input: default@orc_merge5a@st=0.8
+PREHOOK: Input: default@orc_merge5a@st=1.8
+PREHOOK: Input: default@orc_merge5a@st=8.0
+PREHOOK: Input: default@orc_merge5a@st=80.0
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_merge5a where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5a
+POSTHOOK: Input: default@orc_merge5a@st=0.8
+POSTHOOK: Input: default@orc_merge5a@st=1.8
+POSTHOOK: Input: default@orc_merge5a@st=8.0
+POSTHOOK: Input: default@orc_merge5a@st=80.0
+#### A masked pattern was here ####
+13	bar	80.0	2	1969-12-31 16:00:05	80.0
+13	bar	80.0	2	1969-12-31 16:00:05	80.0
+13	bar	80.0	2	1969-12-31 16:00:05	80.0
+13	bar	80.0	2	1969-12-31 16:00:05	80.0
+2	foo	0.8	1	1969-12-31 16:00:00	0.8
+2	foo	0.8	1	1969-12-31 16:00:00	0.8
+2	foo	0.8	1	1969-12-31 16:00:00	0.8
+2	foo	0.8	1	1969-12-31 16:00:00	0.8
+5	eat	0.8	6	1969-12-31 16:00:20	0.8
+5	eat	0.8	6	1969-12-31 16:00:20	0.8
+5	eat	0.8	6	1969-12-31 16:00:20	0.8
+5	eat	0.8	6	1969-12-31 16:00:20	0.8
+PREHOOK: query: explain alter table orc_merge5a partition(st=80.0) concatenate
+PREHOOK: type: ALTER_PARTITION_MERGE
+POSTHOOK: query: explain alter table orc_merge5a partition(st=80.0) concatenate
+POSTHOOK: type: ALTER_PARTITION_MERGE
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-0
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          partition:
+            st 80.0
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.orc_merge5a
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: alter table orc_merge5a partition(st=80.0) concatenate
+PREHOOK: type: ALTER_PARTITION_MERGE
+PREHOOK: Input: default@orc_merge5a
+PREHOOK: Output: default@orc_merge5a@st=80.0
+POSTHOOK: query: alter table orc_merge5a partition(st=80.0) concatenate
+POSTHOOK: type: ALTER_PARTITION_MERGE
+POSTHOOK: Input: default@orc_merge5a
+POSTHOOK: Output: default@orc_merge5a@st=80.0
+PREHOOK: query: alter table orc_merge5a partition(st=0.8) concatenate
+PREHOOK: type: ALTER_PARTITION_MERGE
+PREHOOK: Input: default@orc_merge5a
+PREHOOK: Output: default@orc_merge5a@st=0.8
+POSTHOOK: query: alter table orc_merge5a partition(st=0.8) concatenate
+POSTHOOK: type: ALTER_PARTITION_MERGE
+POSTHOOK: Input: default@orc_merge5a
+POSTHOOK: Output: default@orc_merge5a@st=0.8
+PREHOOK: query: analyze table orc_merge5a partition(st=80.0) compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5a
+PREHOOK: Output: default@orc_merge5a@st=80.0
+POSTHOOK: query: analyze table orc_merge5a partition(st=80.0) compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5a
+POSTHOOK: Output: default@orc_merge5a@st=80.0
+PREHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan
+PREHOOK: type: QUERY
+PREHOOK: Output: default@orc_merge5a
+PREHOOK: Output: default@orc_merge5a@st=0.8
+POSTHOOK: query: analyze table orc_merge5a partition(st=0.8) compute statistics noscan
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@orc_merge5a
+POSTHOOK: Output: default@orc_merge5a@st=0.8
+PREHOOK: query: desc formatted orc_merge5a partition(st=80.0)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5a
+POSTHOOK: query: desc formatted orc_merge5a partition(st=80.0)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5a
+# col_name	data_type	comment
+
+userid	bigint
+string1	string
+subtype	double
+decimal1	decimal(10,0)
+ts	timestamp
+
+# Partition Information
+# col_name	data_type	comment
+
+st	double
+
+# Detailed Partition Information
+Partition Value:	[80.0]
+Database:	default
+Table:	orc_merge5a
+#### A masked pattern was here ####
+Protect Mode:	None
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	true
+	numFiles	3
+	numRows	4
+	rawDataSize	1020
+	totalSize	1851
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: desc formatted orc_merge5a partition(st=0.8)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@orc_merge5a
+POSTHOOK: query: desc formatted orc_merge5a partition(st=0.8)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@orc_merge5a
+# col_name	data_type	comment
+
+userid	bigint
+string1	string
+subtype	double
+decimal1	decimal(10,0)
+ts	timestamp
+
+# Partition Information
+# col_name	data_type	comment
+
+st	double
+
+# Detailed Partition Information
+Partition Value:	[0.8]
+Database:	default
+Table:	orc_merge5a
+#### A masked pattern was here ####
+Protect Mode:	None
+#### A masked pattern was here ####
+Partition Parameters:
+	COLUMN_STATS_ACCURATE	true
+	numFiles	3
+	numRows	8
+	rawDataSize	2040
+	totalSize	1944
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:	org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat:	org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:	No
+Num Buckets:	-1
+Bucket Columns:	[]
+Sort Columns:	[]
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: show partitions orc_merge5a
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@orc_merge5a
+POSTHOOK: query: show partitions orc_merge5a
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@orc_merge5a
+st=0.8
+st=1.8
+st=8.0
+st=80.0
+PREHOOK: query: select * from orc_merge5a where userid<=13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_merge5a
+PREHOOK: Input: default@orc_merge5a@st=0.8
+PREHOOK: Input: default@orc_merge5a@st=1.8
+PREHOOK: Input: default@orc_merge5a@st=8.0
+PREHOOK: Input: default@orc_merge5a@st=80.0
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_merge5a where userid<=13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_merge5a
+POSTHOOK: Input: default@orc_merge5a@st=0.8
+POSTHOOK: Input: default@orc_merge5a@st=1.8
+POSTHOOK: Input: default@orc_merge5a@st=8.0
+POSTHOOK: Input: default@orc_merge5a@st=80.0
+#### A masked pattern was here ####
+13	bar	80.0	2	1969-12-31 16:00:05	80.0
+13	bar	80.0	2	1969-12-31 16:00:05	80.0
+13	bar	80.0	2	1969-12-31 16:00:05	80.0
+13	bar	80.0	2	1969-12-31 16:00:05	80.0
+2	foo	0.8	1	1969-12-31 16:00:00	0.8
+2	foo	0.8	1	1969-12-31 16:00:00	0.8
+2	foo	0.8	1	1969-12-31 16:00:00	0.8
+2	foo	0.8	1	1969-12-31 16:00:00	0.8
+5	eat	0.8	6	1969-12-31 16:00:20	0.8
+5	eat	0.8	6	1969-12-31 16:00:20	0.8
+5	eat	0.8	6	1969-12-31 16:00:20	0.8
+5	eat	0.8	6	1969-12-31 16:00:20	0.8
diff --git ql/src/test/results/clientpositive/union_remove_10.q.out ql/src/test/results/clientpositive/union_remove_10.q.out
index 74b9e68..017944f 100644
--- ql/src/test/results/clientpositive/union_remove_10.q.out
+++ ql/src/test/results/clientpositive/union_remove_10.q.out
@@ -188,12 +188,16 @@ STAGE PLANS:
                   name: default.outputtbl1
 
   Stage: Stage-3
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
   Stage: Stage-5
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
diff --git ql/src/test/results/clientpositive/union_remove_11.q.out ql/src/test/results/clientpositive/union_remove_11.q.out
index e0bd498..82d91a9 100644
--- ql/src/test/results/clientpositive/union_remove_11.q.out
+++ ql/src/test/results/clientpositive/union_remove_11.q.out
@@ -185,12 +185,16 @@ STAGE PLANS:
                   name: default.outputtbl1
 
   Stage: Stage-2
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
   Stage: Stage-4
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
diff --git ql/src/test/results/clientpositive/union_remove_12.q.out ql/src/test/results/clientpositive/union_remove_12.q.out
index 033475f..2ffe1a5 100644
--- ql/src/test/results/clientpositive/union_remove_12.q.out
+++ ql/src/test/results/clientpositive/union_remove_12.q.out
@@ -119,12 +119,16 @@ STAGE PLANS:
                   name: default.outputtbl1
 
   Stage: Stage-2
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
   Stage: Stage-4
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
diff --git ql/src/test/results/clientpositive/union_remove_13.q.out ql/src/test/results/clientpositive/union_remove_13.q.out
index 3c7f8fa..f2a7324 100644
--- ql/src/test/results/clientpositive/union_remove_13.q.out
+++ ql/src/test/results/clientpositive/union_remove_13.q.out
@@ -142,12 +142,16 @@ STAGE PLANS:
                   name: default.outputtbl1
 
   Stage: Stage-2
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
   Stage: Stage-4
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
diff --git ql/src/test/results/clientpositive/union_remove_14.q.out ql/src/test/results/clientpositive/union_remove_14.q.out
index 315c958..29d8ac1 100644
--- ql/src/test/results/clientpositive/union_remove_14.q.out
+++ ql/src/test/results/clientpositive/union_remove_14.q.out
@@ -121,12 +121,16 @@ STAGE PLANS:
                   name: default.outputtbl1
 
   Stage: Stage-2
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
   Stage: Stage-4
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
diff --git ql/src/test/results/clientpositive/union_remove_16.q.out ql/src/test/results/clientpositive/union_remove_16.q.out
index 721caa0..e92931c 100644
--- ql/src/test/results/clientpositive/union_remove_16.q.out
+++ ql/src/test/results/clientpositive/union_remove_16.q.out
@@ -139,12 +139,16 @@ STAGE PLANS:
                   name: default.outputtbl1
 
   Stage: Stage-2
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
   Stage: Stage-4
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
diff --git ql/src/test/results/clientpositive/union_remove_9.q.out ql/src/test/results/clientpositive/union_remove_9.q.out
index 5a9c5a2..3ab6ed5 100644
--- ql/src/test/results/clientpositive/union_remove_9.q.out
+++ ql/src/test/results/clientpositive/union_remove_9.q.out
@@ -149,12 +149,16 @@ STAGE PLANS:
                   name: default.outputtbl1
 
   Stage: Stage-2
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
   Stage: Stage-4
-    Merge Work
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
       merge level: block
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat