Index: build-common.xml
===================================================================
--- build-common.xml (revision 1444230)
+++ build-common.xml (working copy)
@@ -57,7 +57,7 @@
-
+
Index: ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (revision 1444230)
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (working copy)
@@ -320,6 +320,13 @@
     "with distincts. Either set hive.new.job.grouping.set.cardinality to a high number " +
     "(higher than the number of rows per input row due to grouping sets in the query), or " +
     "rewrite the query to not use distincts."),
+  TRUNCATE_COLUMN_INDEXED_TABLE(10227, "Can not truncate columns from table with indexes"),
+  TRUNCATE_COLUMN_NOT_RC(10228, "Only RCFileFormat supports column truncation."),
+  TRUNCATE_COLUMN_ARCHIVED(10229, "Column truncation cannot be performed on archived partitions."),
+  TRUNCATE_BUCKETED_COLUMN(10230,
+      "A column on which a partition/table is bucketed cannot be truncated."),
+  TRUNCATE_LIST_BUCKETED_COLUMN(10231,
+      "A column on which a partition/table is list bucketed cannot be truncated."),
   OPERATOR_NOT_ALLOWED_WITH_MAPJOIN(10227,
     "Not all clauses are supported with mapjoin hint. Please remove mapjoin hint."),
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (revision 1444230)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (working copy)
@@ -40,10 +40,10 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Map.Entry;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeSet;
+import java.util.Map.Entry;
 
 import org.antlr.stringtemplate.StringTemplate;
 import org.apache.commons.lang.StringEscapeUtils;
@@ -87,6 +87,8 @@
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask;
 import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
+import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask;
+import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateWork;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode;
@@ -111,7 +113,6 @@
 import org.apache.hadoop.hive.ql.plan.AlterDatabaseDesc;
 import org.apache.hadoop.hive.ql.plan.AlterIndexDesc;
 import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
-import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
 import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc;
 import org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc;
 import org.apache.hadoop.hive.ql.plan.CreateIndexDesc;
@@ -150,6 +151,7 @@
 import org.apache.hadoop.hive.ql.plan.SwitchDatabaseDesc;
 import org.apache.hadoop.hive.ql.plan.TruncateTableDesc;
 import org.apache.hadoop.hive.ql.plan.UnlockTableDesc;
+import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.security.authorization.Privilege;
 import org.apache.hadoop.hive.serde.serdeConstants;
@@ -3901,6 +3903,21 @@
   }
 
   private int truncateTable(Hive db, TruncateTableDesc truncateTableDesc) throws HiveException {
+
+    if (truncateTableDesc.getColumnIndexes() != null) {
+      ColumnTruncateWork truncateWork = new ColumnTruncateWork(
+          truncateTableDesc.getColumnIndexes(), truncateTableDesc.getInputDir(),
+          truncateTableDesc.getOutputDir());
+      truncateWork.setListBucketingCtx(truncateTableDesc.getLbCtx());
+      truncateWork.setMapperCannotSpanPartns(true);
+      DriverContext driverCxt = new DriverContext();
+      ColumnTruncateTask taskExec = new ColumnTruncateTask();
+      taskExec.initialize(db.getConf(), null, driverCxt);
+      taskExec.setWork(truncateWork);
+      taskExec.setQueryPlan(this.getQueryPlan());
+      return taskExec.execute(driverCxt);
+    }
+
     String tableName = truncateTableDesc.getTableName();
     Map<String, String> partSpec = truncateTableDesc.getPartSpec();
Index: ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java (revision 1444230)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java (working copy)
@@ -243,6 +243,12 @@
       this.numberRows = numberRows;
     }
 
+    public void nullColumn(int columnIndex) {
+      eachColumnValueLen[columnIndex] = 0;
+      eachColumnUncompressedValueLen[columnIndex] = 0;
+      allCellValLenBuffer[columnIndex] = new NonSyncDataOutputBuffer();
+    }
+
     /**
      * add in a new column's meta data.
      *
@@ -546,6 +552,14 @@
       }
     }
 
+    public void nullColumn(int columnIndex) {
+      if (codec != null) {
+        compressedColumnsValueBuffer[columnIndex].reset();
+      } else {
+        loadedColumnsValueBuffer[columnIndex].reset();
+      }
+    }
+
     public void clearColumnBuffer() throws IOException {
       decompressBuffer.reset();
     }
@@ -1070,6 +1084,7 @@
     public int rowReadIndex;
     public int runLength;
     public int prvLength;
+    public boolean isNulled;
   }
   private final Path file;
   private final FSDataInputStream in;
@@ -1484,6 +1499,7 @@
       col.rowReadIndex = 0;
       col.runLength = 0;
       col.prvLength = -1;
+      col.isNulled = colValLenBufferReadIn[selIx].getLength() == 0;
     }
 
     return currentKeyLength;
@@ -1687,18 +1703,22 @@
           SelectedColumn col = selectedColumns[j];
           int i = col.colIndex;
 
-          BytesRefWritable ref = ret.unCheckedGet(i);
+          if (col.isNulled) {
+            ret.set(i, null);
+          } else {
+            BytesRefWritable ref = ret.unCheckedGet(i);
 
-          colAdvanceRow(j, col);
+            colAdvanceRow(j, col);
 
-          if (currentValue.decompressedFlag[j]) {
-            ref.set(currentValue.loadedColumnsValueBuffer[j].getData(),
-                col.rowReadIndex, col.prvLength);
-          } else {
-            ref.set(currentValue.lazyDecompressCallbackObjs[j],
-                col.rowReadIndex, col.prvLength);
+            if (currentValue.decompressedFlag[j]) {
+              ref.set(currentValue.loadedColumnsValueBuffer[j].getData(),
+                  col.rowReadIndex, col.prvLength);
+            } else {
+              ref.set(currentValue.lazyDecompressCallbackObjs[j],
+                  col.rowReadIndex, col.prvLength);
+            }
+            col.rowReadIndex += col.prvLength;
           }
-          col.rowReadIndex += col.prvLength;
         }
       } else {
         // This version of the loop eliminates a condition check and branch
@@ -1707,12 +1727,16 @@
           SelectedColumn col = selectedColumns[j];
           int i = col.colIndex;
 
-          BytesRefWritable ref = ret.unCheckedGet(i);
+          if (col.isNulled) {
+            ret.set(i, null);
+          } else {
+            BytesRefWritable ref = ret.unCheckedGet(i);
 
-          colAdvanceRow(j, col);
-          ref.set(currentValue.loadedColumnsValueBuffer[j].getData(),
-              col.rowReadIndex, col.prvLength);
-          col.rowReadIndex += col.prvLength;
+            colAdvanceRow(j, col);
+            ref.set(currentValue.loadedColumnsValueBuffer[j].getData(),
+                col.rowReadIndex, col.prvLength);
+            col.rowReadIndex += col.prvLength;
+          }
         }
       }
       rowFetched = true;
Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileKeyBufferWrapper.java
===================================================================
---
ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileKeyBufferWrapper.java (revision 1444230) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileKeyBufferWrapper.java (working copy) @@ -38,7 +38,7 @@ protected CompressionCodec codec; - protected RCFileKeyBufferWrapper() { + public RCFileKeyBufferWrapper() { } public static RCFileKeyBufferWrapper create(KeyBuffer currentKeyBufferObj) { @@ -62,4 +62,52 @@ return this.keyBuffer.compareTo(o.keyBuffer); } + public KeyBuffer getKeyBuffer() { + return keyBuffer; + } + + public void setKeyBuffer(KeyBuffer keyBuffer) { + this.keyBuffer = keyBuffer; + } + + public int getRecordLength() { + return recordLength; + } + + public void setRecordLength(int recordLength) { + this.recordLength = recordLength; + } + + public int getKeyLength() { + return keyLength; + } + + public void setKeyLength(int keyLength) { + this.keyLength = keyLength; + } + + public int getCompressedKeyLength() { + return compressedKeyLength; + } + + public void setCompressedKeyLength(int compressedKeyLength) { + this.compressedKeyLength = compressedKeyLength; + } + + public Path getInputPath() { + return inputPath; + } + + public void setInputPath(Path inputPath) { + this.inputPath = inputPath; + } + + public CompressionCodec getCodec() { + return codec; + } + + public void setCodec(CompressionCodec codec) { + this.codec = codec; + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileValueBufferWrapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileValueBufferWrapper.java (revision 1444230) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileValueBufferWrapper.java (working copy) @@ -48,4 +48,12 @@ return this.valueBuffer.compareTo(o.valueBuffer); } + public ValueBuffer getValueBuffer() { + return valueBuffer; + } + + public void setValueBuffer(ValueBuffer valueBuffer) { + this.valueBuffer = valueBuffer; + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java (working copy) @@ -0,0 +1,244 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.io.rcfile.truncate; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.RCFile; +import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileKeyBufferWrapper; +import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileValueBufferWrapper; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; +import org.apache.hadoop.hive.shims.CombineHiveKey; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MapReduceBase; +import org.apache.hadoop.mapred.Mapper; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; + +@SuppressWarnings("deprecation") +public class ColumnTruncateMapper extends MapReduceBase implements + Mapper { + + private JobConf jc; + Class outputClass; + RCFile.Writer outWriter; + + Path finalPath; + FileSystem fs; + + boolean exception = false; + boolean autoDelete = false; + Path outPath; + + CompressionCodec codec = null; + int columnNumber = 0; + + boolean tmpPathFixedConcatenate = false; + boolean tmpPathFixed = false; + Path tmpPath; + Path taskTmpPath; + Path dpPath; + ColumnTruncateWork work; + + public final static Log LOG = LogFactory.getLog(ColumnTruncateMapper.class.getName()); + + public ColumnTruncateMapper() { + } + + @Override + public void configure(JobConf job) { + jc = job; + work = (ColumnTruncateWork) Utilities.getMapRedWork(job); + + String specPath = work.getOutputDir(); + Path tmpPath = Utilities.toTempPath(specPath); + Path taskTmpPath = Utilities.toTaskTempPath(specPath); + updatePaths(tmpPath, taskTmpPath); + try { + fs = (new Path(specPath)).getFileSystem(job); + autoDelete = ShimLoader.getHadoopShims().fileSystemDeleteOnExit(fs, + outPath); + } catch (IOException e) { + this.exception = true; + throw new RuntimeException(e); + } + } + + private void updatePaths(Path tmpPath, Path taskTmpPath) { + String taskId = Utilities.getTaskId(jc); + this.tmpPath = tmpPath; + this.taskTmpPath = taskTmpPath; + String inputFile = HiveConf.getVar(jc, HiveConf.ConfVars.HADOOPMAPFILENAME); + int lastSeparator = inputFile.lastIndexOf(Path.SEPARATOR) + 1; + finalPath = new Path(tmpPath, inputFile.substring(lastSeparator)); + outPath = new Path(taskTmpPath, Utilities.toTempPath(taskId)); + } + + @Override + public void map(Object k, RCFileValueBufferWrapper value, + OutputCollector output, Reporter reporter) + throws IOException { + try { + + RCFileKeyBufferWrapper key = null; + if (k instanceof CombineHiveKey) { + key = (RCFileKeyBufferWrapper) ((CombineHiveKey) k).getKey(); + } else { + key = (RCFileKeyBufferWrapper) k; + } + + if (work.getListBucketingCtx().calculateListBucketingLevel() > 0) { + if (!this.tmpPathFixedConcatenate) { + fixTmpPathConcatenate(key.getInputPath().getParent(), + work.getListBucketingCtx().calculateListBucketingLevel()); + tmpPathFixedConcatenate = true; + } + } + + if (outWriter == null) { + codec = key.getCodec(); + columnNumber = key.getKeyBuffer().getColumnNumber(); + 
jc.setInt(RCFile.COLUMN_NUMBER_CONF_STR, columnNumber); + outWriter = new RCFile.Writer(fs, jc, outPath, null, codec); + } + + for (Integer i : work.getDroppedColumns()) { + key.getKeyBuffer().nullColumn(i); + value.getValueBuffer().nullColumn(i); + } + + int keyLength = key.getKeyBuffer().getSize(); + int recordLength = key.getKeyBuffer().getSize(); + for (int columnLen : key.getKeyBuffer().getEachColumnValueLen()) { + recordLength += columnLen; + } + + outWriter.flushBlock(key.getKeyBuffer(), value.getValueBuffer(), recordLength, + keyLength, key.getCompressedKeyLength()); + } catch (Throwable e) { + this.exception = true; + close(); + throw new IOException(e); + } + } + + /** + * Fixes tmpPath to point to the correct list bucketing sub-directories. + * Before this is called, tmpPath will default to the root tmp table dir + * Reason to add a new method instead of changing fixTmpPath() + * Reason 1: logic has slightly difference + * fixTmpPath(..) needs 2 variables in order to decide path delta which is in variable newPath. + * 1. inputPath.depth() + * 2. tmpPath.depth() + * fixTmpPathConcatenate needs 2 variables too but one of them is different from fixTmpPath(..) + * 1. inputPath.depth() + * 2. listBucketingDepth + * Reason 2: less risks + * The existing logic is a little not trivial around map() and fixTmpPath(). In order to ensure + * minimum impact on existing flow, we try to avoid change on existing code/flow but add new code + * for new feature. + * + * @param inputPath + * @throws HiveException + * @throws IOException + */ + private void fixTmpPathConcatenate(Path inputPath, int listBucketingDepth) + throws HiveException, IOException { + dpPath = inputPath; + Path newPath = new Path("."); + + int depth = listBucketingDepth; + // Build the path from bottom up. pick up list bucketing subdirectories + while ((inputPath != null) && (depth > 0)) { + newPath = new Path(inputPath.getName(), newPath); + inputPath = inputPath.getParent(); + depth--; + } + + Path newTmpPath = new Path(tmpPath, newPath); + Path newTaskTmpPath = new Path(taskTmpPath, newPath); + if (!fs.exists(newTmpPath)) { + fs.mkdirs(newTmpPath); + } + updatePaths(newTmpPath, newTaskTmpPath); + } + + + @Override + public void close() throws IOException { + // close writer + if (outWriter == null) { + return; + } + + outWriter.close(); + outWriter = null; + + if (!exception) { + FileStatus fss = fs.getFileStatus(outPath); + LOG.info("renamed path " + outPath + " to " + finalPath + + " . 
File size is " + fss.getLen()); + if (!fs.rename(outPath, finalPath)) { + throw new IOException("Unable to rename output to " + finalPath); + } + } else { + if (!autoDelete) { + fs.delete(outPath, true); + } + } + } + + public static String BACKUP_PREFIX = "_backup."; + + public static Path backupOutputPath(FileSystem fs, Path outpath, JobConf job) + throws IOException, HiveException { + if (fs.exists(outpath)) { + Path backupPath = new Path(outpath.getParent(), BACKUP_PREFIX + + outpath.getName()); + Utilities.rename(fs, outpath, backupPath); + return backupPath; + } else { + return null; + } + } + + public static void jobClose(String outputPath, boolean success, JobConf job, + LogHelper console, DynamicPartitionCtx dynPartCtx, Reporter reporter + ) throws HiveException, IOException { + Path outpath = new Path(outputPath); + FileSystem fs = outpath.getFileSystem(job); + Path backupPath = backupOutputPath(fs, outpath, job); + Utilities.mvFileToFinalPath(outputPath, job, success, LOG, dynPartCtx, null, + reporter); + fs.delete(backupPath, true); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java (working copy) @@ -0,0 +1,263 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.io.rcfile.truncate; + +import java.io.IOException; +import java.io.Serializable; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.exec.HadoopJobExecHelper; +import org.apache.hadoop.hive.ql.exec.HadoopJobExecHook; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.Throttle; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; +import org.apache.hadoop.hive.ql.plan.api.StageType; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.Counters; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RunningJob; + +@SuppressWarnings( { "deprecation", "unchecked" }) +public class ColumnTruncateTask extends Task implements Serializable, + HadoopJobExecHook { + + private static final long serialVersionUID = 1L; + + protected transient JobConf job; + protected HadoopJobExecHelper jobExecHelper; + + @Override + public void initialize(HiveConf conf, QueryPlan queryPlan, + DriverContext driverContext) { + super.initialize(conf, queryPlan, driverContext); + job = new JobConf(conf, ColumnTruncateTask.class); + jobExecHelper = new HadoopJobExecHelper(job, this.console, this, this); + } + + @Override + public boolean requireLock() { + return true; + } + + boolean success = true; + + @Override + /** + * start a new map-reduce job to do the truncation, almost the same as ExecDriver. 
+ */ + public int execute(DriverContext driverContext) { + HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, + BucketizedHiveInputFormat.class.getName()); + success = true; + ShimLoader.getHadoopShims().prepareJobOutput(job); + job.setOutputFormat(HiveOutputFormatImpl.class); + job.setMapperClass(work.getMapperClass()); + + Context ctx = driverContext.getCtx(); + boolean ctxCreated = false; + try { + if (ctx == null) { + ctx = new Context(job); + ctxCreated = true; + } + }catch (IOException e) { + e.printStackTrace(); + console.printError("Error launching map-reduce job", "\n" + + org.apache.hadoop.util.StringUtils.stringifyException(e)); + return 5; + } + + job.setMapOutputKeyClass(NullWritable.class); + job.setMapOutputValueClass(NullWritable.class); + if(work.getNumMapTasks() != null) { + job.setNumMapTasks(work.getNumMapTasks()); + } + + // zero reducers + job.setNumReduceTasks(0); + + if (work.getMinSplitSize() != null) { + HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work + .getMinSplitSize().longValue()); + } + + if (work.getInputformat() != null) { + HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work + .getInputformat()); + } + + String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); + if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { + inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); + } + + LOG.info("Using " + inpFormat); + + try { + job.setInputFormat((Class) (Class + .forName(inpFormat))); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e.getMessage()); + } + + String outputPath = this.work.getOutputDir(); + Path tempOutPath = Utilities.toTempPath(new Path(outputPath)); + try { + FileSystem fs = tempOutPath.getFileSystem(job); + if (!fs.exists(tempOutPath)) { + fs.mkdirs(tempOutPath); + } + } catch (IOException e) { + console.printError("Can't make path " + outputPath + " : " + e.getMessage()); + return 6; + } + + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(NullWritable.class); + + int returnVal = 0; + RunningJob rj = null; + boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, + HiveConf.ConfVars.HADOOPJOBNAME)); + + String jobName = null; + if (noName && this.getQueryPlan() != null) { + int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH); + jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), + maxlen - 6); + } + + if (noName) { + // This is for a special case to ensure unit tests pass + HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, + jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt()); + } + + try { + addInputPaths(job, work); + + Utilities.setMapRedWork(job, work, ctx.getMRTmpFileURI()); + + // remove the pwd from conf file so that job tracker doesn't show this + // logs + String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD); + if (pwd != null) { + HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE"); + } + JobClient jc = new JobClient(job); + + String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR); + if (!addedJars.isEmpty()) { + job.set("tmpjars", addedJars); + } + + // make this client wait if job trcker is not behaving well. + Throttle.checkJobTracker(job, LOG); + + // Finally SUBMIT the JOB! 
+ rj = jc.submitJob(job); + + returnVal = jobExecHelper.progress(rj, jc); + success = (returnVal == 0); + + } catch (Exception e) { + e.printStackTrace(); + String mesg = " with exception '" + Utilities.getNameMessage(e) + "'"; + if (rj != null) { + mesg = "Ended Job = " + rj.getJobID() + mesg; + } else { + mesg = "Job Submission failed" + mesg; + } + + // Has to use full name to make sure it does not conflict with + // org.apache.commons.lang.StringUtils + console.printError(mesg, "\n" + + org.apache.hadoop.util.StringUtils.stringifyException(e)); + + success = false; + returnVal = 1; + } finally { + try { + if (ctxCreated) { + ctx.clear(); + } + if (rj != null) { + if (returnVal != 0) { + rj.killJob(); + } + HadoopJobExecHelper.runningJobKillURIs.remove(rj.getJobID()); + jobID = rj.getID().toString(); + } + ColumnTruncateMapper.jobClose(outputPath, success, job, console, + work.getDynPartCtx(), null); + } catch (Exception e) { + } + } + + return (returnVal); + } + + private void addInputPaths(JobConf job, ColumnTruncateWork work) { + FileInputFormat.addInputPath(job, new Path(work.getInputDir())); + } + + @Override + public String getName() { + return "RCFile ColumnTruncate"; + } + + @Override + public StageType getType() { + return StageType.MAPRED; + } + + @Override + public boolean checkFatalErrors(Counters ctrs, StringBuilder errMsg) { + return false; + } + + @Override + public void logPlanProgress(SessionState ss) throws IOException { + // no op + } + + @Override + public void updateCounters(Counters ctrs, RunningJob rj) throws IOException { + // no op + } + + @Override + protected void localizeMRTmpFilesImpl(Context ctx) { + // no op + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java (working copy) @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.io.rcfile.truncate; + +import java.io.Serializable; +import java.util.LinkedHashMap; +import java.util.List; + +import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat; +import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; +import org.apache.hadoop.hive.ql.plan.Explain; +import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; +import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.mapred.Mapper; + +@Explain(displayName = "Column Truncate") +public class ColumnTruncateWork extends MapredWork implements Serializable { + + private static final long serialVersionUID = 1L; + + private String inputDir; + private String outputDir; + private boolean hasDynamicPartitions; + private DynamicPartitionCtx dynPartCtx; + private boolean isListBucketingAlterTableConcatenate; + private ListBucketingCtx listBucketingCtx; + private List droppedColumns; + + public ColumnTruncateWork() { + } + + public ColumnTruncateWork(List droppedColumns, String inputDir, String outputDir) { + this(droppedColumns, inputDir, outputDir, false, null); + } + + public ColumnTruncateWork(List droppedColumns, String inputDir, String outputDir, + boolean hasDynamicPartitions, DynamicPartitionCtx dynPartCtx) { + super(); + this.droppedColumns = droppedColumns; + this.inputDir = inputDir; + this.outputDir = outputDir; + this.hasDynamicPartitions = hasDynamicPartitions; + this.dynPartCtx = dynPartCtx; + PartitionDesc partDesc = new PartitionDesc(); + partDesc.setInputFileFormatClass(RCFileBlockMergeInputFormat.class); + if(this.getPathToPartitionInfo() == null) { + this.setPathToPartitionInfo(new LinkedHashMap()); + } + if(this.getNumReduceTasks() == null) { + this.setNumReduceTasks(0); + } + this.getPathToPartitionInfo().put(inputDir, partDesc); + } + + public String getInputDir() { + return inputDir; + } + + public void setInputPaths(String inputDir) { + this.inputDir = inputDir; + } + + public String getOutputDir() { + return outputDir; + } + + public void setOutputDir(String outputDir) { + this.outputDir = outputDir; + } + + public Class getMapperClass() { + return ColumnTruncateMapper.class; + } + + @Override + public Long getMinSplitSize() { + return null; + } + + @Override + public String getInputformat() { + return BucketizedHiveInputFormat.class.getName(); + } + + @Override + public boolean isGatheringStats() { + return false; + } + + public boolean hasDynamicPartitions() { + return this.hasDynamicPartitions; + } + + public void setHasDynamicPartitions(boolean hasDynamicPartitions) { + this.hasDynamicPartitions = hasDynamicPartitions; + } + + public DynamicPartitionCtx getDynPartCtx() { + return dynPartCtx; + } + + public void setDynPartCtx(DynamicPartitionCtx dynPartCtx) { + this.dynPartCtx = dynPartCtx; + } + + /** + * @return the listBucketingCtx + */ + public ListBucketingCtx getListBucketingCtx() { + return listBucketingCtx; + } + + /** + * @param listBucketingCtx the listBucketingCtx to set + */ + public void setListBucketingCtx(ListBucketingCtx listBucketingCtx) { + this.listBucketingCtx = listBucketingCtx; + } + + /** + * @return the isListBucketingAlterTableConcatenate + */ + public boolean isListBucketingAlterTableConcatenate() { + return isListBucketingAlterTableConcatenate; + } + + public List getDroppedColumns() { + return droppedColumns; + } + + public void setDroppedColumns(List droppedColumns) { + 
    this.droppedColumns = droppedColumns;
+  }
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (revision 1444230)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (working copy)
@@ -777,7 +777,148 @@
     }
 
     TruncateTableDesc truncateTblDesc = new TruncateTableDesc(tableName, partSpec);
-    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), truncateTblDesc), conf));
+
+    DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), truncateTblDesc);
+    Task<? extends Serializable> truncateTask = TaskFactory.get(ddlWork, conf);
+
+    // Is this a truncate column command
+    List<String> columnNames = null;
+    if (ast.getChildCount() == 2) {
+      try {
+        columnNames = getColumnNames((ASTNode) ast.getChild(1));
+
+        // Throw an error if the table is indexed
+        List<Index> indexes = db.getIndexes(table.getDbName(), tableName, (short) 1);
+        if (indexes != null && indexes.size() > 0) {
+          throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_INDEXED_TABLE.getMsg());
+        }
+
+        List<String> bucketCols = null;
+        Class<? extends InputFormat> inputFormatClass = null;
+        boolean isArchived = false;
+        Path newTblPartLoc = null;
+        Path oldTblPartLoc = null;
+        List<FieldSchema> cols = null;
+        ListBucketingCtx lbCtx = null;
+        boolean isListBucketed = false;
+        List<String> listBucketColNames = null;
+
+        if (table.isPartitioned()) {
+          Partition part = db.getPartition(table, partSpec, false);
+
+          Path tabPath = table.getPath();
+          Path partPath = part.getPartitionPath();
+
+          // if the table is in a different dfs than the partition,
+          // replace the partition's dfs with the table's dfs.
+          newTblPartLoc = new Path(tabPath.toUri().getScheme(), tabPath.toUri()
+              .getAuthority(), partPath.toUri().getPath());
+
+          oldTblPartLoc = partPath;
+
+          cols = part.getCols();
+          bucketCols = part.getBucketCols();
+          inputFormatClass = part.getInputFormatClass();
+          isArchived = ArchiveUtils.isArchived(part);
+          lbCtx = constructListBucketingCtx(part.getSkewedColNames(), part.getSkewedColValues(),
+              part.getSkewedColValueLocationMaps(), part.isStoredAsSubDirectories(), conf);
+          isListBucketed = part.isStoredAsSubDirectories();
+          listBucketColNames = part.getSkewedColNames();
+        } else {
+          // input and output are the same
+          oldTblPartLoc = table.getPath();
+          newTblPartLoc = table.getPath();
+          cols = table.getCols();
+          bucketCols = table.getBucketCols();
+          inputFormatClass = table.getInputFormatClass();
+          lbCtx = constructListBucketingCtx(table.getSkewedColNames(), table.getSkewedColValues(),
+              table.getSkewedColValueLocationMaps(), table.isStoredAsSubDirectories(), conf);
+          isListBucketed = table.isStoredAsSubDirectories();
+          listBucketColNames = table.getSkewedColNames();
+        }
+
+        // throw a HiveException for non-rcfile.
+        if (!inputFormatClass.equals(RCFileInputFormat.class)) {
+          throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_NOT_RC.getMsg());
+        }
+
+        // throw a HiveException if the table/partition is archived
+        if (isArchived) {
+          throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_ARCHIVED.getMsg());
+        }
+
+        Set<Integer> columnIndexes = new HashSet<Integer>();
+        for (String columnName : columnNames) {
+          boolean found = false;
+          for (int columnIndex = 0; columnIndex < cols.size(); columnIndex++) {
+            if (columnName.equalsIgnoreCase(cols.get(columnIndex).getName())) {
+              columnIndexes.add(columnIndex);
+              found = true;
+              break;
+            }
+          }
+          // Throw an exception if the user is trying to truncate a column which doesn't exist
+          if (!found) {
+            throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(columnName));
+          }
+          // Throw an exception if the table/partition is bucketed on one of the columns
+          for (String bucketCol : bucketCols) {
+            if (bucketCol.equalsIgnoreCase(columnName)) {
+              throw new SemanticException(ErrorMsg.TRUNCATE_BUCKETED_COLUMN.getMsg(columnName));
+            }
+          }
+          if (isListBucketed) {
+            for (String listBucketCol : listBucketColNames) {
+              if (listBucketCol.equalsIgnoreCase(columnName)) {
+                throw new SemanticException(
+                    ErrorMsg.TRUNCATE_LIST_BUCKETED_COLUMN.getMsg(columnName));
+              }
+            }
+          }
+        }
+
+        truncateTblDesc.setColumnIndexes(new ArrayList<Integer>(columnIndexes));
+
+        truncateTblDesc.setInputDir(oldTblPartLoc.toString());
+        truncateTblDesc.setLbCtx(lbCtx);
+
+        addInputsOutputsAlterTable(tableName, partSpec);
+        ddlWork.setNeedLock(true);
+        TableDesc tblDesc = Utilities.getTableDesc(table);
+        // Write the output to a temporary directory and move it to the final location at the end
+        // so the operation is atomic.
+        String queryTmpdir = ctx.getExternalTmpFileURI(newTblPartLoc.toUri());
+        truncateTblDesc.setOutputDir(queryTmpdir);
+        LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, queryTmpdir, tblDesc,
+            partSpec == null ? new HashMap<String, String>() : partSpec);
+        ltd.setLbCtx(lbCtx);
+        Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false),
+            conf);
+        truncateTask.addDependentTask(moveTsk);
+
+        // Recalculate the HDFS stats if auto gather stats is set
+        if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
+          StatsWork statDesc;
+          if (oldTblPartLoc.equals(newTblPartLoc)) {
+            // If we're merging to the same location, we can avoid some metastore calls
+            tableSpec tablepart = new tableSpec(this.db, conf, root);
+            statDesc = new StatsWork(tablepart);
+          } else {
+            statDesc = new StatsWork(ltd);
+          }
+          statDesc.setNoStatsAggregator(true);
+          statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+          Task<? extends Serializable> statTask = TaskFactory.get(statDesc, conf);
+          moveTsk.addDependentTask(statTask);
+        }
+      } catch (HiveException e) {
+        throw new SemanticException(e);
+      }
+    }
+
+    rootTasks.add(truncateTask);
   }
 
   private boolean isFullSpec(Table table, Map<String, String> partSpec) {
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (revision 1444230)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (working copy)
@@ -517,7 +517,7 @@
 truncateTableStatement
 @init { msgs.push("truncate table statement"); }
 @after { msgs.pop(); }
-    : KW_TRUNCATE KW_TABLE tablePartitionPrefix -> ^(TOK_TRUNCATETABLE tablePartitionPrefix);
+    : KW_TRUNCATE KW_TABLE tablePartitionPrefix (KW_COLUMNS LPAREN columnNameList RPAREN)?
-> ^(TOK_TRUNCATETABLE tablePartitionPrefix columnNameList?); createIndexStatement @init { msgs.push("create index statement");} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java (revision 1444230) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TruncateTableDesc.java (working copy) @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.plan; +import java.util.List; import java.util.Map; /** @@ -30,6 +31,10 @@ private String tableName; private Map partSpec; + private List columnIndexes; + private String inputDir; + private String outputDir; + private ListBucketingCtx lbCtx; public TruncateTableDesc() { } @@ -56,4 +61,37 @@ public void setPartSpec(Map partSpec) { this.partSpec = partSpec; } + + @Explain(displayName = "Column Indexes") + public List getColumnIndexes() { + return columnIndexes; + } + + public void setColumnIndexes(List columnIndexes) { + this.columnIndexes = columnIndexes; + } + + public String getInputDir() { + return inputDir; + } + + public void setInputDir(String inputDir) { + this.inputDir = inputDir; + } + + public String getOutputDir() { + return outputDir; + } + + public void setOutputDir(String outputDir) { + this.outputDir = outputDir; + } + + public ListBucketingCtx getLbCtx() { + return lbCtx; + } + + public void setLbCtx(ListBucketingCtx lbCtx) { + this.lbCtx = lbCtx; + } } Index: ql/src/test/queries/clientnegative/truncate_bucketed_column.q =================================================================== --- ql/src/test/queries/clientnegative/truncate_bucketed_column.q (revision 0) +++ ql/src/test/queries/clientnegative/truncate_bucketed_column.q (working copy) @@ -0,0 +1,7 @@ +-- Tests truncating a bucketed column + +CREATE TABLE test_tab (key STRING, value STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS RCFILE; + +INSERT OVERWRITE TABLE test_tab SELECT * FROM src; + +TRUNCATE TABLE test_tab COLUMNS (key); Index: ql/src/test/queries/clientnegative/truncate_column_indexed_table.q =================================================================== --- ql/src/test/queries/clientnegative/truncate_column_indexed_table.q (revision 0) +++ ql/src/test/queries/clientnegative/truncate_column_indexed_table.q (working copy) @@ -0,0 +1,9 @@ +-- Tests truncating a column from an indexed table + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS RCFILE; + +INSERT OVERWRITE TABLE test_tab SELECT * FROM src; + +CREATE INDEX test_tab_index ON TABLE test_tab (key) as 'COMPACT' WITH DEFERRED REBUILD; + +TRUNCATE TABLE test_tab COLUMNS (value); Index: ql/src/test/queries/clientnegative/truncate_column_list_bucketing.q =================================================================== --- ql/src/test/queries/clientnegative/truncate_column_list_bucketing.q (revision 0) +++ ql/src/test/queries/clientnegative/truncate_column_list_bucketing.q (working copy) @@ -0,0 +1,14 @@ +set hive.mapred.supports.subdirectories=true; +set mapred.input.dir.recursive=true; + +-- Tests truncating a column on which a table is list bucketed + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS RCFILE; + +ALTER TABLE test_tab +SKEWED BY (key) ON ("484") +STORED AS DIRECTORIES; + +INSERT OVERWRITE TABLE test_tab SELECT * FROM src; + +TRUNCATE TABLE test_tab COLUMNS (key); Index: ql/src/test/queries/clientnegative/truncate_column_seqfile.q =================================================================== --- 
ql/src/test/queries/clientnegative/truncate_column_seqfile.q (revision 0) +++ ql/src/test/queries/clientnegative/truncate_column_seqfile.q (working copy) @@ -0,0 +1,7 @@ +-- Tests truncating a column from a table stored as a sequence file + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS SEQUENCEFILE; + +INSERT OVERWRITE TABLE test_tab SELECT * FROM src; + +TRUNCATE TABLE test_tab COLUMNS (key); Index: ql/src/test/queries/clientnegative/truncate_nonexistant_column.q =================================================================== --- ql/src/test/queries/clientnegative/truncate_nonexistant_column.q (revision 0) +++ ql/src/test/queries/clientnegative/truncate_nonexistant_column.q (working copy) @@ -0,0 +1,7 @@ +-- Tests attempting to truncate a column in a table that doesn't exist + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS RCFILE; + +INSERT OVERWRITE TABLE test_tab SELECT * FROM src; + +TRUNCATE TABLE test_tab COLUMNS (doesnt_exist); Index: ql/src/test/queries/clientpositive/truncate_column.q =================================================================== --- ql/src/test/queries/clientpositive/truncate_column.q (revision 0) +++ ql/src/test/queries/clientpositive/truncate_column.q (working copy) @@ -0,0 +1,79 @@ +-- Tests truncating column(s) from a table, also tests that stats are updated + +CREATE TABLE test_tab (key STRING, value STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' STORED AS RCFILE; + +set hive.stats.autogather=true; + +INSERT OVERWRITE TABLE test_tab SELECT * FROM src LIMIT 10; + +DESC FORMATTED test_tab; + +SELECT * FROM test_tab; + +-- Truncate 1 column +TRUNCATE TABLE test_tab COLUMNS (key); + +DESC FORMATTED test_tab; + +-- First column should be null +SELECT * FROM test_tab; + +-- Truncate multiple columns +INSERT OVERWRITE TABLE test_tab SELECT * FROM src LIMIT 10; + +TRUNCATE TABLE test_tab COLUMNS (key, value); + +DESC FORMATTED test_tab; + +-- Both columns should be null +SELECT * FROM test_tab; + +-- Truncate columns again +TRUNCATE TABLE test_tab COLUMNS (key, value); + +DESC FORMATTED test_tab; + +-- Both columns should be null +SELECT * FROM test_tab; + +-- Test truncating with a binary serde +ALTER TABLE test_tab SET SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'; + +INSERT OVERWRITE TABLE test_tab SELECT * FROM src LIMIT 10; + +DESC FORMATTED test_tab; + +SELECT * FROM test_tab; + +-- Truncate 1 column +TRUNCATE TABLE test_tab COLUMNS (key); + +DESC FORMATTED test_tab; + +-- First column should be null +SELECT * FROM test_tab; + +-- Truncate 2 columns +TRUNCATE TABLE test_tab COLUMNS (key, value); + +DESC FORMATTED test_tab; + +-- Both columns should be null +SELECT * FROM test_tab; + +-- Test truncating a partition +CREATE TABLE test_tab_part (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE; + +INSERT OVERWRITE TABLE test_tab_part PARTITION (part = '1') SELECT * FROM src LIMIT 10; + +DESC FORMATTED test_tab_part PARTITION (part = '1'); + +SELECT * FROM test_tab_part WHERE part = '1'; + +TRUNCATE TABLE test_tab_part PARTITION (part = '1') COLUMNS (key); + +DESC FORMATTED test_tab_part PARTITION (part = '1'); + +-- First column should be null +SELECT * FROM test_tab_part WHERE part = '1'; Index: ql/src/test/queries/clientpositive/truncate_column_buckets.q =================================================================== --- ql/src/test/queries/clientpositive/truncate_column_buckets.q (revision 0) +++ 
ql/src/test/queries/clientpositive/truncate_column_buckets.q (working copy) @@ -0,0 +1,23 @@ +-- Tests truncating columns from a bucketed table, table should remain bucketed + +CREATE TABLE test_tab (key STRING, value STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS RCFILE; + +set hive.enforce.bucketing=true; + +INSERT OVERWRITE TABLE test_tab SELECT * FROM src; + +-- Check how many rows there are in each bucket, there should be two rows +SELECT cnt FROM ( +SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM +test_tab GROUP BY INPUT__FILE__NAME +ORDER BY file_name DESC)a; + +-- Truncate a column on which the table is not bucketed +TRUNCATE TABLE test_tab COLUMNS (value); + +-- Check how many rows there are in each bucket, this should produce the same rows as before +-- because truncate should not break bucketing +SELECT cnt FROM ( +SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM +test_tab GROUP BY INPUT__FILE__NAME +ORDER BY file_name DESC)a; Index: ql/src/test/queries/clientpositive/truncate_column_list_bucket.q =================================================================== --- ql/src/test/queries/clientpositive/truncate_column_list_bucket.q (revision 0) +++ ql/src/test/queries/clientpositive/truncate_column_list_bucket.q (working copy) @@ -0,0 +1,33 @@ +set hive.mapred.supports.subdirectories=true; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; +set mapred.input.dir.recursive=true; + +-- Tests truncating a column from a list bucketing table + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) + +CREATE TABLE test_tab (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE; + +ALTER TABLE test_tab +SKEWED BY (key) ON ("484") +STORED AS DIRECTORIES; + +INSERT OVERWRITE TABLE test_tab PARTITION (part = '1') SELECT * FROM src; + +set hive.optimize.listbucketing=true; +SELECT * FROM test_tab WHERE part = '1' AND key = '0'; + +TRUNCATE TABLE test_tab PARTITION (part ='1') COLUMNS (value); + +-- In the following select statements the list bucketing optimization should still be used +-- In both cases value should be null + +EXPLAIN EXTENDED SELECT * FROM test_tab WHERE part = '1' AND key = '484'; + +SELECT * FROM test_tab WHERE part = '1' AND key = '484'; + +EXPLAIN EXTENDED SELECT * FROM test_tab WHERE part = '1' AND key = '0'; + +SELECT * FROM test_tab WHERE part = '1' AND key = '0'; Index: ql/src/test/queries/clientpositive/truncate_table_columns.q =================================================================== --- ql/src/test/queries/clientpositive/truncate_table_columns.q (revision 0) +++ ql/src/test/queries/clientpositive/truncate_table_columns.q (working copy) @@ -0,0 +1,35 @@ +create table src_truncate (key string, value string); +load data local inpath '../data/files/kv1.txt' into table src_truncate;; + +create table srcpart_truncate (key string, value string) partitioned by (ds string, hr string); +alter table srcpart_truncate add partition (ds='2008-04-08', hr='11'); +alter table srcpart_truncate add partition (ds='2008-04-08', hr='12'); +alter table srcpart_truncate add partition (ds='2008-04-09', hr='11'); +alter table srcpart_truncate add partition (ds='2008-04-09', hr='12'); + +load data local inpath '../data/files/kv1.txt' into table srcpart_truncate partition (ds='2008-04-08', hr='11'); +load data local inpath '../data/files/kv1.txt' into table srcpart_truncate partition (ds='2008-04-08', hr='12'); +load data local inpath '../data/files/kv1.txt' into table 
srcpart_truncate partition (ds='2008-04-09', hr='11'); +load data local inpath '../data/files/kv1.txt' into table srcpart_truncate partition (ds='2008-04-09', hr='12'); + +set hive.fetch.task.convertion=more; + +-- truncate columns of non-partitioned table +explain TRUNCATE TABLE src_truncate COLUMNS(value); +TRUNCATE TABLE src_truncate COLUMNS(value); +select * from src_truncate; + +-- truncate columns of a partition +explain TRUNCATE TABLE srcpart_truncate partition (ds='2008-04-08', hr='11') COLUMNS(value); +TRUNCATE TABLE srcpart_truncate partition (ds='2008-04-08', hr='11') COLUMNS(value); +select * from srcpart_truncate where ds='2008-04-08' and hr='11'; + +-- truncate columns of partitions with partial spec +explain TRUNCATE TABLE srcpart_truncate partition (ds, hr='12') COLUMNS(value); +TRUNCATE TABLE srcpart_truncate partition (ds, hr='12') COLUMNS(value); +select * from srcpart_truncate where hr='12'; + +-- truncate partitioned table +explain TRUNCATE TABLE srcpart_truncate COLUMNS(value); +TRUNCATE TABLE srcpart_truncate COLUMNS(value); +select * from srcpart_truncate; Index: ql/src/test/results/clientnegative/truncate_bucketed_column.q.out =================================================================== --- ql/src/test/results/clientnegative/truncate_bucketed_column.q.out (revision 0) +++ ql/src/test/results/clientnegative/truncate_bucketed_column.q.out (working copy) @@ -0,0 +1,20 @@ +PREHOOK: query: -- Tests truncating a bucketed column + +CREATE TABLE test_tab (key STRING, value STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Tests truncating a bucketed column + +CREATE TABLE test_tab (key STRING, value STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_tab +PREHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tab +POSTHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: A column on which a partition/table is bucketed cannot be truncated. 
key Index: ql/src/test/results/clientnegative/truncate_column_archived.q.out =================================================================== --- ql/src/test/results/clientnegative/truncate_column_archived.q.out (revision 0) +++ ql/src/test/results/clientnegative/truncate_column_archived.q.out (working copy) @@ -0,0 +1,20 @@ +PREHOOK: query: -- Tests truncating a column from an archived partition + +CREATE TABLE test_tab (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Tests truncating a column from an archived partition + +CREATE TABLE test_tab (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_tab +PREHOOK: query: INSERT OVERWRITE TABLE test_tab PARTITION (part = '1') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tab@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_tab PARTITION (part = '1') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tab@part=1 +POSTHOOK: Lineage: test_tab PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException [Error 10107]: Archiving methods are currently disabled. Please see the Hive wiki for more information about enabling archiving Index: ql/src/test/results/clientnegative/truncate_column_indexed_table.q.out =================================================================== --- ql/src/test/results/clientnegative/truncate_column_indexed_table.q.out (revision 0) +++ ql/src/test/results/clientnegative/truncate_column_indexed_table.q.out (working copy) @@ -0,0 +1,27 @@ +PREHOOK: query: -- Tests truncating a column from an indexed table + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Tests truncating a column from an indexed table + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_tab +PREHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tab +POSTHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE INDEX test_tab_index ON TABLE test_tab (key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX test_tab_index ON TABLE test_tab (key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +POSTHOOK: Output: default@default__test_tab_test_tab_index__ +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: Can not truncate columns from table with indexes Index: ql/src/test/results/clientnegative/truncate_column_list_bucketing.q.out 
=================================================================== --- ql/src/test/results/clientnegative/truncate_column_list_bucketing.q.out (revision 0) +++ ql/src/test/results/clientnegative/truncate_column_list_bucketing.q.out (working copy) @@ -0,0 +1,32 @@ +PREHOOK: query: -- Tests truncating a column on which a table is list bucketed + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Tests truncating a column on which a table is list bucketed + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_tab +PREHOOK: query: ALTER TABLE test_tab +SKEWED BY (key) ON ("484") +STORED AS DIRECTORIES +PREHOOK: type: ALTERTABLE_SKEWED +PREHOOK: Input: default@test_tab +PREHOOK: Output: default@test_tab +POSTHOOK: query: ALTER TABLE test_tab +SKEWED BY (key) ON ("484") +STORED AS DIRECTORIES +POSTHOOK: type: ALTERTABLE_SKEWED +POSTHOOK: Input: default@test_tab +POSTHOOK: Output: default@test_tab +PREHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tab +POSTHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: A column on which a partition/table is list bucketed cannot be truncated. key Index: ql/src/test/results/clientnegative/truncate_column_seqfile.q.out =================================================================== --- ql/src/test/results/clientnegative/truncate_column_seqfile.q.out (revision 0) +++ ql/src/test/results/clientnegative/truncate_column_seqfile.q.out (working copy) @@ -0,0 +1,20 @@ +PREHOOK: query: -- Tests truncating a column from a table stored as a sequence file + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS SEQUENCEFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Tests truncating a column from a table stored as a sequence file + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS SEQUENCEFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_tab +PREHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tab +POSTHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: Only RCFileFormat supports column truncation. 
Index: ql/src/test/results/clientnegative/truncate_nonexistant_column.q.out =================================================================== --- ql/src/test/results/clientnegative/truncate_nonexistant_column.q.out (revision 0) +++ ql/src/test/results/clientnegative/truncate_nonexistant_column.q.out (working copy) @@ -0,0 +1,20 @@ +PREHOOK: query: -- Tests attempting to truncate a column in a table that doesn't exist + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Tests attempting to truncate a column in a table that doesn't exist + +CREATE TABLE test_tab (key STRING, value STRING) STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_tab +PREHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tab +POSTHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: Invalid column reference doesnt_exist Index: ql/src/test/results/clientpositive/truncate_column.q.out =================================================================== --- ql/src/test/results/clientpositive/truncate_column.q.out (revision 0) +++ ql/src/test/results/clientpositive/truncate_column.q.out (working copy) @@ -0,0 +1,761 @@ +PREHOOK: query: -- Tests truncating column(s) from a table, also tests that stats are updated + +CREATE TABLE test_tab (key STRING, value STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Tests truncating column(s) from a table, also tests that stats are updated + +CREATE TABLE test_tab (key STRING, value STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_tab +PREHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tab +POSTHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESC FORMATTED test_tab +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESC FORMATTED test_tab +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 94 + totalSize 185 +#### A masked pattern was here #### + +# Storage Information 
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: SELECT * FROM test_tab +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test_tab +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +409 val_409 +255 val_255 +278 val_278 +98 val_98 +484 val_484 +PREHOOK: query: -- Truncate 1 column +TRUNCATE TABLE test_tab COLUMNS (key) +PREHOOK: type: TRUNCATETABLE +PREHOOK: Input: default@test_tab +PREHOOK: Output: default@test_tab +POSTHOOK: query: -- Truncate 1 column +TRUNCATE TABLE test_tab COLUMNS (key) +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Input: default@test_tab +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESC FORMATTED test_tab +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESC FORMATTED test_tab +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 94 + totalSize 150 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- First column should be null +SELECT * FROM test_tab +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: query: -- First column should be null +SELECT * FROM test_tab +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL val_238 +NULL val_86 +NULL val_311 +NULL val_27 +NULL val_165 +NULL val_409 +NULL val_255 +NULL val_278 +NULL val_98 +NULL val_484 +PREHOOK: query: -- Truncate multiple columns +INSERT OVERWRITE TABLE test_tab SELECT * FROM src LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tab +POSTHOOK: query: -- Truncate multiple columns +INSERT OVERWRITE TABLE test_tab SELECT * FROM src LIMIT 10 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: TRUNCATE TABLE test_tab COLUMNS (key, value) +PREHOOK: type: TRUNCATETABLE +PREHOOK: Input: default@test_tab +PREHOOK: Output: default@test_tab +POSTHOOK: query: TRUNCATE TABLE test_tab COLUMNS (key, value) +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Input: default@test_tab +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESC FORMATTED test_tab +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESC FORMATTED test_tab +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 94 + totalSize 75 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Both columns should be null +SELECT * FROM test_tab +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: query: -- Both columns should be null +SELECT * FROM test_tab +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: -- Truncate columns again +TRUNCATE TABLE test_tab COLUMNS (key, value) +PREHOOK: type: TRUNCATETABLE +PREHOOK: Input: 
default@test_tab +PREHOOK: Output: default@test_tab +POSTHOOK: query: -- Truncate columns again +TRUNCATE TABLE test_tab COLUMNS (key, value) +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Input: default@test_tab +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESC FORMATTED test_tab +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESC FORMATTED test_tab +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 94 + totalSize 75 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Both columns should be null +SELECT * FROM test_tab +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: query: -- Both columns should be null +SELECT * FROM test_tab +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: -- Test truncating with a binary serde +ALTER TABLE test_tab SET SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@test_tab +PREHOOK: Output: default@test_tab +POSTHOOK: query: -- Test truncating with a binary serde +ALTER TABLE test_tab SET SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@test_tab +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tab +POSTHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESC FORMATTED test_tab +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESC FORMATTED test_tab +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string from deserializer +value string from deserializer + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 94 + totalSize 185 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: SELECT * FROM test_tab +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test_tab +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +409 val_409 +255 val_255 +278 val_278 +98 val_98 +484 val_484 +PREHOOK: query: -- Truncate 1 column +TRUNCATE TABLE test_tab COLUMNS (key) +PREHOOK: type: TRUNCATETABLE +PREHOOK: Input: default@test_tab +PREHOOK: Output: default@test_tab +POSTHOOK: query: -- Truncate 1 column +TRUNCATE TABLE test_tab COLUMNS (key) +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Input: default@test_tab +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESC FORMATTED test_tab +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESC FORMATTED test_tab +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string from deserializer +value string from deserializer + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 94 + totalSize 150 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- First column should be null +SELECT * FROM test_tab +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: query: -- First column should be null +SELECT * FROM test_tab +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL val_238 +NULL val_86 +NULL val_311 +NULL val_27 +NULL val_165 +NULL val_409 +NULL val_255 +NULL val_278 +NULL val_98 +NULL val_484 +PREHOOK: query: -- Truncate 2 columns +TRUNCATE TABLE test_tab COLUMNS (key, value) +PREHOOK: type: TRUNCATETABLE +PREHOOK: Input: default@test_tab +PREHOOK: Output: default@test_tab +POSTHOOK: query: -- Truncate 2 columns +TRUNCATE TABLE test_tab COLUMNS (key, value) +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Input: default@test_tab +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESC FORMATTED test_tab +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESC FORMATTED test_tab +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string from deserializer +value string from deserializer + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 94 + totalSize 75 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Both columns should be null +SELECT * FROM test_tab +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: query: -- Both columns should be null +SELECT * FROM test_tab +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
+POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: -- Test truncating a partition +CREATE TABLE test_tab_part (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Test truncating a partition +CREATE TABLE test_tab_part (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_tab_part +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_tab_part PARTITION (part = '1') SELECT * FROM src LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tab_part@part=1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_tab_part PARTITION (part = '1') SELECT * FROM src LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tab_part@part=1 +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESC FORMATTED test_tab_part PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESC FORMATTED test_tab_part PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_tab_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 10 + rawDataSize 94 + totalSize 185 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: SELECT * FROM test_tab_part WHERE part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tab_part@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM test_tab_part WHERE part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tab_part@part=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 1 +86 val_86 1 +311 val_311 1 +27 val_27 1 +165 val_165 1 +409 val_409 1 +255 val_255 1 +278 val_278 1 +98 val_98 1 +484 val_484 1 +PREHOOK: query: TRUNCATE TABLE test_tab_part PARTITION (part = '1') COLUMNS (key) +PREHOOK: type: TRUNCATETABLE +PREHOOK: Input: default@test_tab_part +PREHOOK: Output: default@test_tab_part@part=1 +POSTHOOK: query: TRUNCATE TABLE test_tab_part PARTITION (part = '1') COLUMNS (key) +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Input: default@test_tab_part +POSTHOOK: Output: default@test_tab_part@part=1 +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
+POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESC FORMATTED test_tab_part PARTITION (part = '1') +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESC FORMATTED test_tab_part PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +# col_name data_type comment + +key string None +value string None + +# Partition Information +# col_name data_type comment + +part string None + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_tab_part +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + numFiles 1 + numRows 10 + rawDataSize 94 + totalSize 150 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- First column should be null +SELECT * FROM test_tab_part WHERE part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tab_part@part=1 +#### A masked pattern was here #### +POSTHOOK: query: -- First column should be null +SELECT * FROM test_tab_part WHERE part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tab_part@part=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab_part PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +NULL val_238 1 +NULL val_86 1 +NULL val_311 1 +NULL val_27 1 +NULL val_165 1 +NULL val_409 1 +NULL val_255 1 +NULL val_278 1 +NULL val_98 1 +NULL val_484 1 Index: ql/src/test/results/clientpositive/truncate_column_buckets.q.out =================================================================== --- ql/src/test/results/clientpositive/truncate_column_buckets.q.out (revision 0) +++ ql/src/test/results/clientpositive/truncate_column_buckets.q.out (working copy) @@ -0,0 +1,73 @@ +PREHOOK: query: -- Tests truncating columns from a bucketed table, table should remain bucketed + +CREATE TABLE test_tab (key STRING, value STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Tests truncating columns from a bucketed table, table should remain bucketed + +CREATE TABLE test_tab (key STRING, value STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_tab +PREHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_tab +POSTHOOK: query: INSERT OVERWRITE TABLE test_tab SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Check how many rows there are in each bucket, there should be two rows +SELECT cnt FROM ( +SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM +test_tab GROUP BY INPUT__FILE__NAME +ORDER BY file_name DESC)a +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: query: -- Check how many rows there are in each bucket, there should be two rows +SELECT cnt FROM ( +SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM +test_tab GROUP BY INPUT__FILE__NAME +ORDER BY file_name DESC)a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +258 +242 +PREHOOK: query: -- Truncate a column on which the table is not bucketed +TRUNCATE TABLE test_tab COLUMNS (value) +PREHOOK: type: TRUNCATETABLE +PREHOOK: Input: default@test_tab +PREHOOK: Output: default@test_tab +POSTHOOK: query: -- Truncate a column on which the table is not bucketed +TRUNCATE TABLE test_tab COLUMNS (value) +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Input: default@test_tab +POSTHOOK: Output: default@test_tab +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Check how many rows there are in each bucket, this should produce the same rows as before +-- because truncate should not break bucketing +SELECT cnt FROM ( +SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM +test_tab GROUP BY INPUT__FILE__NAME +ORDER BY file_name DESC)a +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: query: -- Check how many rows there are in each bucket, this should produce the same rows as before +-- because truncate 
should not break bucketing +SELECT cnt FROM ( +SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM +test_tab GROUP BY INPUT__FILE__NAME +ORDER BY file_name DESC)a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tab +#### A masked pattern was here #### +POSTHOOK: Lineage: test_tab.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +258 +242 Index: serde/src/java/org/apache/hadoop/hive/serde2/columnar/BytesRefArrayWritable.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/columnar/BytesRefArrayWritable.java (revision 1444230) +++ serde/src/java/org/apache/hadoop/hive/serde2/columnar/BytesRefArrayWritable.java (working copy) @@ -31,7 +31,7 @@ * BytesRefArrayWritable holds an array reference to BytesRefWritable, * and is able to resize without recreating new array if not necessary. *

- * + * * Each BytesRefArrayWritable holds instance has a valid field, * which is the desired valid number of BytesRefWritable it holds. * resetValid can reset the valid, but it will not care the underlying @@ -47,7 +47,7 @@ /** * Constructs an empty array with the specified capacity. - * + * * @param capacity * initial capacity * @exception IllegalArgumentException @@ -70,7 +70,7 @@ /** * Returns the number of valid elements. - * + * * @return the number of valid elements */ public int size() { @@ -80,7 +80,7 @@ /** * Gets the BytesRefWritable at the specified position. Make sure the position * is valid by first call resetValid. - * + * * @param index * the position index, starting from zero * @throws IndexOutOfBoundsException @@ -95,7 +95,7 @@ /** * Gets the BytesRefWritable at the specified position without checking. - * + * * @param index * the position index, starting from zero * @throws IndexOutOfBoundsException @@ -107,7 +107,7 @@ /** * Set the BytesRefWritable at the specified position with the specified * BytesRefWritable. - * + * * @param index * index position * @param bytesRefWritable @@ -116,9 +116,6 @@ * if the specified new element is null */ public void set(int index, BytesRefWritable bytesRefWritable) { - if (bytesRefWritable == null) { - throw new IllegalArgumentException("Can not assign null."); - } ensureCapacity(index + 1); bytesRefWritables[index] = bytesRefWritable; if (valid <= index) { @@ -154,7 +151,7 @@ /** * Returns true if this instance contains one or more the specified * BytesRefWritable. - * + * * @param bytesRefWritable * BytesRefWritable element to be tested * @return true if contains the specified element @@ -196,7 +193,7 @@ * elements specified by newValidCapacity argument. It will also narrow the * valid capacity when needed. Notice: it only enlarge or narrow the valid * capacity with no care of the already stored invalid BytesRefWritable. - * + * * @param newValidCapacity * the desired capacity */
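A minimal sketch of the relaxed set() contract shown in the hunk above (illustrative only, not part of the patch; the class name NullColumnSketch and the sample values are hypothetical, while BytesRefArrayWritable, BytesRefWritable, set(), unCheckedGet() and size() are the members visible in this file): once the null check is removed, a truncated column can be stored as a null element of a row and is read back as a null reference, which the query results above render as NULL.

    // Illustrative sketch only; assumes the patched BytesRefArrayWritable,
    // i.e. set() no longer rejects null elements.
    import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
    import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

    public class NullColumnSketch {
      public static void main(String[] args) {
        BytesRefArrayWritable row = new BytesRefArrayWritable(2);
        row.set(0, null);                                        // truncated column; before this patch, set() threw IllegalArgumentException here
        row.set(1, new BytesRefWritable("val_238".getBytes()));  // intact column
        System.out.println(row.unCheckedGet(0));                 // prints "null": no bytes are kept for the truncated column
        System.out.println(row.size());                          // prints "2": the valid element count still covers both slots
      }
    }

The NULLs in the truncated positions of the golden outputs above are consistent with this: a null element simply carries no bytes for its column.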