Index: shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
===================================================================
--- shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java (revision 6843)
+++ shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java (working copy)
@@ -17,10 +17,12 @@
  */
 package org.apache.hadoop.hive.shims;
 
+import java.io.DataInput;
+import java.io.DataOutput;
 import java.io.IOException;
 import java.lang.reflect.Constructor;
-import java.io.DataOutput;
-import java.io.DataInput;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -29,23 +31,25 @@
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobContext;
 import org.apache.hadoop.mapred.JobStatus;
+import org.apache.hadoop.mapred.OutputCommitter;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.TaskAttemptContext;
 import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapred.TaskID;
 import org.apache.hadoop.mapred.lib.CombineFileInputFormat;
 import org.apache.hadoop.mapred.lib.CombineFileSplit;
-import org.apache.hadoop.mapred.OutputCommitter;
-import org.apache.hadoop.mapred.TaskAttemptContext;
-import org.apache.hadoop.mapred.JobContext;
 import org.apache.hadoop.mapred.lib.NullOutputFormat;
-import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.tools.HadoopArchives;
+import org.apache.hadoop.util.ToolRunner;
 
 /**
  * Implemention of shims against Hadoop 0.20.0.
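The next hunk adds createHadoopArchive(), which drives the HadoopArchives tool through ToolRunner. As a rough usage sketch (not part of the patch; the class name and paths below are hypothetical, while the shim method and the hive.archive.har.parentdir.settable key are the ones introduced by this change), a caller reaches it through ShimLoader:

// Usage sketch only -- not part of this patch. Class name and paths are
// hypothetical; the shim API and config key come from the hunks below.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;

public class CreateHarExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // The 0.20 shim refuses to run unless this key is explicitly present.
    conf.setBoolean("hive.archive.har.parentdir.settable", false);

    // Picks the Hadoop{17,18,19,20}Shims implementation that matches the
    // Hadoop version on the classpath; only the 0.20 shim supports archiving,
    // the older ones throw RuntimeException.
    HadoopShims shim = ShimLoader.getHadoopShims();

    Path sourceDir = new Path("/user/hive/warehouse/tbl/ds=1"); // hypothetical
    Path destDir   = new Path("/tmp/hive-scratch/partlevel");   // hypothetical
    int ret = shim.createHadoopArchive(conf, sourceDir, destDir, "data.har");
    if (ret != 0) {
      throw new RuntimeException("hadoop archive exited with " + ret);
    }
  }
}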
@@ -377,15 +381,50 @@ conf.setFloat(varName, val); } + @Override + public int createHadoopArchive(Configuration conf, Path sourceDir, Path destDir, + String archiveName) throws Exception { + + HadoopArchives har = new HadoopArchives(conf); + List args = new ArrayList(); + + if (conf.get("hive.archive.har.parentdir.settable") == null) { + throw new RuntimeException("hive.archive.har.parentdir.settable is not set"); + } + boolean parentSettable = + conf.getBoolean("hive.archive.har.parentdir.settable", false); + + if (parentSettable) { + args.add("-archiveName"); + args.add(archiveName); + args.add("-p"); + args.add(sourceDir.toString()); + args.add(destDir.toString()); + } else { + args.add("-archiveName"); + args.add(archiveName); + args.add(sourceDir.toString()); + args.add(destDir.toString()); + } + + return ToolRunner.run(har, args.toArray(new String[0])); + } + public static class NullOutputCommitter extends OutputCommitter { + @Override public void setupJob(JobContext jobContext) { } + @Override public void cleanupJob(JobContext jobContext) { } + @Override public void setupTask(TaskAttemptContext taskContext) { } + @Override public boolean needsTaskCommit(TaskAttemptContext taskContext) { return false; } + @Override public void commitTask(TaskAttemptContext taskContext) { } + @Override public void abortTask(TaskAttemptContext taskContext) { } } Index: shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java =================================================================== --- shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java (revision 0) +++ shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java (revision 0) @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.shims; + +import org.apache.hadoop.fs.HarFileSystem; + +/** + * HiveHarFileSystem - fixes issue with block locations + * + */ +public class HiveHarFileSystem extends HarFileSystem { + /* + @Override + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, + long len) throws IOException { + + // In some places (e.g. FileInputFormat) this BlockLocation is used to + // figure out sizes/offsets and so a completely blank one will not work. 
+ String [] hosts = {"DUMMY_HOST"}; + return new BlockLocation[]{new BlockLocation(null, hosts, 0, file.getLen())}; + } + */ +} Index: shims/src/0.17/java/org/apache/hadoop/hive/shims/Hadoop17Shims.java =================================================================== --- shims/src/0.17/java/org/apache/hadoop/hive/shims/Hadoop17Shims.java (revision 6843) +++ shims/src/0.17/java/org/apache/hadoop/hive/shims/Hadoop17Shims.java (working copy) @@ -127,4 +127,11 @@ public void setNullOutputFormat(JobConf conf) { conf.setOutputFormat(NullOutputFormat.class); } + + @Override + public int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, + String archiveName) throws Exception { + throw new RuntimeException("Not implemented in this Hadoop version"); + } + } Index: shims/src/0.18/java/org/apache/hadoop/hive/shims/Hadoop18Shims.java =================================================================== --- shims/src/0.18/java/org/apache/hadoop/hive/shims/Hadoop18Shims.java (revision 6843) +++ shims/src/0.18/java/org/apache/hadoop/hive/shims/Hadoop18Shims.java (working copy) @@ -128,6 +128,12 @@ conf.set(varName, Float.toString(val)); } + @Override + public int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, + String archiveName) throws Exception { + throw new RuntimeException("Not implemented in this Hadoop version"); + } + public void setNullOutputFormat(JobConf conf) { conf.setOutputFormat(NullOutputFormat.class); } Index: shims/src/0.19/java/org/apache/hadoop/hive/shims/Hadoop19Shims.java =================================================================== --- shims/src/0.19/java/org/apache/hadoop/hive/shims/Hadoop19Shims.java (revision 6843) +++ shims/src/0.19/java/org/apache/hadoop/hive/shims/Hadoop19Shims.java (working copy) @@ -485,6 +485,12 @@ conf.set(varName, Float.toString(val)); } + @Override + public int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, + String archiveName) throws Exception { + throw new RuntimeException("Not implemented in this Hadoop version"); + } + public static class NullOutputCommitter extends OutputCommitter { public void setupJob(JobContext jobContext) { } public void cleanupJob(JobContext jobContext) { } Index: shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java =================================================================== --- shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java (revision 6843) +++ shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java (working copy) @@ -97,7 +97,7 @@ * This is simply the first two components of the version number * (e.g "0.18" or "0.20") */ - private static String getMajorVersion() { + public static String getMajorVersion() { String vers = VersionInfo.getVersion(); String[] parts = vers.split("\\."); Index: shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java =================================================================== --- shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java (revision 6843) +++ shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java (working copy) @@ -131,6 +131,8 @@ */ String[] getTaskJobIDs(TaskCompletionEvent t); + int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, + String archiveName) throws Exception; /** * Hive uses side effect files exclusively for it's output. It also manages * the setup/cleanup/commit of output from the hive client. 
As a result it does Index: shims/build.xml =================================================================== --- shims/build.xml (revision 6843) +++ shims/build.xml (working copy) @@ -18,8 +18,8 @@ --> - @@ -27,6 +27,7 @@ + Index: conf/hive-default.xml =================================================================== --- conf/hive-default.xml (revision 6843) +++ conf/hive-default.xml (working copy) @@ -563,4 +563,24 @@ The default partition name in case the dynamic partition column value is null/empty string or anyother values that cannot be escaped. This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the dynamic partition value should not contain this value to avoid confusions. + + fs.har.impl + org.apache.hadoop.hive.shims.HiveHarFileSystem + The implementation for accessing Hadoop Archives. Note that this won't be applicable to Hadoop vers less than 0.20 + + + + hive.archive.enabled + false + Whether archiving operations are permitted + + + + hive.archive.har.parentdir.settable + false + In new Hadoop versions, the parent directory must be set while + creating a HAR. Because this functionality is hard to detect with just version + numbers, this conf var needs to be set manually. + + Index: build.properties =================================================================== --- build.properties (revision 6843) +++ build.properties (working copy) @@ -21,6 +21,7 @@ hadoop.root.default=${build.dir.hadoop}/hadoop-${hadoop.version.ant-internal} hadoop.root=${hadoop.root.default} hadoop.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-core.jar +hadoop.tools.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-tools.jar hadoop.test.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-test.jar jetty.test.jar=${hadoop.root}/lib/jetty-5.1.4.jar servlet.test.jar=${hadoop.root}/lib/servlet-api.jar Index: metastore/src/gen-py/hive_metastore/constants.py =================================================================== --- metastore/src/gen-py/hive_metastore/constants.py (revision 6843) +++ metastore/src/gen-py/hive_metastore/constants.py (working copy) @@ -9,6 +9,10 @@ DDL_TIME = "transient_lastDdlTime" +IS_ARCHIVED = "is_archived" + +ORIGINAL_LOCATION = "original_location" + META_TABLE_COLUMNS = "columns" META_TABLE_COLUMN_TYPES = "columns.types" Index: metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (revision 6843) +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (working copy) @@ -1075,13 +1075,23 @@ boolean success = false; Path partPath = null; Table tbl = null; + Partition part = null; + boolean isArchived = false; + Path archiveParentDir = null; + try { ms.openTransaction(); - Partition part = get_partition(db_name, tbl_name, part_vals); + part = get_partition(db_name, tbl_name, part_vals); + if (part == null) { throw new NoSuchObjectException("Partition doesn't exist. 
" + part_vals); } + + isArchived = MetaStoreUtils.isArchived(part); + if (isArchived) { + archiveParentDir = MetaStoreUtils.getOriginalLocation(part); + } if (part.getSd() == null || part.getSd().getLocation() == null) { throw new MetaException("Partition metadata is corrupted"); } @@ -1094,9 +1104,17 @@ } finally { if (!success) { ms.rollbackTransaction(); - } else if (deleteData && (partPath != null)) { + } else if (deleteData && ((partPath != null) || (archiveParentDir != null))) { if (tbl != null && !isExternal(tbl)) { - wh.deleteDir(partPath, true); + // Archived partitions have har:/to_har_file as their location. + // The original directory was saved in params + if (isArchived) { + assert(archiveParentDir != null); + wh.deleteDir(archiveParentDir, true); + } else { + assert(partPath != null); + wh.deleteDir(partPath, true); + } // ok even if the data is not deleted } } Index: metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java (revision 6843) +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java (working copy) @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.metastore; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; import java.util.List; import org.apache.commons.lang.StringUtils; @@ -148,9 +150,25 @@ List parts = msdb.getPartitions(dbname, name, 0); for (Partition part : parts) { String oldPartLoc = part.getSd().getLocation(); - if (oldPartLoc.contains(oldTblLoc)) { - part.getSd().setLocation( - part.getSd().getLocation().replace(oldTblLoc, newTblLoc)); + String oldTblLocPath = new Path(oldTblLoc).toUri().getPath(); + String newTblLocPath = new Path(newTblLoc).toUri().getPath(); + if (oldPartLoc.contains(oldTblLocPath)) { + URI newPartLocUri = null; + try { + URI oldPartLocUri = new URI(oldPartLoc); + newPartLocUri = new URI( + oldPartLocUri.getScheme(), + oldPartLocUri.getUserInfo(), + oldPartLocUri.getHost(), + oldPartLocUri.getPort(), + oldPartLocUri.getPath().replace(oldTblLocPath, newTblLocPath), + oldPartLocUri.getQuery(), + oldPartLocUri.getFragment()); + } catch (URISyntaxException e) { + throw new InvalidOperationException("Old partition location " + + " is invalid. 
(" + oldPartLoc + ")"); + } + part.getSd().setLocation(newPartLocUri.toString()); msdb.alterPartition(dbname, name, part); } } Index: metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java (revision 6843) +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java (working copy) @@ -834,6 +834,26 @@ return "TRUE".equalsIgnoreCase(params.get("EXTERNAL")); } + public static boolean isArchived( + org.apache.hadoop.hive.metastore.api.Partition part) { + Map params = part.getParameters(); + if ("true".equalsIgnoreCase(params.get(Constants.IS_ARCHIVED))) { + return true; + } else { + return false; + } + } + + public static Path getOriginalLocation( + org.apache.hadoop.hive.metastore.api.Partition part) { + Map params = part.getParameters(); + assert(isArchived(part)); + String originalLocation = params.get(Constants.ORIGINAL_LOCATION); + assert( originalLocation != null); + + return new Path(originalLocation); + } + public static boolean isNonNativeTable(Table table) { if (table == null) { return false; Index: metastore/src/gen-cpp/hive_metastore_constants.h =================================================================== --- metastore/src/gen-cpp/hive_metastore_constants.h (revision 6843) +++ metastore/src/gen-cpp/hive_metastore_constants.h (working copy) @@ -15,6 +15,8 @@ hive_metastoreConstants(); std::string DDL_TIME; + std::string IS_ARCHIVED; + std::string ORIGINAL_LOCATION; std::string META_TABLE_COLUMNS; std::string META_TABLE_COLUMN_TYPES; std::string BUCKET_FIELD_NAME; Index: metastore/src/gen-cpp/hive_metastore_constants.cpp =================================================================== --- metastore/src/gen-cpp/hive_metastore_constants.cpp (revision 6843) +++ metastore/src/gen-cpp/hive_metastore_constants.cpp (working copy) @@ -12,6 +12,10 @@ hive_metastoreConstants::hive_metastoreConstants() { DDL_TIME = "transient_lastDdlTime"; + IS_ARCHIVED = "is_archived"; + + ORIGINAL_LOCATION = "original_location"; + META_TABLE_COLUMNS = "columns"; META_TABLE_COLUMN_TYPES = "columns.types"; Index: metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java =================================================================== --- metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java (revision 6843) +++ metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java (working copy) @@ -18,6 +18,10 @@ public static final String DDL_TIME = "transient_lastDdlTime"; + public static final String IS_ARCHIVED = "is_archived"; + + public static final String ORIGINAL_LOCATION = "original_location"; + public static final String META_TABLE_COLUMNS = "columns"; public static final String META_TABLE_COLUMN_TYPES = "columns.types"; Index: metastore/src/gen-php/hive_metastore_constants.php =================================================================== --- metastore/src/gen-php/hive_metastore_constants.php (revision 6843) +++ metastore/src/gen-php/hive_metastore_constants.php (working copy) @@ -10,6 +10,10 @@ $GLOBALS['hive_metastore_CONSTANTS']['DDL_TIME'] = "transient_lastDdlTime"; +$GLOBALS['hive_metastore_CONSTANTS']['IS_ARCHIVED'] = "is_archived"; + +$GLOBALS['hive_metastore_CONSTANTS']['ORIGINAL_LOCATION'] = "original_location"; + $GLOBALS['hive_metastore_CONSTANTS']['META_TABLE_COLUMNS'] = "columns"; $GLOBALS['hive_metastore_CONSTANTS']['META_TABLE_COLUMN_TYPES'] 
= "columns.types"; Index: metastore/if/hive_metastore.thrift =================================================================== --- metastore/if/hive_metastore.thrift (revision 6843) +++ metastore/if/hive_metastore.thrift (working copy) @@ -246,6 +246,15 @@ throws(1: MetaException o1) } +// For storing info about archived partitions in parameters + +// Whether the partition is archived +const string IS_ARCHIVED = "is_archived", +// The original location of the partition, before archiving. After archiving, +// this directory will contain the archive. When the partition +// is dropped, this directory will be deleted +const string ORIGINAL_LOCATION = "original_location", + // these should be needed only for backward compatibility with filestore const string META_TABLE_COLUMNS = "columns", const string META_TABLE_COLUMN_TYPES = "columns.types", @@ -261,3 +270,5 @@ const string FILE_OUTPUT_FORMAT = "file.outputformat", const string META_TABLE_STORAGE = "storage_handler", + + Index: build-common.xml =================================================================== --- build-common.xml (revision 6843) +++ build-common.xml (working copy) @@ -203,6 +203,7 @@ + Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 6843) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -124,9 +124,17 @@ // datastore. Once reloaded, the this value is reset to false. Used for // testing only. METASTOREFORCERELOADCONF("hive.metastore.force.reload.conf", false), - METASTORESERVERMINTHREADS("hive.metastore.server.min.threads", 200), METASTORESERVERMAXTHREADS("hive.metastore.server.max.threads", Integer.MAX_VALUE), + // Intermediate dir suffixes used for archiving. Not important what they + // are, as long as collisions are avoided + METASTORE_INT_ORIGINAL("hive.metastore.archive.intermediate.original", + "_INTERMEDIATE_ORIGINAL"), + METASTORE_INT_ARCHIVED("hive.metastore.archive.intermediate.archived", + "_INTERMEDIATE_ARCHIVED"), + METASTORE_INT_EXTRACTED("hive.metastore.archive.intermediate.extracted", + "_INTERMEDIATE_EXTRACTED"), + // CLI CLIIGNOREERRORS("hive.cli.errors.ignore", false), @@ -238,6 +246,10 @@ HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true), + + // For har files + HIVEARCHIVEENABLED("hive.archive.enabled", false), + HIVEHARPARENTDIRSETTABLE("hive.archive.har.parentdir.settable", false), ; public final String varname; Index: ql/src/test/results/clientnegative/archive1.q.out =================================================================== --- ql/src/test/results/clientnegative/archive1.q.out (revision 0) +++ ql/src/test/results/clientnegative/archive1.q.out (revision 0) @@ -0,0 +1,33 @@ +PREHOOK: query: -- Tests trying to archive a partition twice. +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +CREATE TABLE srcpart_archived LIKE srcpart +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Tests trying to archive a partition twice. 
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +CREATE TABLE srcpart_archived LIKE srcpart +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcpart_archived +PREHOOK: query: INSERT OVERWRITE TABLE srcpart_archived PARTITION (ds='2008-04-08', hr='12') +SELECT key, value FROM srcpart WHERE ds='2008-04-08' AND hr='12' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_archived@ds=2008-04-08/hr=12 +POSTHOOK: query: INSERT OVERWRITE TABLE srcpart_archived PARTITION (ds='2008-04-08', hr='12') +SELECT key, value FROM srcpart WHERE ds='2008-04-08' AND hr='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_archived@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: srcpart_archived PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_archived PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: query: ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: Lineage: srcpart_archived PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_archived PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_ARCHIVE +FAILED: Error in metadata: Specified partition is already archived +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask Index: ql/src/test/results/clientnegative/archive5.q.out =================================================================== --- ql/src/test/results/clientnegative/archive5.q.out (revision 0) +++ ql/src/test/results/clientnegative/archive5.q.out (revision 0) @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: Partition value contains a reserved substring (User value: 14_INTERMEDIATE_ORIGINAL Reserved substring: _INTERMEDIATE_ORIGINAL) Index: ql/src/test/results/clientnegative/archive4.q.out =================================================================== --- ql/src/test/results/clientnegative/archive4.q.out (revision 0) +++ ql/src/test/results/clientnegative/archive4.q.out (revision 0) @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: ARCHIVE can only be run on a single partition Index: ql/src/test/results/clientnegative/archive3.q.out =================================================================== --- ql/src/test/results/clientnegative/archive3.q.out (revision 0) +++ ql/src/test/results/clientnegative/archive3.q.out (revision 0) @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: ARCHIVE can only be run on partitions Index: ql/src/test/results/clientnegative/archive2.q.out =================================================================== --- ql/src/test/results/clientnegative/archive2.q.out (revision 0) +++ ql/src/test/results/clientnegative/archive2.q.out (revision 0) @@ -0,0 +1,7 @@ +PREHOOK: query: -- Tests trying to unarchive a non-archived partition +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_UNARCHIVE +FAILED: 
Error in metadata: Specified partition is not archived +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask Index: ql/src/test/results/clientpositive/archive.q.out =================================================================== --- ql/src/test/results/clientpositive/archive.q.out (revision 0) +++ ql/src/test/results/clientpositive/archive.q.out (revision 0) @@ -0,0 +1,195 @@ +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-31_915_8404207959149265563/10000 +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-31_915_8404207959149265563/10000 +48479881068 +PREHOOK: query: ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: query: ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_ARCHIVE +PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-39_278_6500531861845897423/10000 +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-39_278_6500531861845897423/10000 +48479881068 +PREHOOK: query: ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_UNARCHIVE +POSTHOOK: query: ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_UNARCHIVE +PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-45_152_7929745238260502728/10000 +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: 
file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-45_152_7929745238260502728/10000 +48479881068 +PREHOOK: query: CREATE TABLE harbucket(key INT) +PARTITIONED by (ds STRING) +CLUSTERED BY (key) INTO 10 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE harbucket(key INT) +PARTITIONED by (ds STRING) +CLUSTERED BY (key) INTO 10 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@harbucket +PREHOOK: query: INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM src WHERE key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@harbucket@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM src WHERE key < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@harbucket@ds=1 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@harbucket@ds=1 +PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-55_224_4935516234179357829/10000 +POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@harbucket@ds=1 +POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-55_224_4935516234179357829/10000 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +0 +0 +0 +10 +20 +30 +PREHOOK: query: ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: query: ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@harbucket@ds=1 +PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-01_089_7613007639376060720/10000 +POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@harbucket@ds=1 +POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-01_089_7613007639376060720/10000 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +0 +0 +0 +10 +20 +30 +PREHOOK: query: ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_UNARCHIVE +POSTHOOK: query: ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_UNARCHIVE +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@harbucket@ds=1 +PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-05_256_2444261282224863204/10000 +POSTHOOK: query: 
SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@harbucket@ds=1 +POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-05_256_2444261282224863204/10000 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +0 +0 +0 +10 +20 +30 +PREHOOK: query: DROP TABLE harbucket +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE harbucket +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@harbucket +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE old_name(key INT) +PARTITIONED by (ds STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE old_name(key INT) +PARTITIONED by (ds STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@old_name +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM src WHERE key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@old_name@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM src WHERE key < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@old_name@ds=1 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: ALTER TABLE old_name ARCHIVE PARTITION (ds='1') +PREHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: query: ALTER TABLE old_name ARCHIVE PARTITION (ds='1') +POSTHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@old_name@ds=1 +PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-14_435_1169638822418513482/10000 +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@old_name@ds=1 +POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-14_435_1169638822418513482/10000 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +48656137 +PREHOOK: query: ALTER TABLE old_name RENAME TO new_name +PREHOOK: type: ALTERTABLE_RENAME +POSTHOOK: query: ALTER TABLE old_name RENAME TO new_name +POSTHOOK: type: ALTERTABLE_RENAME +POSTHOOK: Input: default@old_name +POSTHOOK: Output: default@new_name +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@new_name@ds=1 +PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-19_685_3074346646787769085/10000 +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@new_name@ds=1 +POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-19_685_3074346646787769085/10000 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +NULL +PREHOOK: query: DROP TABLE new_name +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE new_name +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@new_name +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] Index: ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (revision 6843) +++ ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (working copy) @@ -29,8 +29,10 @@ import java.net.URI; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Set; import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -80,6 +82,7 @@ private final String outDir; private final String logDir; private final TreeMap qMap; + private final Set qSkipSet; private final LinkedList srcTables; private ParseDriver pd; @@ -173,6 +176,7 @@ conf = new HiveConf(Driver.class); this.miniMr = miniMr; qMap = new TreeMap(); + qSkipSet = new HashSet(); if (miniMr) { dfs = ShimLoader.getHadoopShims().getMiniDfs(conf, 4, true, null); @@ -230,12 +234,44 @@ DataInputStream dis = new DataInputStream(bis); StringBuilder qsb = new StringBuilder(); + // Look for a hint to not run a test on some Hadoop versions + Pattern pattern = Pattern.compile("-- EXCLUDE_HADOOP_MAJOR_VERSIONS(.*)"); + + // Read the entire query + boolean excludeQuery = false; + String hadoopVer = ShimLoader.getMajorVersion(); while (dis.available() != 0) { - qsb.append(dis.readLine() + "\n"); + String line = dis.readLine(); + + // While we are reading the lines, detect whether this query wants to be + // excluded from running because the Hadoop version is incorrect + Matcher matcher = pattern.matcher(line); + if (matcher.find()) { + String group = matcher.group(); + int start = group.indexOf('('); + int end = group.indexOf(')'); + assert end > start; + // versions might be something like '0.17, 0.19' + String versions = group.substring(start+1, end); + + Set excludedVersionSet = new HashSet(); + for (String s : versions.split("\\,")) { + s = s.trim(); + excludedVersionSet.add(s); + } + if 
(excludedVersionSet.contains(hadoopVer)) { + excludeQuery = true; + } + } + qsb.append(line + "\n"); } qMap.put(qf.getName(), qsb.toString()); - + if(excludeQuery) { + System.out.println("Due to the Hadoop Version ("+ hadoopVer + "), " + + "adding query " + qf.getName() + " to the set of tests to skip"); + qSkipSet.add(qf.getName()); + } dis.close(); } @@ -504,6 +540,10 @@ return cliDriver.processLine(qMap.get(tname)); } + public boolean shouldBeSkipped(String tname) { + return qSkipSet.contains(tname); + } + public void convertSequenceFileToTextFile() throws Exception { // Create an instance of hive in order to create the tables testWarehouse = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE); @@ -745,6 +785,7 @@ public int checkCliDriverResults(String tname) throws Exception { String[] cmdArray; + assert(qMap.containsKey(tname)); cmdArray = new String[] { "diff", "-a", @@ -816,7 +857,7 @@ /** * QTRunner: Runnable class for running a a single query file. - * + * **/ public static class QTRunner implements Runnable { private final QTestUtil qt; @@ -845,7 +886,7 @@ /** * executes a set of query files either in sequence or in parallel. Uses * QTestUtil to do so - * + * * @param qfiles * array of input query files containing arbitrary number of hive * queries @@ -855,7 +896,7 @@ * @param mt * whether to run in multithreaded mode or not * @return true if all the query files were executed successfully, else false - * + * * In multithreaded mode each query file is run in a separate thread. * the caller has to arrange that different query files do not collide * (in terms of destination tables) @@ -923,7 +964,7 @@ } return (!failed); } - + public static void outputTestFailureHelpMessage() { System.err.println("See build/ql/tmp/hive.log, " + "or try \"ant test ... -Dtest.silent=false\" to get more logs."); Index: ql/src/test/queries/clientnegative/archive2.q =================================================================== --- ql/src/test/queries/clientnegative/archive2.q (revision 0) +++ ql/src/test/queries/clientnegative/archive2.q (revision 0) @@ -0,0 +1,5 @@ +set hive.archive.enabled = true; +-- Tests trying to unarchive a non-archived partition +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); Index: ql/src/test/queries/clientnegative/archive3.q =================================================================== --- ql/src/test/queries/clientnegative/archive3.q (revision 0) +++ ql/src/test/queries/clientnegative/archive3.q (revision 0) @@ -0,0 +1,5 @@ +set hive.archive.enabled = true; +-- Tests archiving a table +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +ALTER TABLE srcpart ARCHIVE; Index: ql/src/test/queries/clientnegative/archive4.q =================================================================== --- ql/src/test/queries/clientnegative/archive4.q (revision 0) +++ ql/src/test/queries/clientnegative/archive4.q (revision 0) @@ -0,0 +1,5 @@ +set hive.archive.enabled = true; +-- Tests archiving multiple partitions +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') PARTITION (ds='2008-04-08', hr='11'); Index: ql/src/test/queries/clientnegative/archive1.q =================================================================== --- ql/src/test/queries/clientnegative/archive1.q (revision 0) +++ ql/src/test/queries/clientnegative/archive1.q (revision 0) @@ -0,0 +1,11 @@ +set hive.archive.enabled = true; +-- Tests trying to archive a partition twice. 
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +CREATE TABLE srcpart_archived LIKE srcpart; + +INSERT OVERWRITE TABLE srcpart_archived PARTITION (ds='2008-04-08', hr='12') +SELECT key, value FROM srcpart WHERE ds='2008-04-08' AND hr='12'; + +ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12'); +ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12'); Index: ql/src/test/queries/clientnegative/archive5.q =================================================================== --- ql/src/test/queries/clientnegative/archive5.q (revision 0) +++ ql/src/test/queries/clientnegative/archive5.q (revision 0) @@ -0,0 +1,5 @@ +set hive.archive.enabled = true; +-- Tests creating a partition where the partition value will collide with the +-- a intermediate directory + +ALTER TABLE srcpart ADD PARTITION (ds='2008-04-08', hr='14_INTERMEDIATE_ORIGINAL') Index: ql/src/test/queries/clientpositive/archive.q =================================================================== --- ql/src/test/queries/clientpositive/archive.q (revision 0) +++ ql/src/test/queries/clientpositive/archive.q (revision 0) @@ -0,0 +1,44 @@ +set hive.archive.enabled = true; +set hive.enforce.bucketing = true; + +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2; + +ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2; + +ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2; + +CREATE TABLE harbucket(key INT) +PARTITIONED by (ds STRING) +CLUSTERED BY (key) INTO 10 BUCKETS; + +INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM src WHERE key < 50; + +SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; +ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); +SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; +ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); +SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; + +DROP TABLE harbucket; + +CREATE TABLE old_name(key INT) +PARTITIONED by (ds STRING); + +INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM src WHERE key < 50; +ALTER TABLE old_name ARCHIVE PARTITION (ds='1'); +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2; +ALTER TABLE old_name RENAME TO new_name; +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2; + +DROP TABLE new_name; Index: ql/src/test/templates/TestCliDriver.vm =================================================================== --- ql/src/test/templates/TestCliDriver.vm (revision 6843) +++ ql/src/test/templates/TestCliDriver.vm (working copy) @@ -84,6 +84,10 @@ qt.addFile("$qf.getCanonicalPath()"); + if (qt.shouldBeSkipped("$fname")) { + return; + } + qt.cliInit("$fname"); int ecode = qt.executeClient("$fname"); if (ecode != 0) { Index: ql/src/test/templates/TestNegativeCliDriver.vm =================================================================== --- 
ql/src/test/templates/TestNegativeCliDriver.vm (revision 6843) +++ ql/src/test/templates/TestNegativeCliDriver.vm (working copy) @@ -59,6 +59,11 @@ qt.addFile("$qf.getCanonicalPath()"); + if (qt.shouldBeSkipped("$fname")) { + System.out.println("Test $fname skipped"); + return; + } + qt.cliInit("$fname"); int ecode = qt.executeClient("$fname"); if (ecode == 0) { Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (revision 6843) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (working copy) @@ -51,7 +51,7 @@ /** * A Hive Table Partition: is a fundamental storage unit within a Table. - * + * * Please note that the ql code should always go through methods of this class to access the * metadata, instead of directly accessing org.apache.hadoop.hive.metastore.api.Partition. * This helps to isolate the metastore code and the ql code. @@ -72,7 +72,7 @@ private Class outputFormatClass; private Class inputFormatClass; private URI uri; - + /** * @return The values of the partition * @see org.apache.hadoop.hive.metastore.api.Partition#getValues() @@ -82,17 +82,17 @@ } /** - * Used only for serialization. + * Used only for serialization. */ public Partition() { } - + /** * create an empty partition. * SemanticAnalyzer code requires that an empty partition when the table is not partitioned. */ public Partition(Table tbl) throws HiveException { - org.apache.hadoop.hive.metastore.api.Partition tPart = + org.apache.hadoop.hive.metastore.api.Partition tPart = new org.apache.hadoop.hive.metastore.api.Partition(); tPart.setSd(tbl.getTTable().getSd()); // TODO: get a copy initialize(tbl, tPart); @@ -105,7 +105,7 @@ /** * Create partition object with the given info. - * + * * @param tbl * Table the partition will be in. * @param partSpec @@ -158,7 +158,7 @@ /** * Initializes this object with the given variables - * + * * @param table * Table the partition belongs to * @param tPartition @@ -265,7 +265,7 @@ clsName = tPartition.getSd().getInputFormat(); } if (clsName == null) { - clsName = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName(); + clsName = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName(); } try { inputFormatClass = ((Class) Class.forName(clsName, true, @@ -285,10 +285,10 @@ clsName = tPartition.getSd().getOutputFormat(); } if (clsName == null) { - clsName = HiveSequenceFileOutputFormat.class.getName(); + clsName = HiveSequenceFileOutputFormat.class.getName(); } try { - Class c = (Class)(Class.forName(clsName, true, + Class c = (Class.forName(clsName, true, JavaUtils.getClassLoader())); // Replace FileOutputFormat for backward compatibility if (!HiveOutputFormat.class.isAssignableFrom(c)) { @@ -312,7 +312,7 @@ /* * TODO: Keeping this code around for later use when we will support * sampling on tables which are not created with CLUSTERED INTO clause - * + * * // read from table meta data int numBuckets = this.table.getNumBuckets(); * if (numBuckets == -1) { // table meta data does not have bucket * information // check if file system has multiple buckets(files) in this @@ -344,7 +344,9 @@ @SuppressWarnings("nls") public Path getBucketPath(int bucketNum) { try { - FileSystem fs = FileSystem.get(table.getDataLocation(), Hive.get() + // Previously, this got the filesystem of the Table, which could be + // different from the filesystem of the partition. 
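+      // For example (illustrative): an archived partition's location is a
+      // har:// URI, while the table's own location is still a plain
+      // filesystem URI, so the two can resolve to different FileSystems.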
+ FileSystem fs = FileSystem.get(getPartitionPath().toUri(), Hive.get() .getConf()); String pathPattern = getPartitionPath().toString(); if (getBucketCount() > 0) { @@ -445,7 +447,7 @@ public org.apache.hadoop.hive.metastore.api.Partition getTPartition() { return tPartition; } - + /** * Should be only used by serialization. */ @@ -462,5 +464,11 @@ return tPartition.getSd().getCols(); } - + public String getLocation() { + return tPartition.getSd().getLocation(); + } + + public void setLocation(String location) { + tPartition.getSd().setLocation(location); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (revision 6843) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (working copy) @@ -28,6 +28,8 @@ import java.io.OutputStreamWriter; import java.io.Serializable; import java.io.Writer; +import java.net.URI; +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; @@ -43,8 +45,10 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; @@ -66,6 +70,7 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.AddPartitionDesc; import org.apache.hadoop.hive.ql.plan.AlterTableDesc; +import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc; import org.apache.hadoop.hive.ql.plan.CreateViewDesc; @@ -78,7 +83,7 @@ import org.apache.hadoop.hive.ql.plan.ShowPartitionsDesc; import org.apache.hadoop.hive.ql.plan.ShowTableStatusDesc; import org.apache.hadoop.hive.ql.plan.ShowTablesDesc; -import org.apache.hadoop.hive.ql.plan.TouchDesc; +import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.Deserializer; @@ -88,8 +93,9 @@ import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; - +import org.apache.hadoop.util.ToolRunner; /** * DDLTask implementation. * @@ -102,6 +108,12 @@ private static final int separator = Utilities.tabCode; private static final int terminator = Utilities.newLineCode; + // These are suffixes attached to intermediate directory names used in the + // archiving / un-archiving process. 
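+  // With the default HiveConf values these are "_INTERMEDIATE_ORIGINAL",
+  // "_INTERMEDIATE_ARCHIVED" and "_INTERMEDIATE_EXTRACTED", so archiving
+  // a partition directory such as .../hr=12 briefly creates siblings like
+  // .../hr=12_INTERMEDIATE_ARCHIVED (directory names here are illustrative).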
+ private static String INTERMEDIATE_ARCHIVED_DIR_SUFFIX; + private static String INTERMEDIATE_ORIGINAL_DIR_SUFFIX; + private static String INTERMEDIATE_EXTRACTED_DIR_SUFFIX; + public DDLTask() { super(); } @@ -110,6 +122,13 @@ public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) { super.initialize(conf, queryPlan, ctx); this.conf = conf; + + INTERMEDIATE_ARCHIVED_DIR_SUFFIX = + HiveConf.getVar(conf, ConfVars.METASTORE_INT_ARCHIVED); + INTERMEDIATE_ORIGINAL_DIR_SUFFIX = + HiveConf.getVar(conf, ConfVars.METASTORE_INT_ORIGINAL); + INTERMEDIATE_EXTRACTED_DIR_SUFFIX = + HiveConf.getVar(conf, ConfVars.METASTORE_INT_EXTRACTED); } @Override @@ -150,12 +169,17 @@ return addPartition(db, addPartitionDesc); } - TouchDesc touchDesc = work.getTouchDesc(); - if (touchDesc != null) { - return touch(db, touchDesc); + AlterTableSimpleDesc simpleDesc = work.getAlterTblSimpleDesc(); + + if(simpleDesc != null) { + if (simpleDesc.getType() == AlterTableTypes.TOUCH) { + return touch(db, simpleDesc); + } else if (simpleDesc.getType() == AlterTableTypes.ARCHIVE) { + return archive(db, simpleDesc, driverContext); + } else if (simpleDesc.getType() == AlterTableTypes.UNARCHIVE) { + return unarchive(db, simpleDesc); + } } - - MsckDesc msckDesc = work.getMsckDesc(); if (msckDesc != null) { return msck(db, msckDesc); @@ -257,7 +281,7 @@ * @return * @throws HiveException */ - private int touch(Hive db, TouchDesc touchDesc) + private int touch(Hive db, AlterTableSimpleDesc touchDesc) throws HiveException { String dbName = touchDesc.getDbName(); @@ -290,7 +314,500 @@ } return 0; } + /** + * Determines whether a partition has been archived + * + * @param p + * @return + */ + private boolean isArchived(Partition p) { + Map params = p.getParameters(); + if ("true".equalsIgnoreCase(params.get( + org.apache.hadoop.hive.metastore.api.Constants.IS_ARCHIVED))) { + return true; + } else { + return false; + } + } + + private void setIsArchived(Partition p, boolean state) { + Map params = p.getParameters(); + if (state) { + params.put(org.apache.hadoop.hive.metastore.api.Constants.IS_ARCHIVED, + "true"); + } else { + params.remove(org.apache.hadoop.hive.metastore.api.Constants.IS_ARCHIVED); + } + } + + private String getOriginalLocation(Partition p) { + Map params = p.getParameters(); + return params.get( + org.apache.hadoop.hive.metastore.api.Constants.ORIGINAL_LOCATION); + } + + private void setOriginalLocation(Partition p, String loc) { + Map params = p.getParameters(); + if (loc == null) { + params.remove(org.apache.hadoop.hive.metastore.api.Constants.ORIGINAL_LOCATION); + } else { + params.put(org.apache.hadoop.hive.metastore.api.Constants.ORIGINAL_LOCATION, loc); + } + } + + // Returns only the path component of the URI + private String getArchiveDirOnly(Path parentDir, String archiveName) { + URI parentUri = parentDir.toUri(); + Path harDir = new Path(parentUri.getPath(), archiveName); + return harDir.toString(); + } + + /** + * Sets the appropriate attributes in the supplied Partition object to mark + * it as archived. Note that the metastore is not touched - a separate + * call to alter_partition is needed. 
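+   *
+   * For example (illustrative values): a partition originally stored at
+   * hdfs://nn:8020/warehouse/tbl/ds=1, archived as "data.har" with an empty
+   * dirInArchive, ends up with the location
+   * har://hdfs-nn:8020/warehouse/tbl/ds=1/data.har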
+ * + * @param p - the partition object to modify + * @param parentDir - the parent directory of the archive, which is the + * original directory that the partition's files resided in + * @param dirInArchive - the directory within the archive file that contains + * the partitions files + * @param archiveName - the name of the archive + * @throws URISyntaxException + */ + private void setArchived(Partition p, Path parentDir, String dirInArchive, String archiveName) + throws URISyntaxException { + assert(isArchived(p) == false); + Map params = p.getParameters(); + + URI parentUri = parentDir.toUri(); + String parentHost = parentUri.getHost(); + String harHost = null; + if (parentHost == null) { + harHost = ""; + } else { + harHost = parentUri.getScheme() + "-" + parentHost; + } + + // harUri is used to access the partition's files, which are in the archive + // The format of the RI is something like: + // har://underlyingfsscheme-host:port/archivepath + URI harUri = null; + if (dirInArchive.length() == 0) { + harUri = new URI("har", parentUri.getUserInfo(), harHost, parentUri.getPort(), + getArchiveDirOnly(parentDir, archiveName), + parentUri.getQuery(), parentUri.getFragment()); + } else { + harUri = new URI("har", parentUri.getUserInfo(), harHost, parentUri.getPort(), + new Path(getArchiveDirOnly(parentDir, archiveName), dirInArchive).toUri().getPath(), + parentUri.getQuery(), parentUri.getFragment()); + } + setIsArchived(p, true); + setOriginalLocation(p, parentDir.toString()); + p.setLocation(harUri.toString()); + } + + /** + * Sets the appropriate attributes in the supplied Partition object to mark + * it as not archived. Note that the metastore is not touched - a separate + * call to alter_partition is needed. + * + * @param p - the partition to modify + */ + private void setUnArchived(Partition p) { + assert(isArchived(p) == true); + String parentDir = getOriginalLocation(p); + setIsArchived(p, false); + setOriginalLocation(p, null); + assert(parentDir != null); + p.setLocation(parentDir); + } + + private boolean pathExists(FileSystem fs, Path p) throws HiveException { + try { + return fs.exists(p); + } catch (IOException e) { + throw new HiveException(e); + } + } + + private void moveDir(FileSystem fs, Path from, Path to) throws HiveException { + try { + if (!fs.rename(from, to)) { + throw new HiveException("Moving " + from + " to " + to + " failed!"); + } + } catch (IOException e) { + throw new HiveException(e); + } + } + + private void deleteDir(Path dir) throws HiveException { + try { + Warehouse wh = new Warehouse(conf); + wh.deleteDir(dir, true); + } catch (MetaException e) { + throw new HiveException(e); + } + } + + private int archive(Hive db, AlterTableSimpleDesc simpleDesc, DriverContext driverContext) + throws HiveException { + String dbName = simpleDesc.getDbName(); + String tblName = simpleDesc.getTableName(); + + Table tbl = db.getTable(dbName, tblName); + validateAlterTableType(tbl, AlterTableDesc.AlterTableTypes.ARCHIVE); + + Map partSpec = simpleDesc.getPartSpec(); + Partition p = db.getPartition(tbl, partSpec, false); + + if (tbl.getTableType() != TableType.MANAGED_TABLE) { + throw new HiveException("ARCHIVE can only be performed on managed tables"); + } + + if (p == null) { + throw new HiveException("Specified partition does not exist"); + } + + if (isArchived(p)) { + // If there were a failure right after the metadata was updated in an + // archiving operation, it's possible that the original, unarchived files + // weren't deleted. 
+      Path originalDir = new Path(getOriginalLocation(p));
+      Path leftOverIntermediateOriginal = new Path(originalDir.getParent(),
+          originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
+      try {
+        if (pathExists(leftOverIntermediateOriginal.getFileSystem(conf),
+            leftOverIntermediateOriginal)) {
+          console.printInfo("Deleting " + leftOverIntermediateOriginal +
+              " left over from a previous archiving operation");
+          deleteDir(leftOverIntermediateOriginal);
+        }
+      } catch (IOException e) {
+        throw new HiveException(e);
+      }
+      throw new HiveException("Specified partition is already archived");
+    }
+
+    Path originalDir = p.getPartitionPath();
+    Path intermediateArchivedDir = new Path(originalDir.getParent(),
+        originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
+    Path intermediateOriginalDir = new Path(originalDir.getParent(),
+        originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
+    String archiveName = "data.har";
+    FileSystem fs = null;
+    try {
+      fs = originalDir.getFileSystem(conf);
+    } catch (IOException e) {
+      throw new HiveException(e);
+    }
+
+    // The following steps seem roundabout, but they are meant to aid in
+    // recovery if a failure occurs and to keep a consistent state in the FS
+
+    // Steps:
+    // 1. Create the archive in a temporary folder
+    // 2. Move the archive dir to an intermediate dir that is at the same
+    //    level as the original partition dir. Call the new dir
+    //    intermediate-archive.
+    // 3. Rename the original partition dir to an intermediate dir. Call the
+    //    renamed dir intermediate-original
+    // 4. Rename intermediate-archive to the original partition dir
+    // 5. Change the metadata
+    // 6. Delete the original partition files in intermediate-original
+
+    // The original partition files are deleted after the metadata change
+    // because the presence of those files is used to indicate whether
+    // the original partition directory contains archived or unarchived files.
+
+    // Create an archived version of the partition in a directory ending in
+    // INTERMEDIATE_ARCHIVED_DIR_SUFFIX that is at the same level as the
+    // partition, if it does not already exist. If it does exist, we assume the
+    // dir is good to use, since the move operation that created it is atomic.
+    if (!pathExists(fs, intermediateArchivedDir) &&
+        !pathExists(fs, intermediateOriginalDir)) {
+
+      // First create the archive in a tmp dir so that if the job fails, the
+      // bad files don't pollute the filesystem
+      Path tmpDir = new Path(driverContext.getCtx().getMRScratchDir(), "partlevel");
+
+      console.printInfo("Creating " + archiveName + " for " + originalDir.toString());
+      console.printInfo("in " + tmpDir);
+      console.printInfo("Please wait... (this may take a while)");
+
+      // Create the Hadoop archive
+      HadoopShims shim = ShimLoader.getHadoopShims();
+      int ret = 0;
+      try {
+        ret = shim.createHadoopArchive(conf, originalDir, tmpDir, archiveName);
+      } catch (Exception e) {
+        throw new HiveException(e);
+      }
+      if (ret != 0) {
+        throw new HiveException("Error while creating HAR");
+      }
+      // Move from the tmp dir to an intermediate directory at the same level
+      // as the partition directory, e.g.
.../hr=12-intermediate-archived + try { + console.printInfo("Moving " + tmpDir + " to " + intermediateArchivedDir); + if (pathExists(fs, intermediateArchivedDir)) { + throw new HiveException("The intermediate archive directory already exists."); + } + fs.rename(tmpDir, intermediateArchivedDir); + } catch (IOException e) { + throw new HiveException("Error while moving tmp directory"); + } + } else { + if (pathExists(fs, intermediateArchivedDir)) { + console.printInfo("Intermediate archive directory " + intermediateArchivedDir + + " already exists. Assuming it contains an archived version of the partition"); + } + } + + // If we get to here, we know that we've archived the partition files, but + // they may be in the original partition location, or in the intermediate + // original dir. + + // Move the original parent directory to the intermediate original directory + // if the move hasn't been made already + if (!pathExists(fs, intermediateOriginalDir)) { + console.printInfo("Moving " + originalDir + " to " + + intermediateOriginalDir); + moveDir(fs, originalDir, intermediateOriginalDir); + } else { + console.printInfo(intermediateOriginalDir + " already exists. " + + "Assuming it contains the original files in the partition"); + } + + // If there's a failure from here to when the metadata is updated, + // there will be no data in the partition, or an error while trying to read + // the partition (if the archive files have been moved to the original + // partition directory.) But re-running the archive command will allow + // recovery + + // Move the intermediate archived directory to the original parent directory + if (!pathExists(fs, originalDir)) { + console.printInfo("Moving " + intermediateArchivedDir + " to " + + originalDir); + moveDir(fs, intermediateArchivedDir, originalDir); + } else { + console.printInfo(originalDir + " already exists. " + + "Assuming it contains the archived version of the partition"); + } + + // Record this change in the metastore + try { + boolean parentSettable = + conf.getBoolVar(HiveConf.ConfVars.HIVEHARPARENTDIRSETTABLE); + + // dirInArchive is the directory within the archive that has all the files + // for this partition. With older versions of Hadoop, archiving a + // a directory would produce the same directory structure + // in the archive. So if you created myArchive.har of /tmp/myDir, the + // files in /tmp/myDir would be located under myArchive.har/tmp/myDir/* + // In this case, dirInArchive should be tmp/myDir + + // With newer versions of Hadoop, the parent directory could be specified. + // Assuming the parent directory was set to /tmp/myDir when creating the + // archive, the files can be found under myArchive.har/* + // In this case, dirInArchive should be empty + + String dirInArchive = ""; + if (!parentSettable) { + dirInArchive = originalDir.toUri().getPath(); + if(dirInArchive.length() > 1 && dirInArchive.charAt(0)=='/') { + dirInArchive = dirInArchive.substring(1); + } + } + setArchived(p, originalDir, dirInArchive, archiveName); + db.alterPartition(tblName, p); + } catch (Exception e) { + throw new HiveException("Unable to change the partition info for HAR", e); + } + + // If a failure occurs here, the directory containing the original files + // will not be deleted. 
The user will run ARCHIVE again to clear this up + deleteDir(intermediateOriginalDir); + + + return 0; + } + + private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) + throws HiveException { + String dbName = simpleDesc.getDbName(); + String tblName = simpleDesc.getTableName(); + + Table tbl = db.getTable(dbName, tblName); + validateAlterTableType(tbl, AlterTableDesc.AlterTableTypes.UNARCHIVE); + + // Means user specified a table, not a partition + if (simpleDesc.getPartSpec() == null) { + throw new HiveException("ARCHIVE is for partitions only"); + } + + Map partSpec = simpleDesc.getPartSpec(); + Partition p = db.getPartition(tbl, partSpec, false); + + if (tbl.getTableType() != TableType.MANAGED_TABLE) { + throw new HiveException("UNARCHIVE can only be performed on managed tables"); + } + + if (p == null) { + throw new HiveException("Specified partition does not exist"); + } + + if (!isArchived(p)) { + Path location = new Path(p.getLocation()); + Path leftOverArchiveDir = new Path(location.getParent(), + location.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX); + + try { + if (pathExists(location.getFileSystem(conf), leftOverArchiveDir)) { + console.printInfo("Deleting " + leftOverArchiveDir + " left over " + + "from a previous unarchiving operation"); + deleteDir(leftOverArchiveDir); + } + } catch (IOException e) { + throw new HiveException(e); + } + throw new HiveException("Specified partition is not archived"); + } + + Path originalLocation = new Path(getOriginalLocation(p)); + Path sourceDir = new Path(p.getLocation()); + Path intermediateArchiveDir = new Path(originalLocation.getParent(), + originalLocation.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX); + Path intermediateExtractedDir = new Path(originalLocation.getParent(), + originalLocation.getName() + INTERMEDIATE_EXTRACTED_DIR_SUFFIX); + + Path tmpDir = new Path(driverContext.getCtx().getMRScratchDir()); + + FileSystem fs = null; + try { + fs = tmpDir.getFileSystem(conf); + // Verify that there are no files in the tmp dir, because if there are, it + // would be copied to the partition + FileStatus [] filesInTmpDir = fs.listStatus(tmpDir); + if (filesInTmpDir.length != 0) { + for (FileStatus file : filesInTmpDir) { + console.printInfo(file.getPath().toString()); + } + throw new HiveException("Temporary directory " + tmpDir + " is not empty"); + } + + } catch (IOException e) { + throw new HiveException(e); + } + + // Some sanity checks + if (originalLocation == null) { + throw new HiveException("Missing archive data in the partition"); + } + if (!"har".equals(sourceDir.toUri().getScheme())) { + throw new HiveException("Location should refer to a HAR"); + } + + // Clarification of terms: + // - The originalLocation directory represents the original directory of the + // partition's files. They now contain an archived version of those files + // eg. hdfs:/warehouse/myTable/ds=1/ + // - The source directory is the directory containing all the files that + // should be in the partition. e.g. har:/warehouse/myTable/ds=1/myTable.har/ + // Note the har:/ scheme + + // Steps: + // 1. Extract the archive in a temporary folder + // 2. Move the archive dir to an intermediate dir that is in at the same + // dir as originalLocation. Call the new dir intermediate-extracted. + // 3. Rename the original partition dir to an intermediate dir. Call the + // renamed dir intermediate-archive + // 4. Rename intermediate-extracted to the original partition dir + // 5. Change the metadata + // 6. 
+    //    Delete the archived partition files in intermediate-archive
+
+    if (!pathExists(fs, intermediateExtractedDir) &&
+        !pathExists(fs, intermediateArchiveDir)) {
+      try {
+
+        // Copy the files out of the archive into the temporary directory
+        String copySource = (new Path(sourceDir, "*")).toString();
+        String copyDest = tmpDir.toString();
+        List<String> args = new ArrayList<String>();
+        args.add("-cp");
+        args.add(copySource);
+        args.add(copyDest);
+
+        console.printInfo("Copying " + copySource + " to " + copyDest);
+        FsShell fss = new FsShell(conf);
+        int ret = 0;
+        try {
+          ret = ToolRunner.run(fss, args.toArray(new String[0]));
+        } catch (Exception e) {
+          throw new HiveException(e);
+        }
+        if (ret != 0) {
+          throw new HiveException("Error while copying files from archive");
+        }
+
+        console.printInfo("Moving " + tmpDir + " to " + intermediateExtractedDir);
+        if (fs.exists(intermediateExtractedDir)) {
+          throw new HiveException("Invalid state: the intermediate extracted " +
+              "directory already exists.");
+        }
+        fs.rename(tmpDir, intermediateExtractedDir);
+      } catch (Exception e) {
+        throw new HiveException(e);
+      }
+    }
+
+    // At this point, we know that the extracted files are in the intermediate
+    // extracted dir, or in the original directory.
+
+    if (!pathExists(fs, intermediateArchiveDir)) {
+      try {
+        console.printInfo("Moving " + originalLocation + " to " + intermediateArchiveDir);
+        fs.rename(originalLocation, intermediateArchiveDir);
+      } catch (IOException e) {
+        throw new HiveException(e);
+      }
+    } else {
+      console.printInfo(intermediateArchiveDir + " already exists. " +
+          "Assuming it contains the archived version of the partition");
+    }
+
+    // If there is a failure from here until the metadata is changed, the
+    // partition will be empty or throw errors on read.
+
+    // If the original location exists here, it must contain the extracted
+    // files, because in the previous step we moved the previous original
+    // location (containing the archived version of the files) to
+    // intermediateArchiveDir
+    if (!pathExists(fs, originalLocation)) {
+      try {
+        console.printInfo("Moving " + intermediateExtractedDir + " to " + originalLocation);
+        fs.rename(intermediateExtractedDir, originalLocation);
+      } catch (IOException e) {
+        throw new HiveException(e);
+      }
+    } else {
+      console.printInfo(originalLocation + " already exists. " +
+          "Assuming it contains the extracted files in the partition");
+    }
+
+    setUnArchived(p);
+    try {
+      db.alterPartition(tblName, p);
+    } catch (InvalidOperationException e) {
+      throw new HiveException(e);
+    }
+    // If a failure happens here, the intermediate archive files won't be
+    // deleted. The user will need to call unarchive again to clear those up.
+ deleteDir(intermediateArchiveDir); + + return 0; + } + private void validateAlterTableType( Table tbl, AlterTableDesc.AlterTableTypes alterType) throws HiveException { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (revision 6843) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (working copy) @@ -42,7 +42,7 @@ private ShowPartitionsDesc showPartsDesc; private DescTableDesc descTblDesc; private AddPartitionDesc addPartitionDesc; - private TouchDesc touchDesc; + private AlterTableSimpleDesc alterTblSimpleDesc; private MsckDesc msckDesc; private ShowTableStatusDesc showTblStatusDesc; @@ -183,10 +183,10 @@ * information about the table/partitions that we want to touch */ public DDLWork(HashSet inputs, HashSet outputs, - TouchDesc touchDesc) { + AlterTableSimpleDesc simpleDesc) { this(inputs, outputs); - this.touchDesc = touchDesc; + this.alterTblSimpleDesc = simpleDesc; } public DDLWork(HashSet inputs, HashSet outputs, @@ -383,18 +383,18 @@ } /** - * @return information about the table/partitionss we want to touch. + * @return information about the table/partitions we want to alter. */ - public TouchDesc getTouchDesc() { - return touchDesc; + public AlterTableSimpleDesc getAlterTblSimpleDesc() { + return alterTblSimpleDesc; } /** - * @param touchDesc - * information about the table/partitions we want to touch. + * @param desc + * information about the table/partitions we want to alter. */ - public void setTouchDesc(TouchDesc touchDesc) { - this.touchDesc = touchDesc; + public void setAlterTblSimpleDesc(AlterTableSimpleDesc desc) { + this.alterTblSimpleDesc = desc; } /** Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java (revision 0) @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * ArchiveDesc. + * + */ +public class ArchiveDesc extends DDLDesc { + +} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java (revision 6843) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java (working copy) @@ -1,81 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.plan; - -import java.util.LinkedHashMap; -import java.util.Map; - -/** - * Contains information needed to touch a partition (cause pre/post hooks to - * fire). - */ -public class TouchDesc extends DDLDesc { - private String tableName; - private String dbName; - private LinkedHashMap partSpec; - - - public TouchDesc() { - } - - /** - * @param dbName - * database that contains the table / partition - * @param tableName - * table containing the partition - * @param partSpec - * partition specification. Null if touching a table. - */ - public TouchDesc(String dbName, String tableName, - Map partSpec) { - super(); - this.dbName = dbName; - this.tableName = tableName; - if(partSpec == null) { - this.partSpec = null; - } else { - this.partSpec = new LinkedHashMap(partSpec); - } - } - - public String getTableName() { - return tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public String getDbName() { - return dbName; - } - - public void setDbName(String dbName) { - this.dbName = dbName; - } - - public LinkedHashMap getPartSpec() { - return partSpec; - } - - public void setPartSpec(LinkedHashMap partSpec) { - this.partSpec = partSpec; - } - -} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java (revision 0) @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; +import java.util.LinkedHashMap; + +/** + * ArchiveWork. 
+ * + */ +@Explain(displayName = "Map Reduce") +public class ArchiveWork implements Serializable { + private static final long serialVersionUID = 1L; + private String tableName; + private String dbName; + private LinkedHashMap partSpec; + private ArchiveActionType type; + + public static enum ArchiveActionType { + ARCHIVE, UNARCHIVE + }; + + + public ArchiveActionType getType() { + return type; + } + + public void setType(ArchiveActionType type) { + this.type = type; + } + + public ArchiveWork() { + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java (revision 0) @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; + +/** + * Contains information needed to modify a partition or a table + */ +public class AlterTableSimpleDesc extends DDLDesc { + private String tableName; + private String dbName; + private LinkedHashMap partSpec; + + AlterTableTypes type; + + public AlterTableSimpleDesc() { + } + + /** + * @param dbName + * database that contains the table / partition + * @param tableName + * table containing the partition + * @param partSpec + * partition specification. Null if touching a table. 
+ */ + public AlterTableSimpleDesc(String dbName, String tableName, + Map partSpec, AlterTableDesc.AlterTableTypes type) { + super(); + this.dbName = dbName; + this.tableName = tableName; + if(partSpec == null) { + this.partSpec = null; + } else { + this.partSpec = new LinkedHashMap(partSpec); + } + this.type = type; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public String getDbName() { + return dbName; + } + + public void setDbName(String dbName) { + this.dbName = dbName; + } + + public AlterTableDesc.AlterTableTypes getType() { + return type; + } + + public void setType(AlterTableDesc.AlterTableTypes type) { + this.type = type; + } + + public LinkedHashMap getPartSpec() { + return partSpec; + } + + public void setPartSpec(LinkedHashMap partSpec) { + this.partSpec = partSpec; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java (revision 6843) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java (working copy) @@ -42,7 +42,7 @@ public static enum AlterTableTypes { RENAME, ADDCOLS, REPLACECOLS, ADDPROPS, ADDSERDE, ADDSERDEPROPS, ADDFILEFORMAT, ADDCLUSTERSORTCOLUMN, RENAMECOLUMN, ADDPARTITION, - TOUCH + TOUCH, ARCHIVE, UNARCHIVE, }; AlterTableTypes op; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (revision 6843) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (working copy) @@ -149,7 +149,16 @@ UNSUPPORTED_TYPE("DATE, DATETIME, and TIMESTAMP types aren't supported yet. Please use " + "STRING instead."), CREATE_NON_NATIVE_AS("CREATE TABLE AS SELECT cannot be used for a non-native table"), - LOAD_INTO_NON_NATIVE("A non-native table cannot be used as target for LOAD"); + LOAD_INTO_NON_NATIVE("A non-native table cannot be used as target for LOAD"), + OVERWRITE_ARCHIVED_PART("Cannot overwrite an archived partition. " + + "Unarchive before running this command."), + ARCHIVE_METHODS_DISABLED("Archiving methods are currently disabled. 
" + + "Please see the Hive wiki for more information about enabling archiving."), + ARCHIVE_ON_MULI_PARTS("ARCHIVE can only be run on a single partition"), + UNARCHIVE_ON_MULI_PARTS("ARCHIVE can only be run on a single partition"), + ARCHIVE_ON_TABLE("ARCHIVE can only be run on partitions"), + RESERVED_PART_VAL("Partition value contains a reserved substring"), + ; private String mesg; private String sqlState; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (revision 6843) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (working copy) @@ -95,6 +95,8 @@ TOK_ALTERTABLE_ADDPARTS; TOK_ALTERTABLE_DROPPARTS; TOK_ALTERTABLE_TOUCH; +TOK_ALTERTABLE_ARCHIVE; +TOK_ALTERTABLE_UNARCHIVE; TOK_ALTERTABLE_SERDEPROPERTIES; TOK_ALTERTABLE_SERIALIZER; TOK_ALTERTABLE_FILEFORMAT; @@ -281,6 +283,8 @@ | alterStatementSuffixDropPartitions | alterStatementSuffixAddPartitions | alterStatementSuffixTouch + | alterStatementSuffixArchive + | alterStatementSuffixUnArchive | alterStatementSuffixProperties | alterStatementSuffixSerdeProperties | alterStatementSuffixFileFormat @@ -335,6 +339,20 @@ -> ^(TOK_ALTERTABLE_TOUCH Identifier (partitionSpec)*) ; +alterStatementSuffixArchive +@init { msgs.push("archive statement"); } +@after { msgs.pop(); } + : Identifier KW_ARCHIVE (partitionSpec)* + -> ^(TOK_ALTERTABLE_ARCHIVE Identifier (partitionSpec)*) + ; + +alterStatementSuffixUnArchive +@init { msgs.push("unarchive statement"); } +@after { msgs.pop(); } + : Identifier KW_UNARCHIVE (partitionSpec)* + -> ^(TOK_ALTERTABLE_UNARCHIVE Identifier (partitionSpec)*) + ; + partitionLocation @init { msgs.push("partition location"); } @after { msgs.pop(); } @@ -1625,6 +1643,8 @@ KW_SEMI: 'SEMI'; KW_LATERAL: 'LATERAL'; KW_TOUCH: 'TOUCH'; +KW_ARCHIVE: 'ARCHIVE'; +KW_UNARCHIVE: 'UNARCHIVE'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. 
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (revision 6843) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (working copy) @@ -46,6 +46,8 @@ commandType.put(HiveParser.TOK_ALTERTABLE_DROPPARTS, "ALTERTABLE_DROPPARTS"); commandType.put(HiveParser.TOK_ALTERTABLE_ADDPARTS, "ALTERTABLE_ADDPARTS"); commandType.put(HiveParser.TOK_ALTERTABLE_TOUCH, "ALTERTABLE_TOUCH"); + commandType.put(HiveParser.TOK_ALTERTABLE_ARCHIVE, "ALTERTABLE_ARCHIVE"); + commandType.put(HiveParser.TOK_ALTERTABLE_UNARCHIVE, "ALTERTABLE_UNARCHIVE"); commandType.put(HiveParser.TOK_ALTERTABLE_PROPERTIES, "ALTERTABLE_PROPERTIES"); commandType.put(HiveParser.TOK_ALTERTABLE_SERIALIZER, "ALTERTABLE_SERIALIZER"); commandType.put(HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES, "ALTERTABLE_SERDEPROPERTIES"); @@ -98,6 +100,8 @@ case HiveParser.TOK_ALTERTABLE_FILEFORMAT: case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: case HiveParser.TOK_ALTERTABLE_TOUCH: + case HiveParser.TOK_ALTERTABLE_ARCHIVE: + case HiveParser.TOK_ALTERTABLE_UNARCHIVE: return new DDLSemanticAnalyzer(conf); case HiveParser.TOK_CREATEFUNCTION: case HiveParser.TOK_DROPFUNCTION: Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 6843) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -3253,6 +3253,10 @@ dest_tab = dest_part.getTable(); dest_path = dest_part.getPath()[0]; + if ("har".equalsIgnoreCase(dest_path.toUri().getScheme())) { + throw new SemanticException(ErrorMsg.OVERWRITE_ARCHIVED_PART + .getMsg()); + } queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri()); table_desc = Utilities.getTableDesc(dest_tab); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (revision 6843) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (working copy) @@ -20,11 +20,14 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.Set; +import java.util.Map.Entry; import org.antlr.runtime.tree.CommonTree; import org.antlr.runtime.tree.Tree; @@ -32,6 +35,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; @@ -40,6 +44,7 @@ import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.plan.AddPartitionDesc; import org.apache.hadoop.hive.ql.plan.AlterTableDesc; +import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; import org.apache.hadoop.hive.ql.plan.DescFunctionDesc; import org.apache.hadoop.hive.ql.plan.DescTableDesc; @@ -51,7 +56,6 @@ import org.apache.hadoop.hive.ql.plan.ShowTableStatusDesc; import 
org.apache.hadoop.hive.ql.plan.ShowTablesDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.plan.TouchDesc; import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; @@ -64,6 +68,8 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { private static final Log LOG = LogFactory.getLog("hive.ql.parse.DDLSemanticAnalyzer"); public static final Map TokenToTypeName = new HashMap(); + + public static final Set reservedPartitionValues = new HashSet(); static { TokenToTypeName.put(HiveParser.TOK_BOOLEAN, Constants.BOOLEAN_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_TINYINT, Constants.TINYINT_TYPE_NAME); @@ -89,6 +95,12 @@ public DDLSemanticAnalyzer(HiveConf conf) throws SemanticException { super(conf); + // Partition can't have this name + reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.DEFAULTPARTITIONNAME)); + // Partition value can't end in this suffix + reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_ORIGINAL)); + reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_ARCHIVED)); + reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_EXTRACTED)); } @Override @@ -121,6 +133,10 @@ analyzeAlterTableRename(ast); } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_TOUCH) { analyzeAlterTableTouch(ast); + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ARCHIVE) { + analyzeAlterTableArchive(ast, false); + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_UNARCHIVE) { + analyzeAlterTableArchive(ast, true); } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ADDCOLS) { analyzeAlterTableModifyCols(ast, AlterTableTypes.ADDCOLS); } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_REPLACECOLS) { @@ -551,6 +567,7 @@ break; case HiveParser.TOK_PARTSPEC: if (currentPart != null) { + validatePartitionValues(currentPart); AddPartitionDesc addPartitionDesc = new AddPartitionDesc( MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, currentPart, currentLocation, ifNotExists); @@ -572,6 +589,7 @@ // add the last one if (currentPart != null) { + validatePartitionValues(currentPart); AddPartitionDesc addPartitionDesc = new AddPartitionDesc( MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, currentPart, currentLocation, ifNotExists); @@ -599,20 +617,50 @@ List> partSpecs = getPartitionSpecs(ast); if (partSpecs.size() == 0) { - TouchDesc touchDesc = new TouchDesc( - MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, null); + AlterTableSimpleDesc touchDesc = new AlterTableSimpleDesc( + MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, null, + AlterTableDesc.AlterTableTypes.TOUCH); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), touchDesc), conf)); } else { for (Map partSpec : partSpecs) { - TouchDesc touchDesc = new TouchDesc( - MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec); + AlterTableSimpleDesc touchDesc = new AlterTableSimpleDesc( + MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec, + AlterTableDesc.AlterTableTypes.TOUCH); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), touchDesc), conf)); } } } + private void analyzeAlterTableArchive(CommonTree ast, boolean isUnArchive) + throws SemanticException { + + if (!conf.getBoolVar(HiveConf.ConfVars.HIVEARCHIVEENABLED)) { + throw new SemanticException(ErrorMsg.ARCHIVE_METHODS_DISABLED.getMsg()); + + } + String tblName = 
unescapeIdentifier(ast.getChild(0).getText());
+    // partition name to value
+    List<Map<String, String>> partSpecs = getPartitionSpecs(ast);
+    if (partSpecs.size() > 1) {
+      throw new SemanticException(isUnArchive ?
+          ErrorMsg.UNARCHIVE_ON_MULI_PARTS.getMsg() :
+          ErrorMsg.ARCHIVE_ON_MULI_PARTS.getMsg());
+    }
+    if (partSpecs.size() == 0) {
+      throw new SemanticException(ErrorMsg.ARCHIVE_ON_TABLE.getMsg());
+    }
+
+    Map<String, String> partSpec = partSpecs.get(0);
+    AlterTableSimpleDesc archiveDesc = new AlterTableSimpleDesc(
+        MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec,
+        (isUnArchive ? AlterTableTypes.UNARCHIVE : AlterTableTypes.ARCHIVE));
+    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
+        archiveDesc), conf));
+
+  }
+
   /**
    * Verify that the information in the metastore matches up with the data on
    * the fs.
@@ -667,4 +715,25 @@
     }
     return partSpecs;
   }
+
+  /**
+   * Certain partition values are used by Hive, e.g. the default partition
+   * name in dynamic partitioning and the intermediate partition values used
+   * in the archiving process. Naturally, we prohibit the user from creating
+   * partitions with these reserved values. The check that this function
+   * performs is more restrictive than the actual limitation, but it is
+   * simpler. This should be acceptable since the reserved names are fairly
+   * long and uncommon.
+   */
+  private void validatePartitionValues(Map<String, String> partSpec)
+      throws SemanticException {
+
+    for (Entry<String, String> e : partSpec.entrySet()) {
+      for (String s : reservedPartitionValues) {
+        if (e.getValue().contains(s)) {
+          throw new SemanticException(ErrorMsg.RESERVED_PART_VAL.getMsg(
+              "(User value: " + e.getValue() + " Reserved substring: " + s + ")"));
+        }
+      }
+    }
+  }
 }
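Two self-contained sketches follow to make the behavior introduced by this patch easier to review. Both are illustrative only: the warehouse path, namenode host and port, table name, and the literal reserved strings are assumptions, not values taken from the patch.

First, a minimal restatement of the location rewrite that setArchived() in the DDLTask hunk above performs for the parent-settable case (dirInArchive == ""), using nothing but java.net.URI:

import java.net.URI;
import java.net.URISyntaxException;

// Sketch of the har:// location rewrite described in setArchived().
// The hdfs path, host, and port are made-up example values.
public class HarLocationSketch {
  public static void main(String[] args) throws URISyntaxException {
    // Original partition directory (the "parentDir" passed to setArchived)
    URI parent = new URI("hdfs://namenode:8020/user/hive/warehouse/page_view/ds=2008-06-08");
    String archiveName = "data.har";

    // The har authority encodes the underlying scheme and host as "scheme-host"
    String harHost = (parent.getHost() == null)
        ? "" : parent.getScheme() + "-" + parent.getHost();

    // With a settable parent dir (dirInArchive == ""), the new partition
    // location is simply <original path>/data.har behind the har filesystem
    URI harUri = new URI("har", parent.getUserInfo(), harHost, parent.getPort(),
        parent.getPath() + "/" + archiveName, parent.getQuery(), parent.getFragment());

    // Prints: har://hdfs-namenode:8020/user/hive/warehouse/page_view/ds=2008-06-08/data.har
    System.out.println(harUri);
  }
}

For older Hadoop versions, where the archive parent directory is not settable, the original path (minus its leading slash) reappears inside the archive, which is what the dirInArchive handling in archive() accounts for.

Second, a standalone version of the reserved-partition-value check in validatePartitionValues() above. The reserved strings here stand in for the values that DDLSemanticAnalyzer reads from HiveConf (the default partition name and the three intermediate-directory suffixes):

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Illustrative version of validatePartitionValues(); the reserved literals are
// assumed stand-ins for the values read from HiveConf.
public class ReservedPartitionValueCheck {
  private static final List<String> RESERVED = Arrays.asList(
      "__HIVE_DEFAULT_PARTITION__",   // assumed default partition name
      "_INTERMEDIATE_ORIGINAL",       // assumed archiving suffixes
      "_INTERMEDIATE_ARCHIVED",
      "_INTERMEDIATE_EXTRACTED");

  static void validate(Map<String, String> partSpec) {
    for (Map.Entry<String, String> e : partSpec.entrySet()) {
      for (String reserved : RESERVED) {
        if (e.getValue().contains(reserved)) {
          throw new IllegalArgumentException("Partition value '" + e.getValue()
              + "' contains reserved substring '" + reserved + "'");
        }
      }
    }
  }

  public static void main(String[] args) {
    Map<String, String> ok = new LinkedHashMap<String, String>();
    ok.put("ds", "2008-06-08");
    validate(ok);   // passes

    Map<String, String> bad = new LinkedHashMap<String, String>();
    bad.put("ds", "2008-06-08_INTERMEDIATE_ORIGINAL");
    validate(bad);  // throws IllegalArgumentException
  }
}

As the javadoc notes, matching on a substring is stricter than strictly necessary, but it keeps the check trivial.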