Index: shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java =================================================================== --- shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java (revision 6055) +++ shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java (working copy) @@ -19,6 +19,8 @@ import java.io.IOException; import java.lang.reflect.Constructor; +import java.util.ArrayList; +import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -37,6 +39,8 @@ import org.apache.hadoop.mapred.TaskID; import org.apache.hadoop.mapred.lib.CombineFileInputFormat; import org.apache.hadoop.mapred.lib.CombineFileSplit; +import org.apache.hadoop.tools.HadoopArchives; +import org.apache.hadoop.util.ToolRunner; /** * Implemention of shims against Hadoop 0.20.0. @@ -76,7 +80,7 @@ * */ public class MiniDFSShim implements HadoopShims.MiniDFSShim { - private MiniDFSCluster cluster; + private final MiniDFSCluster cluster; public MiniDFSShim(MiniDFSCluster cluster) { this.cluster = cluster; @@ -335,4 +339,33 @@ public void setFloatConf(Configuration conf, String varName, float val) { conf.setFloat(varName, val); } + + @Override + public int createHadoopArchive(Configuration conf, Path sourceDir, Path destDir, + String archiveName) throws Exception { + + HadoopArchives har = new HadoopArchives(conf); + List args = new ArrayList(); + + if (conf.get("hive.archive.har.parentdir.settable") == null) { + throw new RuntimeException("hive.archive.har.parentdir.settable is not set"); + } + boolean parentSettable = + conf.getBoolean("hive.archive.har.parentdir.settable", false); + + if (parentSettable) { + args.add("-archiveName"); + args.add(archiveName); + args.add("-p"); + args.add(sourceDir.toString()); + args.add(destDir.toString()); + } else { + args.add("-archiveName"); + args.add(archiveName); + args.add(sourceDir.toString()); + args.add(destDir.toString()); + } + + return ToolRunner.run(har, args.toArray(new String[0])); + } } Index: shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java =================================================================== --- shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java (revision 0) +++ shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java (revision 0) @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
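[Reviewer note] For reference, a minimal sketch of how a caller can drive the new Hadoop20Shims.createHadoopArchive method added above; the paths and archive name are illustrative, and the pre-0.20 shims simply throw. Note that hive.archive.har.parentdir.settable must be set explicitly, since the shim treats an unset value as an error.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;

public class CreateHarExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // The 0.20 shim refuses to guess this; it must be set explicitly.
    conf.setBoolean("hive.archive.har.parentdir.settable", false);

    HadoopShims shims = ShimLoader.getHadoopShims();
    // With parentdir.settable=true the shim passes "-archiveName data.har -p <src> <dest>",
    // otherwise "-archiveName data.har <src> <dest>" to the HadoopArchives tool.
    int rc = shims.createHadoopArchive(conf,
        new Path("/user/hive/warehouse/t/ds=1"),  // partition (source) directory, illustrative
        new Path("/tmp/hive-scratch/partlevel"),  // destination directory for the HAR, illustrative
        "data.har");
    if (rc != 0) {
      throw new RuntimeException("HAR creation failed, exit code " + rc);
    }
  }
}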
+ */ + +package org.apache.hadoop.hive.shims; + +import org.apache.hadoop.fs.HarFileSystem; + +/** + * HiveHarFileSystem - fixes issue with block locations + * + */ +public class HiveHarFileSystem extends HarFileSystem { + /* + @Override + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, + long len) throws IOException { + + // In some places (e.g. FileInputFormat) this BlockLocation is used to + // figure out sizes/offsets and so a completely blank one will not work. + String [] hosts = {"DUMMY_HOST"}; + return new BlockLocation[]{new BlockLocation(null, hosts, 0, file.getLen())}; + } + */ +} Index: shims/src/0.17/java/org/apache/hadoop/hive/shims/Hadoop17Shims.java =================================================================== --- shims/src/0.17/java/org/apache/hadoop/hive/shims/Hadoop17Shims.java (revision 6055) +++ shims/src/0.17/java/org/apache/hadoop/hive/shims/Hadoop17Shims.java (working copy) @@ -114,4 +114,10 @@ public void setFloatConf(Configuration conf, String varName, float val) { conf.set(varName, Float.toString(val)); } + + @Override + public int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, + String archiveName) throws Exception { + throw new RuntimeException("Not implemented in this Hadoop version"); + } } Index: shims/src/0.18/java/org/apache/hadoop/hive/shims/Hadoop18Shims.java =================================================================== --- shims/src/0.18/java/org/apache/hadoop/hive/shims/Hadoop18Shims.java (revision 6055) +++ shims/src/0.18/java/org/apache/hadoop/hive/shims/Hadoop18Shims.java (working copy) @@ -121,4 +121,10 @@ public void setFloatConf(Configuration conf, String varName, float val) { conf.set(varName, Float.toString(val)); } + + @Override + public int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, + String archiveName) throws Exception { + throw new RuntimeException("Not implemented in this Hadoop version"); + } } Index: shims/src/0.19/java/org/apache/hadoop/hive/shims/Hadoop19Shims.java =================================================================== --- shims/src/0.19/java/org/apache/hadoop/hive/shims/Hadoop19Shims.java (revision 6055) +++ shims/src/0.19/java/org/apache/hadoop/hive/shims/Hadoop19Shims.java (working copy) @@ -474,4 +474,10 @@ public void setFloatConf(Configuration conf, String varName, float val) { conf.set(varName, Float.toString(val)); } + + @Override + public int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, + String archiveName) throws Exception { + throw new RuntimeException("Not implemented in this Hadoop version"); + } } Index: shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java =================================================================== --- shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java (revision 6055) +++ shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java (working copy) @@ -97,7 +97,7 @@ * This is simply the first two components of the version number * (e.g "0.18" or "0.20") */ - private static String getMajorVersion() { + public static String getMajorVersion() { String vers = VersionInfo.getVersion(); String[] parts = vers.split("\\."); Index: shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java =================================================================== --- shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java (revision 6055) +++ shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java (working copy) @@ -119,11 
+119,13 @@ /** * getTaskJobIDs returns an array of String with two elements. The first * element is a string representing the task id and the second is a string - * representing the job id. This is necessary as TaskID and TaskAttemptID - * are not supported in Haddop 0.17 + * representing the job id. This is necessary as TaskID and TaskAttemptID + * are not supported in Hadoop 0.17 */ String[] getTaskJobIDs(TaskCompletionEvent t); + int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, + String archiveName) throws Exception; /** * InputSplitShim. * Index: shims/build.xml =================================================================== --- shims/build.xml (revision 6055) +++ shims/build.xml (working copy) @@ -18,8 +18,8 @@ --> - @@ -27,6 +27,7 @@ + Index: conf/hive-default.xml =================================================================== --- conf/hive-default.xml (revision 6055) +++ conf/hive-default.xml (working copy) @@ -551,4 +551,24 @@ The default partition name in case the dynamic partition column value is null/empty string or anyother values that cannot be escaped. This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the dynamic partition value should not contain this value to avoid confusions. + + fs.har.impl + org.apache.hadoop.hive.shims.HiveHarFileSystem + The implementation for accessing Hadoop Archives. Note that this won't be applicable to Hadoop vers less than 0.20 + + + + hive.archive.enabled + false + Whether archiving operations are permitted + + + + hive.archive.har.parentdir.settable + false + In new Hadoop versions, the parent directory must be set while + creating a HAR. Because this functionality is hard to detect with just version + numbers, this conf var needs to be set manually. + + Index: build.properties =================================================================== --- build.properties (revision 6055) +++ build.properties (working copy) @@ -21,6 +21,7 @@ hadoop.root.default=${build.dir.hadoop}/hadoop-${hadoop.version.ant-internal} hadoop.root=${hadoop.root.default} hadoop.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-core.jar +hadoop.tools.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-tools.jar hadoop.test.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-test.jar jetty.test.jar=${hadoop.root}/lib/jetty-5.1.4.jar servlet.test.jar=${hadoop.root}/lib/servlet-api.jar Index: metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (revision 6055) +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (working copy) @@ -1075,13 +1075,23 @@ boolean success = false; Path partPath = null; Table tbl = null; + Partition part = null; + boolean isArchived = false; + Path archiveParentDir = null; + try { ms.openTransaction(); - Partition part = get_partition(db_name, tbl_name, part_vals); + part = get_partition(db_name, tbl_name, part_vals); + if (part == null) { throw new NoSuchObjectException("Partition doesn't exist. 
" + part_vals); } + + isArchived = MetaStoreUtils.isArchived(part); + if (isArchived) { + archiveParentDir = MetaStoreUtils.getOriginalDir(part); + } if (part.getSd() == null || part.getSd().getLocation() == null) { throw new MetaException("Partition metadata is corrupted"); } @@ -1094,9 +1104,17 @@ } finally { if (!success) { ms.rollbackTransaction(); - } else if (deleteData && (partPath != null)) { + } else if (deleteData && ((partPath != null) || (archiveParentDir != null))) { if (tbl != null && !isExternal(tbl)) { - wh.deleteDir(partPath, true); + // Archived partitions have har:/to_har_file as their location. + // The original directory was saved in params + if (isArchived) { + assert(archiveParentDir != null); + wh.deleteDir(MetaStoreUtils.getOriginalDir(part), true); + } else { + assert(partPath != null); + wh.deleteDir(partPath, true); + } // ok even if the data is not deleted } } Index: metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java (revision 6055) +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java (working copy) @@ -61,6 +61,17 @@ public static final String DEFAULT_DATABASE_NAME = "default"; + // Keys stored in the parameters field of a Partition + + // Whether the partition is archived + public static final String IS_ARCHIVED_KEY = "IS_ARCHIVED"; + // The location of the archive in the underlying filesystem + public static final String ARCHIVE_PATH_KEY = "ARCHIVE_PATH"; + // The original location of the partition, before archiving. After archiving, + // this directory will contain the archive. When the partition + // is dropped, this directory will be deleted + public static final String ORIGINAL_LOCATION_KEY = "ORIGINAL_LOCATION"; + /** * printStackTrace * @@ -834,6 +845,24 @@ return "TRUE".equalsIgnoreCase(params.get("EXTERNAL")); } + public static boolean isArchived(org.apache.hadoop.hive.metastore.api.Partition part) { + Map params = part.getParameters(); + if ("true".equalsIgnoreCase(params.get(MetaStoreUtils.IS_ARCHIVED_KEY))) { + return true; + } else { + return false; + } + } + + public static Path getOriginalDir(org.apache.hadoop.hive.metastore.api.Partition part) { + Map params = part.getParameters(); + assert(isArchived(part)); + String parentDir = params.get(MetaStoreUtils.ARCHIVE_PATH_KEY); + assert( parentDir != null); + + return new Path(parentDir); + } + public static boolean isNonNativeTable(Table table) { if (table == null) { return false; Index: build-common.xml =================================================================== --- build-common.xml (revision 6055) +++ build-common.xml (working copy) @@ -203,6 +203,7 @@ + Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 6055) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -236,6 +236,10 @@ HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true), + + // For har files + HIVEARCHIVEENABLED("hive.archive.enabled", false), + HIVEHARPARENTDIRSETTABLE("hive.archive.har.parentdir.settable", false), ; public final String 
varname; Index: ql/src/test/results/clientnegative/archive1.q.out =================================================================== --- ql/src/test/results/clientnegative/archive1.q.out (revision 0) +++ ql/src/test/results/clientnegative/archive1.q.out (revision 0) @@ -0,0 +1,33 @@ +PREHOOK: query: -- Tests trying to archive a partition twice. +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 18, 0.19) + +CREATE TABLE srcpart_archived LIKE srcpart +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Tests trying to archive a partition twice. +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 18, 0.19) + +CREATE TABLE srcpart_archived LIKE srcpart +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcpart_archived +PREHOOK: query: INSERT OVERWRITE TABLE srcpart_archived PARTITION (ds='2008-04-08', hr='12') +SELECT key, value FROM srcpart WHERE ds='2008-04-08' AND hr='12' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_archived@ds=2008-04-08/hr=12 +POSTHOOK: query: INSERT OVERWRITE TABLE srcpart_archived PARTITION (ds='2008-04-08', hr='12') +SELECT key, value FROM srcpart WHERE ds='2008-04-08' AND hr='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_archived@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: srcpart_archived PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_archived PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: query: ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: Lineage: srcpart_archived PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_archived PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_ARCHIVE +FAILED: Error in metadata: Specified partition is already archived +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask Index: ql/src/test/results/clientnegative/archive4.q.out =================================================================== --- ql/src/test/results/clientnegative/archive4.q.out (revision 0) +++ ql/src/test/results/clientnegative/archive4.q.out (revision 0) @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: ARCHIVE can only be run on a single partition Index: ql/src/test/results/clientnegative/archive3.q.out =================================================================== --- ql/src/test/results/clientnegative/archive3.q.out (revision 0) +++ ql/src/test/results/clientnegative/archive3.q.out (revision 0) @@ -0,0 +1 @@ +FAILED: Error in semantic analysis: ARCHIVE can only be run on partitions Index: ql/src/test/results/clientnegative/archive2.q.out =================================================================== --- ql/src/test/results/clientnegative/archive2.q.out (revision 0) +++ ql/src/test/results/clientnegative/archive2.q.out (revision 0) @@ -0,0 +1,7 @@ +PREHOOK: query: -- Tests trying to unarchive a non-archived partition +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 18, 0.19) + +ALTER TABLE 
srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_UNARCHIVE +FAILED: Error in metadata: Specified partition is not archived +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask Index: ql/src/test/results/clientpositive/archive.q.out =================================================================== --- ql/src/test/results/clientpositive/archive.q.out (revision 0) +++ ql/src/test/results/clientpositive/archive.q.out (revision 0) @@ -0,0 +1,51 @@ +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 18, 0.19) + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-29_15-15-35_519_6232657807775695630/10000 +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 18, 0.19) + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-29_15-15-35_519_6232657807775695630/10000 +48479881068 +PREHOOK: query: ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: query: ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_ARCHIVE +PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-29_15-15-43_238_5198051750596284293/10000 +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-29_15-15-43_238_5198051750596284293/10000 +48479881068 +PREHOOK: query: ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_UNARCHIVE +POSTHOOK: query: ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_UNARCHIVE +PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-29_15-15-49_348_6249073250680631077/10000 +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-04-29_15-15-49_348_6249073250680631077/10000 +48479881068 Index: ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (revision 6055) +++ ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (working copy) @@ -29,8 +29,10 @@ import java.net.URI; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Set; import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -80,6 +82,7 @@ private final String outDir; private final String logDir; private final TreeMap qMap; + private final Set qSkipSet; private final LinkedList srcTables; private ParseDriver pd; @@ -173,6 +176,7 @@ conf = new HiveConf(Driver.class); this.miniMr = miniMr; qMap = new TreeMap(); + qSkipSet = new HashSet(); if (miniMr) { dfs = ShimLoader.getHadoopShims().getMiniDfs(conf, 4, true, null); @@ -230,12 +234,44 @@ DataInputStream dis = new DataInputStream(bis); StringBuilder qsb = new StringBuilder(); + // Look for a hint to not run a test on some Hadoop versions + Pattern pattern = Pattern.compile("-- EXCLUDE_HADOOP_MAJOR_VERSIONS(.*)"); + + // Read the entire query + boolean excludeQuery = false; + String hadoopVer = ShimLoader.getMajorVersion(); while (dis.available() != 0) { - qsb.append(dis.readLine() + "\n"); + String line = dis.readLine(); + + // While we are reading the lines, detect whether this query wants to be + // excluded from running because the Hadoop version is incorrect + Matcher matcher = pattern.matcher(line); + if (matcher.find()) { + String group = matcher.group(); + int start = group.indexOf('('); + int end = group.indexOf(')'); + assert end > start; + // versions might be something like '0.17, 0.19' + String versions = group.substring(start+1, end); + + Set excludedVersionSet = new HashSet(); + for (String s : versions.split("\\,")) { + s = s.trim(); + excludedVersionSet.add(s); + } + if (excludedVersionSet.contains(hadoopVer)) { + excludeQuery = true; + } + } + qsb.append(line + "\n"); } qMap.put(qf.getName(), qsb.toString()); - + if(excludeQuery) { + System.out.println("Due to the Hadoop Version ("+ hadoopVer + "), " + + "adding query " + qf.getName() + " to the set of tests to skip"); + qSkipSet.add(qf.getName()); + } dis.close(); } @@ -504,6 +540,10 @@ return cliDriver.processLine(qMap.get(tname)); } + public boolean shouldBeSkipped(String tname) { + return qSkipSet.contains(tname); + } + public void convertSequenceFileToTextFile() throws Exception { // Create an instance of hive in order to create the tables testWarehouse = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE); @@ -745,6 +785,7 @@ public int checkCliDriverResults(String tname) throws Exception { String[] cmdArray; + assert(qMap.containsKey(tname)); cmdArray = new String[] { "diff", "-a", @@ -816,7 +857,7 @@ /** * QTRunner: Runnable class for running a a single query file. - * + * **/ public static class QTRunner implements Runnable { private final QTestUtil qt; @@ -845,7 +886,7 @@ /** * executes a set of query files either in sequence or in parallel. 
Uses * QTestUtil to do so - * + * * @param qfiles * array of input query files containing arbitrary number of hive * queries @@ -855,7 +896,7 @@ * @param mt * whether to run in multithreaded mode or not * @return true if all the query files were executed successfully, else false - * + * * In multithreaded mode each query file is run in a separate thread. * the caller has to arrange that different query files do not collide * (in terms of destination tables) @@ -923,7 +964,7 @@ } return (!failed); } - + public static void outputTestFailureHelpMessage() { System.err.println("See build/ql/tmp/hive.log, " + "or try \"ant test ... -Dtest.silent=false\" to get more logs."); Index: ql/src/test/queries/clientnegative/archive2.q =================================================================== --- ql/src/test/queries/clientnegative/archive2.q (revision 0) +++ ql/src/test/queries/clientnegative/archive2.q (revision 0) @@ -0,0 +1,5 @@ +set hive.archive.enabled = true; +-- Tests trying to unarchive a non-archived partition +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 18, 0.19) + +ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); Index: ql/src/test/queries/clientnegative/archive3.q =================================================================== --- ql/src/test/queries/clientnegative/archive3.q (revision 0) +++ ql/src/test/queries/clientnegative/archive3.q (revision 0) @@ -0,0 +1,5 @@ +set hive.archive.enabled = true; +-- Tests archiving a table +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 18, 0.19) + +ALTER TABLE srcpart ARCHIVE; Index: ql/src/test/queries/clientnegative/archive4.q =================================================================== --- ql/src/test/queries/clientnegative/archive4.q (revision 0) +++ ql/src/test/queries/clientnegative/archive4.q (revision 0) @@ -0,0 +1,5 @@ +set hive.archive.enabled = true; +-- Tests archiving multiple partitions +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 18, 0.19) + +ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') PARTITION (ds='2008-04-08', hr='11'); Index: ql/src/test/queries/clientnegative/archive1.q =================================================================== --- ql/src/test/queries/clientnegative/archive1.q (revision 0) +++ ql/src/test/queries/clientnegative/archive1.q (revision 0) @@ -0,0 +1,11 @@ +set hive.archive.enabled = true; +-- Tests trying to archive a partition twice. 
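[Reviewer note] The new .q tests in this patch carry the -- EXCLUDE_HADOOP_MAJOR_VERSIONS(...) hint that the QTestUtil change above parses. A compact, standalone sketch of that check follows; the input line and the hard-coded version string are illustrative (the patch obtains the version from ShimLoader.getMajorVersion()).

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ExcludeHintCheck {
  public static void main(String[] args) {
    String line = "-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)";
    String hadoopVer = "0.20"; // in the patch this comes from ShimLoader.getMajorVersion()

    Matcher m = Pattern.compile("-- EXCLUDE_HADOOP_MAJOR_VERSIONS(.*)").matcher(line);
    boolean exclude = false;
    if (m.find()) {
      String group = m.group();
      // Pull out the comma-separated version list between the parentheses.
      String versions = group.substring(group.indexOf('(') + 1, group.indexOf(')'));
      Set<String> excluded = new HashSet<String>();
      for (String v : versions.split(",")) {
        excluded.add(v.trim());
      }
      exclude = excluded.contains(hadoopVer);
    }
    System.out.println("skip this test: " + exclude); // false on 0.20, true on 0.17-0.19
  }
}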
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 18, 0.19) + +CREATE TABLE srcpart_archived LIKE srcpart; + +INSERT OVERWRITE TABLE srcpart_archived PARTITION (ds='2008-04-08', hr='12') +SELECT key, value FROM srcpart WHERE ds='2008-04-08' AND hr='12'; + +ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12'); +ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12'); Index: ql/src/test/queries/clientpositive/archive.q =================================================================== --- ql/src/test/queries/clientpositive/archive.q (revision 0) +++ ql/src/test/queries/clientpositive/archive.q (revision 0) @@ -0,0 +1,15 @@ +set hive.archive.enabled = true; +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 18, 0.19) + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2; + +ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2; + +ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2; Index: ql/src/test/templates/TestCliDriver.vm =================================================================== --- ql/src/test/templates/TestCliDriver.vm (revision 6055) +++ ql/src/test/templates/TestCliDriver.vm (working copy) @@ -84,6 +84,10 @@ qt.addFile("$qf.getCanonicalPath()"); + if (qt.shouldBeSkipped("$fname")) { + return; + } + qt.cliInit("$fname"); int ecode = qt.executeClient("$fname"); if (ecode != 0) { Index: ql/src/test/templates/TestNegativeCliDriver.vm =================================================================== --- ql/src/test/templates/TestNegativeCliDriver.vm (revision 6055) +++ ql/src/test/templates/TestNegativeCliDriver.vm (working copy) @@ -59,6 +59,11 @@ qt.addFile("$qf.getCanonicalPath()"); + if (qt.shouldBeSkipped("$fname")) { + System.out.println("Test $fname skipped"); + return; + } + qt.cliInit("$fname"); int ecode = qt.executeClient("$fname"); if (ecode == 0) { Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (revision 6055) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (working copy) @@ -51,7 +51,7 @@ /** * A Hive Table Partition: is a fundamental storage unit within a Table. - * + * * Please note that the ql code should always go through methods of this class to access the * metadata, instead of directly accessing org.apache.hadoop.hive.metastore.api.Partition. * This helps to isolate the metastore code and the ql code. @@ -72,7 +72,7 @@ private Class outputFormatClass; private Class inputFormatClass; private URI uri; - + /** * @return The values of the partition * @see org.apache.hadoop.hive.metastore.api.Partition#getValues() @@ -82,17 +82,17 @@ } /** - * Used only for serialization. + * Used only for serialization. */ public Partition() { } - + /** * create an empty partition. * SemanticAnalyzer code requires that an empty partition when the table is not partitioned. 
*/ public Partition(Table tbl) throws HiveException { - org.apache.hadoop.hive.metastore.api.Partition tPart = + org.apache.hadoop.hive.metastore.api.Partition tPart = new org.apache.hadoop.hive.metastore.api.Partition(); tPart.setSd(tbl.getTTable().getSd()); // TODO: get a copy initialize(tbl, tPart); @@ -105,7 +105,7 @@ /** * Create partition object with the given info. - * + * * @param tbl * Table the partition will be in. * @param partSpec @@ -158,7 +158,7 @@ /** * Initializes this object with the given variables - * + * * @param table * Table the partition belongs to * @param tPartition @@ -265,7 +265,7 @@ clsName = tPartition.getSd().getInputFormat(); } if (clsName == null) { - clsName = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName(); + clsName = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName(); } try { inputFormatClass = ((Class) Class.forName(clsName, true, @@ -285,10 +285,10 @@ clsName = tPartition.getSd().getOutputFormat(); } if (clsName == null) { - clsName = HiveSequenceFileOutputFormat.class.getName(); + clsName = HiveSequenceFileOutputFormat.class.getName(); } try { - Class c = (Class)(Class.forName(clsName, true, + Class c = (Class.forName(clsName, true, JavaUtils.getClassLoader())); // Replace FileOutputFormat for backward compatibility if (!HiveOutputFormat.class.isAssignableFrom(c)) { @@ -312,7 +312,7 @@ /* * TODO: Keeping this code around for later use when we will support * sampling on tables which are not created with CLUSTERED INTO clause - * + * * // read from table meta data int numBuckets = this.table.getNumBuckets(); * if (numBuckets == -1) { // table meta data does not have bucket * information // check if file system has multiple buckets(files) in this @@ -344,7 +344,9 @@ @SuppressWarnings("nls") public Path getBucketPath(int bucketNum) { try { - FileSystem fs = FileSystem.get(table.getDataLocation(), Hive.get() + // Previously, this got the filesystem of the Table, which could be + // different from the filesystem of the partition. + FileSystem fs = FileSystem.get(getPartitionPath().toUri(), Hive.get() .getConf()); String pathPattern = getPartitionPath().toString(); if (getBucketCount() > 0) { @@ -445,7 +447,7 @@ public org.apache.hadoop.hive.metastore.api.Partition getTPartition() { return tPartition; } - + /** * Should be only used by serialization. 
*/ @@ -462,5 +464,11 @@ return tPartition.getSd().getCols(); } - + public String getLocation() { + return tPartition.getSd().getLocation(); + } + + public void setLocation(String location) { + tPartition.getSd().setLocation(location); + } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (revision 6055) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (working copy) @@ -28,6 +28,8 @@ import java.io.OutputStreamWriter; import java.io.Serializable; import java.io.Writer; +import java.net.URI; +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; @@ -43,6 +45,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; @@ -66,6 +69,7 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.AddPartitionDesc; import org.apache.hadoop.hive.ql.plan.AlterTableDesc; +import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc; import org.apache.hadoop.hive.ql.plan.CreateViewDesc; @@ -78,7 +82,7 @@ import org.apache.hadoop.hive.ql.plan.ShowPartitionsDesc; import org.apache.hadoop.hive.ql.plan.ShowTableStatusDesc; import org.apache.hadoop.hive.ql.plan.ShowTablesDesc; -import org.apache.hadoop.hive.ql.plan.TouchDesc; +import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.Deserializer; @@ -88,8 +92,9 @@ import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; - +import org.apache.hadoop.util.ToolRunner; /** * DDLTask implementation. * @@ -102,6 +107,12 @@ private static final int separator = Utilities.tabCode; private static final int terminator = Utilities.newLineCode; + // These are suffixes attached to intermediate directory names used in the + // archiving / un-archiving process. 
+ private static final String INTERMEDIATE_ARCHIVED_DIR_SUFFIX = "-intermediate-archived"; + private static final String INTERMEDIATE_ORIGINAL_DIR_SUFFIX = "-intermediate-original"; + private static final String INTERMEDIATE_EXTRACTED_DIR_SUFFIX = "-intermediate-extracted"; + public DDLTask() { super(); } @@ -150,12 +161,17 @@ return addPartition(db, addPartitionDesc); } - TouchDesc touchDesc = work.getTouchDesc(); - if (touchDesc != null) { - return touch(db, touchDesc); + AlterTableSimpleDesc simpleDesc = work.getAlterTblSimpleDesc(); + + if(simpleDesc != null) { + if (simpleDesc.getType() == AlterTableTypes.TOUCH) { + return touch(db, simpleDesc); + } else if (simpleDesc.getType() == AlterTableTypes.ARCHIVE) { + return archive(db, simpleDesc, driverContext); + } else if (simpleDesc.getType() == AlterTableTypes.UNARCHIVE) { + return unarchive(db, simpleDesc); + } } - - MsckDesc msckDesc = work.getMsckDesc(); if (msckDesc != null) { return msck(db, msckDesc); @@ -257,7 +273,7 @@ * @return * @throws HiveException */ - private int touch(Hive db, TouchDesc touchDesc) + private int touch(Hive db, AlterTableSimpleDesc touchDesc) throws HiveException { String dbName = touchDesc.getDbName(); @@ -290,7 +306,480 @@ } return 0; } + /** + * Determines whether a partition has been archived + * + * @param p + * @return + */ + public static boolean isArchived(Partition p) { + Map params = p.getParameters(); + if ("true".equalsIgnoreCase(params.get(MetaStoreUtils.IS_ARCHIVED_KEY))) { + return true; + } else { + return false; + } + } + // Returns only the path component of the URI + private String getArchiveDirOnly(Path parentDir, String archiveName) { + URI parentUri = parentDir.toUri(); + Path harDir = new Path(parentUri.getPath(), archiveName); + return harDir.toString(); + } + + /** + * + * @param p - the partition to modify + * @param parentDir - the parent directory of the archive, which is the + * original directory that the partition's files resided in + * @param dirInArchive - the directory within the archive file that contains + * the partitions files + * @param archiveName - the name of the archive + * @throws URISyntaxException + */ + private void setArchived(Partition p, Path parentDir, String dirInArchive, String archiveName) + throws URISyntaxException { + assert(isArchived(p) == false); + Map params = p.getParameters(); + + URI parentUri = parentDir.toUri(); + String parentHost = parentUri.getHost(); + String harHost = null; + if (parentHost == null) { + harHost = ""; + } else { + harHost = parentUri.getScheme() + "-" + parentHost; + } + + // harUri is used to access the partition's files, which are in the archive + // The format of the RI is something like: + // har://underlyingfsscheme-host:port/archivepath + URI harUri = null; + if (dirInArchive.length() == 0) { + harUri = new URI("har", parentUri.getUserInfo(), harHost, parentUri.getPort(), + getArchiveDirOnly(parentDir, archiveName), + parentUri.getQuery(), parentUri.getFragment()); + } else { + harUri = new URI("har", parentUri.getUserInfo(), harHost, parentUri.getPort(), + new Path(getArchiveDirOnly(parentDir, archiveName), dirInArchive).toUri().getPath(), + parentUri.getQuery(), parentUri.getFragment()); + } + String harPath = new Path(parentDir, archiveName).toString(); + params.put(MetaStoreUtils.IS_ARCHIVED_KEY, "true"); + params.put(MetaStoreUtils.ORIGINAL_LOCATION_KEY, parentDir.toString()); + params.put(MetaStoreUtils.ARCHIVE_PATH_KEY, harPath); + p.setLocation(harUri.toString()); + } + + private void setUnArchived(Partition 
p) { + assert(isArchived(p) == true); + Map params = p.getParameters(); + String parentDir = params.get(MetaStoreUtils.ORIGINAL_LOCATION_KEY); + params.remove(MetaStoreUtils.IS_ARCHIVED_KEY); + params.remove(MetaStoreUtils.ORIGINAL_LOCATION_KEY); + params.remove(MetaStoreUtils.ARCHIVE_PATH_KEY); + assert(parentDir != null); + p.setLocation(parentDir); + } + + private Path getOriginalLocation(Partition p) { + assert(isArchived(p)); + Map params = p.getParameters(); + String parentDir = params.get(MetaStoreUtils.ORIGINAL_LOCATION_KEY); + if (parentDir == null) { + return null; + } + return new Path(parentDir); + } + + private Path getLocation(Partition p) { + assert(isArchived(p)); + return new Path(p.getLocation()); + } + + private Path getArchivePath(Partition p) { + assert(isArchived(p)); + Map params = p.getParameters(); + String archiveDir = params.get(MetaStoreUtils.ARCHIVE_PATH_KEY); + if (archiveDir == null) { + return null; + } + return new Path(archiveDir); + } + + private boolean pathExists(FileSystem fs, Path p) throws HiveException { + try { + return fs.exists(p); + } catch (IOException e) { + throw new HiveException(e); + } + } + + private void moveDir(FileSystem fs, Path from, Path to) throws HiveException { + try { + if (!fs.rename(from, to)) { + throw new HiveException("Moving " + from + " to " + to + " failed!"); + } + } catch (IOException e) { + throw new HiveException(e); + } + } + + private int archive(Hive db, AlterTableSimpleDesc simpleDesc, DriverContext driverContext) + throws HiveException { + String dbName = simpleDesc.getDbName(); + String tblName = simpleDesc.getTableName(); + + Table tbl = db.getTable(dbName, tblName); + validateAlterTableType(tbl, AlterTableDesc.AlterTableTypes.ARCHIVE); + + Map partSpec = simpleDesc.getPartSpec(); + Partition p = db.getPartition(tbl, partSpec, false); + + if (tbl.getTableType() != TableType.MANAGED_TABLE) { + throw new HiveException("ARCHIVE can only be performed on managed tables"); + } + + if (p == null) { + throw new HiveException("Specified partition does not exist"); + } + + if (isArchived(p)) { + throw new HiveException("Specified partition is already archived"); + } + + Path originalDir = p.getPartitionPath(); + String archiveName = tblName + ".har"; + FileSystem fs = null; + try { + fs = originalDir.getFileSystem(conf); + } catch (IOException e) { + throw new HiveException(e); + } + + Path intermediateArchivedDir = new Path(originalDir.getParent(), + originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX); + Path intermediateOriginalDir = new Path(originalDir.getParent(), + originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX); + + // The following steps seem roundabout, but they are meant to aid in + // recovery if a failure occurs, and to keep a consistent state in the FS + + // Steps: + // 1. Create the archive in a temporary folder + // 2. Move the archive dir to an intermediate dir that is in at the same + // dir as the original partition dir. Call the new dir + // intermediate-archive. + // 3. Rename the original partition dir to an intermediate dir. Call the + // renamed dir intermediate-original + // 4. Rename intermediate-archive to the original partition dir + // 5. Change the metadata + // 6. Delete the original partition files in intermediate-original + + // The original partition files are deleted after the metadata change + // because the presence of those files are used to indicate whether + // the original partition directory contains archived or unarchived files. 
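[Reviewer note] To make step 5 (the metadata change) concrete, here is a small self-contained sketch of the location rewrite that setArchived performs for the newer-Hadoop case where dirInArchive is empty; the namenode host, port and paths are invented for illustration. The underlying scheme and host are folded into the har authority as "scheme-host" so that the HAR filesystem can locate the wrapped filesystem.

import java.net.URI;

public class HarLocationSketch {
  public static void main(String[] args) throws Exception {
    // Original partition directory (illustrative).
    URI parent = new URI("hdfs://namenode:8020/user/hive/warehouse/t/ds=1");
    String archiveName = "t.har";

    // Values recorded in the partition parameters:
    String originalLocation = parent.toString();                // ORIGINAL_LOCATION
    String archivePath = originalLocation + "/" + archiveName;  // ARCHIVE_PATH

    // New partition location, read back through HiveHarFileSystem (fs.har.impl):
    URI harUri = new URI("har", parent.getUserInfo(),
        parent.getScheme() + "-" + parent.getHost(), parent.getPort(),
        parent.getPath() + "/" + archiveName, null, null);

    System.out.println(originalLocation); // hdfs://namenode:8020/user/hive/warehouse/t/ds=1
    System.out.println(archivePath);      // hdfs://namenode:8020/user/hive/warehouse/t/ds=1/t.har
    System.out.println(harUri);           // har://hdfs-namenode:8020/user/hive/warehouse/t/ds=1/t.har
  }
}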
+ + // Create an archived version of the partition in a directory ending in + // ARCHIVE_INTERMEDIATE_DIR_SUFFIX that's the same level as the partition, + // if it does not already exist. If it does exist, we assume the dir is good + // to use as the move operation that created it is atomic. + if (!pathExists(fs, intermediateArchivedDir) && + !pathExists(fs, intermediateOriginalDir)) { + + // First create the archive in a tmp dir so that if the job fails, the + // bad files don't pollute the filesystem + Path tmpDir = new Path(driverContext.getCtx().getMRScratchDir(), "partlevel"); + + console.printInfo("Creating " + archiveName + " for " + originalDir.toString()); + console.printInfo("in " + tmpDir); + console.printInfo("Please wait... (this may take a while)"); + + // Create the Hadoop archive + HadoopShims shim = ShimLoader.getHadoopShims(); + int ret=0; + try { + ret = shim.createHadoopArchive(conf, originalDir, tmpDir, archiveName); + } catch (Exception e) { + throw new HiveException(e); + } + if (ret != 0) { + throw new HiveException("Error while creating HAR"); + } + // Move from the tmp dir to an intermediate directory, in the same level as + // the partition directory. e.g. .../hr=12-intermediate-archived + try { + console.printInfo("Moving " + tmpDir + " to " + intermediateArchivedDir); + if (fs.exists(intermediateArchivedDir)) { + throw new HiveException("The intermediate archive directory already exists."); + } + fs.rename(tmpDir, intermediateArchivedDir); + } catch (IOException e) { + throw new HiveException("Error while moving tmp directory"); + } + } else { + if (pathExists(fs, intermediateArchivedDir)) { + console.printInfo("Intermediate archive directory " + intermediateArchivedDir + + " already exists. Assuming it contains an archived version of the partition"); + } + } + + // If we get to here, we know that we've archived the partition files, but + // they may be in the original partition location, or in the intermediate + // original dir. + + // Move the original parent directory to the intermediate original directory + // if the move hasn't been made already + if (!pathExists(fs, intermediateOriginalDir)) { + console.printInfo("Moving " + originalDir + " to " + + intermediateOriginalDir); + moveDir(fs, originalDir, intermediateOriginalDir); + } else { + console.printInfo(intermediateOriginalDir + " already exists. " + + "Assuming it contains the original files in the partition"); + } + + // If there's a failure from here to when the metadata is updated, + // there will be no data in the partition, or an error while trying to read + // the partition (if the archive files have been moved to the original + // partition directory.) But re-running the archive command will allow + // recovery + + // Move the intermediate archived directory to the original parent directory + if (!pathExists(fs, originalDir)) { + console.printInfo("Moving " + intermediateArchivedDir + " to " + + originalDir); + moveDir(fs, intermediateArchivedDir, originalDir); + } else { + console.printInfo(originalDir + " already exists. " + + "Assuming it contains the archived version of the partition"); + } + + // Record this change in the metastore + try { + boolean parentSettable = + conf.getBoolVar(HiveConf.ConfVars.HIVEHARPARENTDIRSETTABLE); + + // dirInArchive is the directory within the archive that has all the files + // for this partition. With older versions of Hadoop, archiving a + // a directory would produce the same directory structure + // in the archive. 
So if you created myArchive.har of /tmp/myDir, the + // files in /tmp/myDir would be located under myArchive.har/tmp/myDir/* + // In this case, dirInArchive should be tmp/myDir + + // With newer versions of Hadoop, the parent directory could be specified. + // Assuming the parent directory was set to /tmp/myDir when creating the + // archive, the files can be found under myArchive.har/* + // In this case, dirInArchive should be empty + + String dirInArchive = ""; + if (!parentSettable) { + dirInArchive = originalDir.toUri().getPath(); + if(dirInArchive.length() > 1 && dirInArchive.charAt(0)=='/') { + dirInArchive = dirInArchive.substring(1); + } + } + setArchived(p, originalDir, dirInArchive, archiveName); + db.alterPartition(tblName, p); + } catch (Exception e) { + throw new HiveException("Unable to change the partition info for HAR", e); + } + + // Get rid of the original files + try { + Warehouse wh = new Warehouse(conf); + wh.deleteDir(intermediateOriginalDir, true); + } catch (MetaException e) { + throw new HiveException(e); + } + + + return 0; + } + + private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) + throws HiveException { + String dbName = simpleDesc.getDbName(); + String tblName = simpleDesc.getTableName(); + + Table tbl = db.getTable(dbName, tblName); + validateAlterTableType(tbl, AlterTableDesc.AlterTableTypes.UNARCHIVE); + + // Means user specified a table, not a partition + if (simpleDesc.getPartSpec() == null) { + throw new HiveException("ARCHIVE is for partitions only"); + } + + Map partSpec = simpleDesc.getPartSpec(); + Partition p = db.getPartition(tbl, partSpec, false); + + if (tbl.getTableType() != TableType.MANAGED_TABLE) { + throw new HiveException("UNARCHIVE can only be performed on managed tables"); + } + + if (p == null) { + throw new HiveException("Specified partition does not exist"); + } + + if (!isArchived(p)) { + throw new HiveException("Specified partition is not archived"); + } + + + Path originalLocation = getOriginalLocation(p); + Path sourceDir = getLocation(p); + + // Some sanity checks + if (originalLocation == null) { + throw new HiveException("Missing archive data in the partition"); + } + if (!"har".equals(sourceDir.toUri().getScheme())) { + throw new HiveException("Location should refer to a HAR"); + } + + // Clarification of terms: + // - The originalLocation directory represents the original directory of the + // partition's files. They now contain an archived version of those files + // eg. hdfs:/warehouse/myTable/ds=1/ + // - The source directory is the directory containing all the files that + // should be in the partition. e.g. har:/warehouse/myTable/ds=1/myTable.har/ + // Note the har:/ scheme + // - The archive directory is the archive on the underlying fs. + // e.g. hdfs:/warehouse/myTable/ds=1/myTable.hr + // Because it doesn't have the 'har' scheme, the archive directory can't + // be used to get the files in the archive. + + // Steps: + // 1. Extract the archive in a temporary folder + // 2. Move the archive dir to an intermediate dir that is in at the same + // dir as originalLocation. Call the new dir intermediate-extracted. + // 3. Rename the original partition dir to an intermediate dir. Call the + // renamed dir intermediate-archive + // 4. Rename intermediate-extracted to the original partition dir + // 5. Change the metadata + // 6. 
Delete the archived partition files in intermediate-archive + + Path tmpDir = new Path(driverContext.getCtx().getMRScratchDir()); + + Path intermediateArchiveDir = new Path(originalLocation.getParent(), + originalLocation.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX); + Path intermediateExtractedDir = new Path(originalLocation.getParent(), + originalLocation.getName() + INTERMEDIATE_EXTRACTED_DIR_SUFFIX); + + FileSystem fs = null; + try { + fs = tmpDir.getFileSystem(conf); + // Verify that there are no files in the tmp dir, because if there are, it + // would be copied to the partition + FileStatus [] filesInTmpDir = fs.listStatus(tmpDir); + if (filesInTmpDir.length != 0) { + for (FileStatus file : filesInTmpDir) { + console.printInfo(file.getPath().toString()); + } + throw new HiveException("Temporary directory " + tmpDir + " is not empty"); + } + + } catch (IOException e) { + throw new HiveException(e); + } + + if (!pathExists(fs, intermediateExtractedDir) && + !pathExists(fs, intermediateArchiveDir)) { + try { + + // Copy the files out of the archive into the temporary directory + String copySource = (new Path(sourceDir, "*")).toString(); + String copyDest = tmpDir.toString(); + List args = new ArrayList(); + args.add("-cp"); + args.add(copySource); + args.add(copyDest); + + console.printInfo("Copying " + copySource + " to " + copyDest); + FsShell fss = new FsShell(conf); + int ret = 0; + try { + ret = ToolRunner.run(fss, args.toArray(new String[0])); + } catch (Exception e) { + throw new HiveException(e); + } + if (ret != 0) { + throw new HiveException("Error while copying files from archive"); + } + + console.printInfo("Moving " + tmpDir + " to " + intermediateExtractedDir); + if (fs.exists(intermediateExtractedDir)) { + throw new HiveException("Invalid state: the intermediate extracted " + + "directory already exists."); + } + fs.rename(tmpDir, intermediateExtractedDir); + } catch (Exception e) { + throw new HiveException(e); + } + } + + // At this point, we know that the extracted files are in the intermediate + // extracted dir, or in the the original directory. + + if (!pathExists(fs, intermediateArchiveDir)) { + try { + console.printInfo("Moving " + originalLocation + " to " + intermediateArchiveDir); + if (fs.exists(intermediateArchiveDir)) { + throw new HiveException("Invalid state: the intermediate archive " + + "directory already exists."); + } + fs.rename(originalLocation, intermediateArchiveDir); + } catch (IOException e) { + throw new HiveException(e); + } + } else { + console.printInfo(intermediateArchiveDir + " already exists. " + + "Assuming it contains the archived version of the partition"); + } + + // If there is a failure from here to until when the metadata is changed, + // the partition will be empty or throw errors on read. + + // If the original location exists here, then it must be the extracted files + // because in the previous step, we moved the previous original location + // (containing the archived version of the files) to intermediateArchiveDir + if (!pathExists(fs, originalLocation)) { + try { + console.printInfo("Moving " + intermediateExtractedDir + " to " + originalLocation); + if (fs.exists(originalLocation)) { + throw new HiveException("Invalid state: the original partition " + + "directory already exists."); + } + fs.rename(intermediateExtractedDir, originalLocation); + } catch (IOException e) { + throw new HiveException(e); + } + } else { + console.printInfo(originalLocation + " already exists. 
" + + "Assuming it contains the extracted files in the partition"); + } + + setUnArchived(p); + try { + db.alterPartition(tblName, p); + } catch (InvalidOperationException e) { + throw new HiveException(e); + } + // If a failure happens here, the intermediate archive files won't be + // deleted, even on subsequent unarchive calls + try { + Warehouse wh = new Warehouse(conf); + // Get rid of the original files + wh.deleteDir(intermediateArchiveDir, true); + } catch (MetaException e) { + throw new HiveException(e); + } + + + return 0; + } + private void validateAlterTableType( Table tbl, AlterTableDesc.AlterTableTypes alterType) throws HiveException { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (revision 6055) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (working copy) @@ -42,7 +42,7 @@ private ShowPartitionsDesc showPartsDesc; private DescTableDesc descTblDesc; private AddPartitionDesc addPartitionDesc; - private TouchDesc touchDesc; + private AlterTableSimpleDesc alterTblSimpleDesc; private MsckDesc msckDesc; private ShowTableStatusDesc showTblStatusDesc; @@ -183,10 +183,10 @@ * information about the table/partitions that we want to touch */ public DDLWork(HashSet inputs, HashSet outputs, - TouchDesc touchDesc) { + AlterTableSimpleDesc simpleDesc) { this(inputs, outputs); - this.touchDesc = touchDesc; + this.alterTblSimpleDesc = simpleDesc; } public DDLWork(HashSet inputs, HashSet outputs, @@ -383,18 +383,18 @@ } /** - * @return information about the table/partitionss we want to touch. + * @return information about the table/partitions we want to alter. */ - public TouchDesc getTouchDesc() { - return touchDesc; + public AlterTableSimpleDesc getAlterTblSimpleDesc() { + return alterTblSimpleDesc; } /** - * @param touchDesc - * information about the table/partitions we want to touch. + * @param desc + * information about the table/partitions we want to alter. */ - public void setTouchDesc(TouchDesc touchDesc) { - this.touchDesc = touchDesc; + public void setAlterTblSimpleDesc(AlterTableSimpleDesc desc) { + this.alterTblSimpleDesc = desc; } /** Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java (revision 0) @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * ArchiveDesc. 
+ * + */ +public class ArchiveDesc extends DDLDesc { + +} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java (revision 6055) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java (working copy) @@ -1,81 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.plan; - -import java.util.LinkedHashMap; -import java.util.Map; - -/** - * Contains information needed to touch a partition (cause pre/post hooks to - * fire). - */ -public class TouchDesc extends DDLDesc { - private String tableName; - private String dbName; - private LinkedHashMap partSpec; - - - public TouchDesc() { - } - - /** - * @param dbName - * database that contains the table / partition - * @param tableName - * table containing the partition - * @param partSpec - * partition specification. Null if touching a table. - */ - public TouchDesc(String dbName, String tableName, - Map partSpec) { - super(); - this.dbName = dbName; - this.tableName = tableName; - if(partSpec == null) { - this.partSpec = null; - } else { - this.partSpec = new LinkedHashMap(partSpec); - } - } - - public String getTableName() { - return tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public String getDbName() { - return dbName; - } - - public void setDbName(String dbName) { - this.dbName = dbName; - } - - public LinkedHashMap getPartSpec() { - return partSpec; - } - - public void setPartSpec(LinkedHashMap partSpec) { - this.partSpec = partSpec; - } - -} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java (revision 0) @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
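[Reviewer note] As a usage note for the descriptor refactoring: with TouchDesc generalized into AlterTableSimpleDesc (shown below), TOUCH, ARCHIVE and UNARCHIVE all flow through the same DDLWork constructor. A hypothetical sketch of how an ARCHIVE statement could be wired up follows; the database, table and partition values are illustrative, and the empty read/write entity sets (assumed to be HashSet<ReadEntity>/HashSet<WriteEntity>, matching the existing DDLWork constructors) are only for brevity.

import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;

public class ArchiveWorkSketch {
  public static DDLWork archiveWorkFor() {
    // Partition spec for ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12')
    Map<String, String> partSpec = new LinkedHashMap<String, String>();
    partSpec.put("ds", "2008-04-08");
    partSpec.put("hr", "12");

    AlterTableSimpleDesc desc = new AlterTableSimpleDesc(
        "default", "srcpart", partSpec, AlterTableTypes.ARCHIVE);

    // Empty input/output entity sets, purely for illustration.
    return new DDLWork(new HashSet<ReadEntity>(), new HashSet<WriteEntity>(), desc);
  }
}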
+ */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; +import java.util.LinkedHashMap; + +/** + * ArchiveWork. + * + */ +@Explain(displayName = "Map Reduce") +public class ArchiveWork implements Serializable { + private static final long serialVersionUID = 1L; + private String tableName; + private String dbName; + private LinkedHashMap partSpec; + private ArchiveActionType type; + + public static enum ArchiveActionType { + ARCHIVE, UNARCHIVE + }; + + + public ArchiveActionType getType() { + return type; + } + + public void setType(ArchiveActionType type) { + this.type = type; + } + + public ArchiveWork() { + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java (revision 0) @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; + +/** + * Contains information needed to modify a partition or a table + */ +public class AlterTableSimpleDesc extends DDLDesc { + private String tableName; + private String dbName; + private LinkedHashMap partSpec; + + AlterTableTypes type; + + public AlterTableSimpleDesc() { + } + + /** + * @param dbName + * database that contains the table / partition + * @param tableName + * table containing the partition + * @param partSpec + * partition specification. Null if touching a table. 
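+ * @param type + * the alter table operation to perform, e.g. TOUCH, ARCHIVE or UNARCHIVE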
+ */ + public AlterTableSimpleDesc(String dbName, String tableName, + Map partSpec, AlterTableDesc.AlterTableTypes type) { + super(); + this.dbName = dbName; + this.tableName = tableName; + if(partSpec == null) { + this.partSpec = null; + } else { + this.partSpec = new LinkedHashMap(partSpec); + } + this.type = type; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public String getDbName() { + return dbName; + } + + public void setDbName(String dbName) { + this.dbName = dbName; + } + + public AlterTableDesc.AlterTableTypes getType() { + return type; + } + + public void setType(AlterTableDesc.AlterTableTypes type) { + this.type = type; + } + + public LinkedHashMap getPartSpec() { + return partSpec; + } + + public void setPartSpec(LinkedHashMap partSpec) { + this.partSpec = partSpec; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java (revision 6055) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java (working copy) @@ -42,7 +42,7 @@ public static enum AlterTableTypes { RENAME, ADDCOLS, REPLACECOLS, ADDPROPS, ADDSERDE, ADDSERDEPROPS, ADDFILEFORMAT, ADDCLUSTERSORTCOLUMN, RENAMECOLUMN, ADDPARTITION, - TOUCH + TOUCH, ARCHIVE, UNARCHIVE, }; AlterTableTypes op; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (revision 6055) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (working copy) @@ -149,7 +149,15 @@ UNSUPPORTED_TYPE("DATE, DATETIME, and TIMESTAMP types aren't supported yet. Please use " + "STRING instead."), CREATE_NON_NATIVE_AS("CREATE TABLE AS SELECT cannot be used for a non-native table"), - LOAD_INTO_NON_NATIVE("A non-native table cannot be used as target for LOAD"); + LOAD_INTO_NON_NATIVE("A non-native table cannot be used as target for LOAD"), + OVERWRITE_ARCHIVED_PART("Cannot overwrite an archived partition. " + + "Unarchive before running this command."), + ARCHIVE_METHODS_DISABLED("Archiving methods are currently disabled. 
" + + "Please see the Hive wiki for more information about enabling archiving."), + ARCHIVE_ON_MULI_PARTS("ARCHIVE can only be run on a single partition"), + UNARCHIVE_ON_MULI_PARTS("ARCHIVE can only be run on a single partition"), + ARCHIVE_ON_TABLE("ARCHIVE can only be run on partitions"), + ; private String mesg; private String sqlState; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (revision 6055) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (working copy) @@ -95,6 +95,8 @@ TOK_ALTERTABLE_ADDPARTS; TOK_ALTERTABLE_DROPPARTS; TOK_ALTERTABLE_TOUCH; +TOK_ALTERTABLE_ARCHIVE; +TOK_ALTERTABLE_UNARCHIVE; TOK_ALTERTABLE_SERDEPROPERTIES; TOK_ALTERTABLE_SERIALIZER; TOK_ALTERTABLE_FILEFORMAT; @@ -281,6 +283,8 @@ | alterStatementSuffixDropPartitions | alterStatementSuffixAddPartitions | alterStatementSuffixTouch + | alterStatementSuffixArchive + | alterStatementSuffixUnArchive | alterStatementSuffixProperties | alterStatementSuffixSerdeProperties | alterStatementSuffixFileFormat @@ -335,6 +339,20 @@ -> ^(TOK_ALTERTABLE_TOUCH Identifier (partitionSpec)*) ; +alterStatementSuffixArchive +@init { msgs.push("archive statement"); } +@after { msgs.pop(); } + : Identifier KW_ARCHIVE (partitionSpec)* + -> ^(TOK_ALTERTABLE_ARCHIVE Identifier (partitionSpec)*) + ; + +alterStatementSuffixUnArchive +@init { msgs.push("unarchive statement"); } +@after { msgs.pop(); } + : Identifier KW_UNARCHIVE (partitionSpec)* + -> ^(TOK_ALTERTABLE_UNARCHIVE Identifier (partitionSpec)*) + ; + partitionLocation @init { msgs.push("partition location"); } @after { msgs.pop(); } @@ -1616,6 +1634,8 @@ KW_SEMI: 'SEMI'; KW_LATERAL: 'LATERAL'; KW_TOUCH: 'TOUCH'; +KW_ARCHIVE: 'ARCHIVE'; +KW_UNARCHIVE: 'UNARCHIVE'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. 
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (revision 6055) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (working copy) @@ -46,6 +46,8 @@ commandType.put(HiveParser.TOK_ALTERTABLE_DROPPARTS, "ALTERTABLE_DROPPARTS"); commandType.put(HiveParser.TOK_ALTERTABLE_ADDPARTS, "ALTERTABLE_ADDPARTS"); commandType.put(HiveParser.TOK_ALTERTABLE_TOUCH, "ALTERTABLE_TOUCH"); + commandType.put(HiveParser.TOK_ALTERTABLE_ARCHIVE, "ALTERTABLE_ARCHIVE"); + commandType.put(HiveParser.TOK_ALTERTABLE_UNARCHIVE, "ALTERTABLE_UNARCHIVE"); commandType.put(HiveParser.TOK_ALTERTABLE_PROPERTIES, "ALTERTABLE_PROPERTIES"); commandType.put(HiveParser.TOK_ALTERTABLE_SERIALIZER, "ALTERTABLE_SERIALIZER"); commandType.put(HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES, "ALTERTABLE_SERDEPROPERTIES"); @@ -98,6 +100,8 @@ case HiveParser.TOK_ALTERTABLE_FILEFORMAT: case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: case HiveParser.TOK_ALTERTABLE_TOUCH: + case HiveParser.TOK_ALTERTABLE_ARCHIVE: + case HiveParser.TOK_ALTERTABLE_UNARCHIVE: return new DDLSemanticAnalyzer(conf); case HiveParser.TOK_CREATEFUNCTION: case HiveParser.TOK_DROPFUNCTION: Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 6055) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -31,9 +31,9 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; +import java.util.Map.Entry; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -88,7 +88,6 @@ import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1; import org.apache.hadoop.hive.ql.optimizer.GenMROperator; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext; -import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3; @@ -98,6 +97,7 @@ import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory; import org.apache.hadoop.hive.ql.optimizer.Optimizer; +import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; @@ -117,7 +117,6 @@ import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; -import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.plan.ForwardDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; @@ -138,11 +137,12 @@ import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.UDTFDesc; import org.apache.hadoop.hive.ql.plan.UnionDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.session.SessionState; import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; @@ -150,9 +150,9 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -3253,6 +3253,10 @@ dest_tab = dest_part.getTable(); dest_path = dest_part.getPath()[0]; + if ("har".equalsIgnoreCase(dest_path.toUri().getScheme())) { + throw new SemanticException(ErrorMsg.OVERWRITE_ARCHIVED_PART + .getMsg()); + } queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri()); table_desc = Utilities.getTableDesc(dest_tab); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (revision 6055) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (working copy) @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.plan.AddPartitionDesc; import org.apache.hadoop.hive.ql.plan.AlterTableDesc; +import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; import org.apache.hadoop.hive.ql.plan.DescFunctionDesc; import org.apache.hadoop.hive.ql.plan.DescTableDesc; @@ -51,7 +52,6 @@ import org.apache.hadoop.hive.ql.plan.ShowTableStatusDesc; import org.apache.hadoop.hive.ql.plan.ShowTablesDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.plan.TouchDesc; import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; @@ -121,6 +121,10 @@ analyzeAlterTableRename(ast); } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_TOUCH) { analyzeAlterTableTouch(ast); + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ARCHIVE) { + analyzeAlterTableArchive(ast, false); + } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_UNARCHIVE) { + analyzeAlterTableArchive(ast, true); } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ADDCOLS) { analyzeAlterTableModifyCols(ast, AlterTableTypes.ADDCOLS); } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_REPLACECOLS) { @@ -599,20 +603,50 @@ List> partSpecs = getPartitionSpecs(ast); if (partSpecs.size() == 0) { - TouchDesc touchDesc = new TouchDesc( - MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, null); + AlterTableSimpleDesc touchDesc = new AlterTableSimpleDesc( + MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, 
null, + AlterTableDesc.AlterTableTypes.TOUCH); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), touchDesc), conf)); } else { for (Map partSpec : partSpecs) { - TouchDesc touchDesc = new TouchDesc( - MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec); + AlterTableSimpleDesc touchDesc = new AlterTableSimpleDesc( + MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec, + AlterTableDesc.AlterTableTypes.TOUCH); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), touchDesc), conf)); } } } + private void analyzeAlterTableArchive(CommonTree ast, boolean isUnArchive) + throws SemanticException { + + if (!conf.getBoolVar(HiveConf.ConfVars.HIVEARCHIVEENABLED)) { + throw new SemanticException(ErrorMsg.ARCHIVE_METHODS_DISABLED.getMsg()); + + } + String tblName = unescapeIdentifier(ast.getChild(0).getText()); + // partition name to value + List> partSpecs = getPartitionSpecs(ast); + if (partSpecs.size() > 1 ) { + throw new SemanticException(isUnArchive ? + ErrorMsg.UNARCHIVE_ON_MULI_PARTS.getMsg() : + ErrorMsg.ARCHIVE_ON_MULI_PARTS.getMsg()); + } + if (partSpecs.size() == 0) { + throw new SemanticException(ErrorMsg.ARCHIVE_ON_TABLE.getMsg()); + } + + Map partSpec = partSpecs.get(0); + AlterTableSimpleDesc archiveDesc = new AlterTableSimpleDesc( + MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec, + (isUnArchive ? AlterTableTypes.UNARCHIVE : AlterTableTypes.ARCHIVE)); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), + archiveDesc), conf)); + + } + /** * Verify that the information in the metastore matches up with the data on * the fs.