diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index bb105fe6ce..1c4c0d8661 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -43,13 +43,17 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.TransactionalValidationListener; import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.io.orc.OrcFile; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater; +import org.apache.hadoop.hive.ql.io.orc.Reader; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hive.common.util.Ref; +import org.apache.orc.FileFormatException; import org.apache.orc.impl.OrcAcidUtils; import org.codehaus.jackson.map.ObjectMapper; import org.slf4j.Logger; @@ -124,7 +128,7 @@ public boolean accept(Path path) { public static final Pattern LEGACY_BUCKET_DIGIT_PATTERN = Pattern.compile("^[0-9]{6}"); /** * A write into a non-aicd table produces files like 0000_0 or 0000_0_copy_1 - * (Unless via Load Data statment) + * (Unless via Load Data statement) */ public static final PathFilter originalBucketFilter = new PathFilter() { @Override @@ -1520,7 +1524,8 @@ public static void createMetaFile(Path baseOrDeltaDir, FileSystem fs, boolean is throw ioe; } } - public static boolean isRawFormat(Path baseOrDeltaDir, FileSystem fs) throws IOException { + //should be useful for import/export + public static boolean isImport(Path baseOrDeltaDir, FileSystem fs) throws IOException { Path formatFile = new Path(baseOrDeltaDir, METADATA_FILE); if(!fs.exists(formatFile)) { return false; @@ -1549,5 
+1554,48 @@ public static boolean isRawFormat(Path baseOrDeltaDir, FileSystem fs) throws IOE throw e; } } + + /** + * Chooses 1 representative file from {@code baseOrDeltaDir} + * This assumes that all files in the dir are of the same type: either written by an acid + * write or Load Data. This should always be the case for an Acid table. + */ + private static Path chooseFile(Path baseOrDeltaDir, FileSystem fs) throws IOException { + if(!(baseOrDeltaDir.getName().startsWith(BASE_PREFIX) || + baseOrDeltaDir.getName().startsWith(DELTA_PREFIX))) { + throw new IllegalArgumentException(baseOrDeltaDir + " is not a base/delta"); + } + FileStatus[] dataFiles = fs.listStatus(new Path[] {baseOrDeltaDir}, originalBucketFilter); + return dataFiles != null && dataFiles.length > 0 ? dataFiles[0].getPath() : null; + } + + /** + * Checks if the files in base/delta dir are a result of Load Data statement and thus do not + * have ROW_IDs embedded in the data. + * @param baseOrDeltaDir base or delta file. + */ + public static boolean isRawFormat(Path baseOrDeltaDir, FileSystem fs) throws IOException { + Path dataFile = chooseFile(baseOrDeltaDir, fs); + if (dataFile == null) { + //directory is empty or doesn't have any that could have been produced by load data + return false; + } + try { + Reader reader = OrcFile.createReader(dataFile, OrcFile.readerOptions(fs.getConf())); + /* + acid file would have schema like <op, owid, writerId, rowid, cwid, <f1, ... fn>> so could + check it this way once/if OrcRecordUpdater.ACID_KEY_INDEX_NAME is removed + TypeDescription schema = reader.getSchema(); + List<String> columns = schema.getFieldNames(); + */ + return OrcInputFormat.isOriginal(reader); + } catch (FileFormatException ex) { + //We may be parsing a delta for Insert-only table which may not even be an ORC file so + //cannot have ROW_IDs in it.
+ LOG.debug("isRawFormat() called on " + dataFile + " which is not an ORC file: " + + ex.getMessage()); + return true; + } + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 022ba04fbe..897af3eda5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1921,7 +1921,6 @@ private Path fixFullAcidPathForLoadData(LoadFileType loadFileType, Path destPath if(!FileUtils.mkdir(fs, destPath, conf)) { LOG.warn(destPath + " already exists?!?!"); } - AcidUtils.MetaDataFile.createMetaFile(destPath, fs, true); } catch (IOException e) { throw new HiveException("load: error while creating " + destPath + ";loadFileType=" + loadFileType, e); } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java index 76618ffca9..0c309c0b06 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java @@ -105,9 +105,14 @@ public void testCreateFilenameLargeIds() throws Exception { @Test public void testParsing() throws Exception { + Configuration conf = new Configuration(); + MockFileSystem fs = new MockFileSystem(conf, + //new MockFile("mock:/tmp/base_000123/bucket_00001", 500, new byte[0]), + new MockFile("mock:/tmp/delta_000005_000006/bucket_00001", 500, new byte[0]), + new MockFile("mock:/tmp/delete_delta_000005_000006/bucket_00001", 500, + new byte[0])); assertEquals(123, AcidUtils.parseBase(new Path("/tmp/base_000123"))); Path dir = new Path("/tmp/tbl"); - Configuration conf = new Configuration(); AcidOutputFormat.Options opts = AcidUtils.parseBaseOrDeltaBucketFilename(new Path(dir, "base_567/bucket_123"), conf); @@ -116,15 +121,15 @@ public void testParsing() throws Exception { assertEquals(567, opts.getMaximumTransactionId()); assertEquals(0, opts.getMinimumTransactionId()); 
assertEquals(123, opts.getBucketId()); - opts = AcidUtils.parseBaseOrDeltaBucketFilename(new Path(dir, "delta_000005_000006/bucket_00001"), - conf); + opts = AcidUtils.parseBaseOrDeltaBucketFilename( + new MockPath(fs, dir + "/delta_000005_000006/bucket_00001"), conf); assertEquals(false, opts.getOldStyle()); assertEquals(false, opts.isWritingBase()); assertEquals(6, opts.getMaximumTransactionId()); assertEquals(5, opts.getMinimumTransactionId()); assertEquals(1, opts.getBucketId()); - opts = AcidUtils.parseBaseOrDeltaBucketFilename(new Path(dir, "delete_delta_000005_000006/bucket_00001"), - conf); + opts = AcidUtils.parseBaseOrDeltaBucketFilename( + new MockPath(fs, dir + "/delete_delta_000005_000006/bucket_00001"), conf); assertEquals(false, opts.getOldStyle()); assertEquals(false, opts.isWritingBase()); assertEquals(6, opts.getMaximumTransactionId());