ExportSnapshot: add optional checksum verification after export

Summary of the change carried by this patch:
  * New command-line flag -post-export-checksum-verify, passed to the copy job
    through the configuration key snapshot.export.checksum.post.export.verify
    (read with a default of false in the mapper's setup()).
  * copyFile() now re-reads the output FileStatus after the copy, fails with an
    IOException if the lengths differ (sameFilesize), and, when the new flag is
    set, also fails if sameChecksum() reports a mismatch. On failure it logs,
    increments COPY_FAILED and rethrows; FILES_COPIED is incremented only after
    both checks pass.
  * copyData() no longer performs the size check or touches the
    FILES_COPIED / COPY_FAILED counters.
  * sameChecksum() and getFileChecksum() move out of the mapper and become
    static members of ExportSnapshot; sameChecksum() is public and annotated
    @VisibleForTesting. TestExportSnapshot gains testSameChecksum().

NOTE(review): the try/catch that incremented COPY_FAILED is removed from
copyData(), and the replacement in copyFile() wraps only the post-copy checks
shown in the hunk at new line 283. Confirm that I/O errors thrown while the
data is being copied are still counted as COPY_FAILED.

NOTE: line breaks, indentation and blank context lines below were reconstructed
from a whitespace-collapsed copy of this patch and checked against the hunk
line counts; runs of spaces inside string literals could not be recovered.

diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
index 2655e2b..7fcbad4 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
@@ -76,6 +76,8 @@ import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Export the specified snapshot to a given FileSystem.
  *
@@ -96,6 +98,8 @@ public class ExportSnapshot extends Configured implements Tool {
   private static final String CONF_FILES_GROUP = "snapshot.export.files.attributes.group";
   private static final String CONF_FILES_MODE = "snapshot.export.files.attributes.mode";
   private static final String CONF_CHECKSUM_VERIFY = "snapshot.export.checksum.verify";
+  private static final String CONF_CHECKSUM_POST_EXPORT_VERIFY =
+      "snapshot.export.checksum.post.export.verify";
   private static final String CONF_OUTPUT_ROOT = "snapshot.export.output.root";
   private static final String CONF_INPUT_ROOT = "snapshot.export.input.root";
   private static final String CONF_BUFFER_SIZE = "snapshot.export.buffer.size";
@@ -123,6 +127,7 @@ public class ExportSnapshot extends Configured implements Tool {
     private Random random;
 
     private boolean verifyChecksum;
+    private boolean postExportVerifyChecksum;
     private String filesGroup;
     private String filesUser;
     private short filesMode;
@@ -140,6 +145,7 @@ public class ExportSnapshot extends Configured implements Tool {
     public void setup(Context context) throws IOException {
       Configuration conf = context.getConfiguration();
       verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);
+      postExportVerifyChecksum = conf.getBoolean(CONF_CHECKSUM_POST_EXPORT_VERIFY, false);
 
       filesGroup = conf.get(CONF_FILES_GROUP);
       filesUser = conf.get(CONF_FILES_USER);
@@ -251,7 +257,7 @@ public class ExportSnapshot extends Configured implements Tool {
       // Verify if the output file exists and is the same that we want to copy
       if (outputFs.exists(outputPath)) {
         FileStatus outputStat = outputFs.getFileStatus(outputPath);
-        if (outputStat != null && sameFile(inputStat, outputStat)) {
+        if (outputStat != null && sameFile(inputStat, outputStat, verifyChecksum)) {
           LOG.info("Skip copy " + inputStat.getPath() + " to " + outputPath + ", same file.");
           context.getCounter(Counter.FILES_SKIPPED).increment(1);
           context.getCounter(Counter.BYTES_SKIPPED).increment(inputStat.getLen());
@@ -277,6 +283,33 @@ public class ExportSnapshot extends Configured implements Tool {
           out.close();
         }
 
+        Path inputPath = inputStat.getPath();
+        try {
+          // Verify that the written size matches
+          FileStatus outputStat = outputFs.getFileStatus(outputPath);
+          if (!sameFilesize(inputStat, outputStat)) {
+            String msg =
+                "number of bytes copied not matching copied=" + outputStat.getLen() + " expected="
+                    + inputStat.getLen() + " for file=" + inputPath;
+            LOG.error(msg);
+            throw new IOException(msg);
+          }
+
+          // Verify if checksums match post export, if enabled
+          if (!sameChecksum(inputPath, outputPath, postExportVerifyChecksum, inputFs, outputFs)) {
+            String msg =
+                "checksums don't match for input:" + inputPath + " and output:" + outputPath;
+            LOG.error(msg);
+            throw new IOException(msg);
+          }
+        } catch (IOException e) {
+          LOG.error("Error copying " + inputPath + " to " + outputPath, e);
+          context.getCounter(Counter.COPY_FAILED).increment(1);
+          throw e;
+        }
+        LOG.info("copy completed for input=" + inputPath + " output=" + outputPath);
+        context.getCounter(Counter.FILES_COPIED).increment(1);
+
         // Try to Preserve attributes
         if (!preserveAttributes(outputPath, inputStat)) {
           LOG.warn("You may have to run manually chown on: " + outputPath);
@@ -345,7 +378,6 @@ public class ExportSnapshot extends Configured implements Tool {
       final String statusMessage = "copied %s/" + StringUtils.humanReadableInt(inputFileSize) +
                                    " (%.1f%%)";
 
-      try {
         byte[] buffer = new byte[bufferSize];
         long totalBytesWritten = 0;
         int reportBytes = 0;
@@ -374,24 +406,10 @@ public class ExportSnapshot extends Configured implements Tool {
                           (totalBytesWritten/(float)inputFileSize) * 100.0f) +
                           " from " + inputPath + " to " + outputPath);
 
-        // Verify that the written size match
-        if (totalBytesWritten != inputFileSize) {
-          String msg = "number of bytes copied not matching copied=" + totalBytesWritten +
-                       " expected=" + inputFileSize + " for file=" + inputPath;
-          throw new IOException(msg);
-        }
-
-        LOG.info("copy completed for input=" + inputPath + " output=" + outputPath);
         LOG.info("size=" + totalBytesWritten +
             " (" + StringUtils.humanReadableInt(totalBytesWritten) + ")" +
             " time=" + StringUtils.formatTimeDiff(etime, stime) +
             String.format(" %.3fM/sec", (totalBytesWritten / ((etime - stime)/1000.0))/1048576.0));
-        context.getCounter(Counter.FILES_COPIED).increment(1);
-      } catch (IOException e) {
-        LOG.error("Error copying " + inputPath + " to " + outputPath, e);
-        context.getCounter(Counter.COPY_FAILED).increment(1);
-        throw e;
-      }
     }
 
     /**
@@ -452,34 +470,52 @@ public class ExportSnapshot extends Configured implements Tool {
       }
     }
 
-    private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
-      try {
-        return fs.getFileChecksum(path);
-      } catch (IOException e) {
-        LOG.warn("Unable to get checksum for file=" + path, e);
-        return null;
-      }
-    }
-
     /**
      * Check if the two files are equal by looking at the file length,
      * and at the checksum (if user has specified the verifyChecksum flag).
      */
-    private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) {
-      // Not matching length
-      if (inputStat.getLen() != outputStat.getLen()) return false;
+    private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat,
+        final boolean verifyChecksum) {
+      // check length and checksum
+      return sameFilesize(inputStat, outputStat)
+          && sameChecksum(inputStat.getPath(), outputStat.getPath(), verifyChecksum, inputFs,
+            outputFs);
+    }
+
+    /**
+     * Check if the two files have same length
+     */
+    private boolean sameFilesize(final FileStatus inputStat, final FileStatus outputStat) {
+      return inputStat != null && outputStat != null && inputStat.getLen() == outputStat.getLen();
+    }
+  }
 
-      // Mark files as equals, since user asked for no checksum verification
-      if (!verifyChecksum) return true;
+  /**
+   * Check if the two files with paths have same checksum, (if user has specified the verifyChecksum
+   * flag).
+   */
+  @VisibleForTesting
+  public static boolean sameChecksum(final Path inputPath, final Path outputPath,
+      boolean verifyChecksum, FileSystem inputFs, FileSystem outputFs) {
+    // Mark files as same checkSum, since user asked for no checksum verification
+    if (!verifyChecksum) return true;
 
-      // If checksums are not available, files are not the same.
-      FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
-      if (inChecksum == null) return false;
+    // If checksums are not available, files are not the same.
+    FileChecksum inChecksum = getFileChecksum(inputFs, inputPath);
+    if (inChecksum == null) return false;
 
-      FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
-      if (outChecksum == null) return false;
+    FileChecksum outChecksum = getFileChecksum(outputFs, outputPath);
+    if (outChecksum == null) return false;
 
-      return inChecksum.equals(outChecksum);
+    return inChecksum.equals(outChecksum);
+  }
+
+  private static FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
+    try {
+      return fs.getFileChecksum(path);
+    } catch (IOException e) {
+      LOG.warn("Unable to get checksum for file=" + path, e);
+      return null;
     }
   }
 
@@ -742,7 +778,7 @@ public class ExportSnapshot extends Configured implements Tool {
   private void runCopyJob(final Path inputRoot, final Path outputRoot,
       final String snapshotName, final Path snapshotDir, final boolean verifyChecksum,
       final String filesUser, final String filesGroup, final int filesMode,
-      final int mappers, final int bandwidthMB)
+      final int mappers, final int bandwidthMB, final boolean postExportVerifyChecksum)
           throws IOException, InterruptedException, ClassNotFoundException {
     Configuration conf = getConf();
     if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
@@ -753,6 +789,7 @@ public class ExportSnapshot extends Configured implements Tool {
     }
     conf.setInt(CONF_FILES_MODE, filesMode);
     conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
+    conf.setBoolean(CONF_CHECKSUM_POST_EXPORT_VERIFY, postExportVerifyChecksum);
     conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
     conf.set(CONF_INPUT_ROOT, inputRoot.toString());
     conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
@@ -799,6 +836,7 @@ public class ExportSnapshot extends Configured implements Tool {
   public int run(String[] args) throws IOException {
     boolean verifyTarget = true;
     boolean verifyChecksum = true;
+    boolean postExportVerifyChecksum = false;
     String snapshotName = null;
     String targetName = null;
     boolean overwrite = false;
@@ -826,6 +864,8 @@ public class ExportSnapshot extends Configured implements Tool {
         FSUtils.setRootDir(conf, inputRoot);
       } else if (cmd.equals("-no-checksum-verify")) {
         verifyChecksum = false;
+      } else if (cmd.equals("-post-export-checksum-verify")) {
+        postExportVerifyChecksum = true;
       } else if (cmd.equals("-no-target-verify")) {
         verifyTarget = false;
       } else if (cmd.equals("-mappers")) {
@@ -863,6 +903,10 @@ public class ExportSnapshot extends Configured implements Tool {
       targetName = snapshotName;
     }
 
+    if (postExportVerifyChecksum) {
+      LOG.warn("checksums can differ between different filesystems/configs, so use"
+          + " -post-export-checksum-verify cautiously");
+    }
     conf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
     FileSystem inputFs = FileSystem.get(inputRoot.toUri(), conf);
     LOG.debug("inputFs=" + inputFs.getUri().toString() + " inputRoot=" + inputRoot);
@@ -934,7 +978,7 @@ public class ExportSnapshot extends Configured implements Tool {
     // by the HFileArchiver, since they have no references.
     try {
       runCopyJob(inputRoot, outputRoot, snapshotName, snapshotDir, verifyChecksum,
-                 filesUser, filesGroup, filesMode, mappers, bandwidthMB);
+                 filesUser, filesGroup, filesMode, mappers, bandwidthMB, postExportVerifyChecksum);
 
       LOG.info("Finalize the Snapshot Export");
       if (!skipTmp) {
@@ -975,6 +1019,7 @@ public class ExportSnapshot extends Configured implements Tool {
     System.err.println(" -copy-to NAME Remote destination hdfs://");
     System.err.println(" -copy-from NAME Input folder hdfs:// (default hbase.rootdir)");
     System.err.println(" -no-checksum-verify Do not verify checksum, use name+length only.");
+    System.err.println(" -post-export-checksum-verify Verify checksum after export");
     System.err.println(" -no-target-verify Do not verify the integrity of the \\" +
         "exported snapshot.");
     System.err.println(" -overwrite Rewrite the snapshot manifest if already exists");
diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
index 71a39f06..98b60fe 100644
--- hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.snapshot;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
 
 import java.io.IOException;
 import java.net.URI;
@@ -31,6 +32,7 @@ import java.util.Set;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -42,6 +44,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.fs.HFileSystem;
 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
 import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
@@ -324,6 +327,36 @@ public class TestExportSnapshot {
     assertEquals(0, runExportAndInjectFailures(snapshotName, true));
   }
 
+  /**
+   * Check checksum
+   */
+  @Test
+  public void testSameChecksum() throws Exception {
+    assertTrue(ExportSnapshot.sameChecksum(null, null, false, null, null));
+
+    FileSystem fs = HFileSystem.get(TEST_UTIL.getConfiguration());
+    Path file = new Path(TEST_UTIL.getDataTestDir(), "file");
+    Path similarFile = new Path(TEST_UTIL.getDataTestDir(), "sameFile");
+    Path differentFile = new Path(TEST_UTIL.getDataTestDir(), "differentFile");
+
+    FSDataOutputStream os = fs.create(file);
+    os.write("a file".getBytes());
+    os.close();
+
+    os = fs.create(similarFile);
+    os.write("a file".getBytes());
+    os.close();
+
+    os = fs.create(differentFile);
+    os.write("another file".getBytes());
+    os.close();
+
+    assertTrue(ExportSnapshot.sameChecksum(file, file, true, fs, fs));
+    assertTrue(ExportSnapshot.sameChecksum(file, similarFile, true, fs, fs));
+    assertTrue(ExportSnapshot.sameChecksum(file, differentFile, false, fs, fs));
+    assertFalse(ExportSnapshot.sameChecksum(file, differentFile, true, fs, fs));
+  }
+
   /*
    * Execute the ExportSnapshot job injecting failures
    */