Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (revision 966616)
+++ lucene/CHANGES.txt (working copy)
@@ -241,6 +241,9 @@
explicitly enable the old behavior with setAutoGeneratePhraseQueries(true)
(Robert Muir)
+* LUCENE-2537: FSDirectory.copy() implementation was unsafe and could result in
+ OOM if a large file was copied. (Shai Erera)
+
New features
* LUCENE-2128: Parallelized fetching document frequencies during weight
Index: lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java (revision 966616)
+++ lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java (working copy)
@@ -20,12 +20,13 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.IOUtils;
+
import java.util.LinkedList;
import java.util.HashSet;
import java.io.IOException;
-
/**
* Combines multiple files into a single compound file.
* The file format:
@@ -142,20 +143,17 @@
*/
public void close() throws IOException {
if (merged)
- throw new IllegalStateException(
- "Merge already performed");
+ throw new IllegalStateException("Merge already performed");
if (entries.isEmpty())
- throw new IllegalStateException(
- "No entries to merge have been defined");
+ throw new IllegalStateException("No entries to merge have been defined");
merged = true;
// open the compound stream
- IndexOutput os = null;
+ IndexOutput os = directory.createOutput(fileName);
+ IOException priorException = null;
try {
- os = directory.createOutput(fileName);
-
// Write the Version info - must be a VInt because CFR reads a VInt
// in older versions!
os.writeVInt(FORMAT_CURRENT);
@@ -185,10 +183,9 @@
// Open the files and copy their data into the stream.
// Remember the locations of each file's data section.
- byte buffer[] = new byte[16384];
for (FileEntry fe : entries) {
fe.dataOffset = os.getFilePointer();
- copyFile(fe, os, buffer);
+ copyFile(fe, os);
}
// Write the data offsets into the directory of the compound stream
@@ -206,56 +203,37 @@
IndexOutput tmp = os;
os = null;
tmp.close();
-
+ } catch (IOException e) {
+ priorException = e;
} finally {
- if (os != null) try { os.close(); } catch (IOException e) { }
+ IOUtils.closeSafely(priorException, os);
}
}
- /** Copy the contents of the file with specified extension into the
- * provided output stream. Use the provided buffer for moving data
- * to reduce memory allocation.
- */
- private void copyFile(FileEntry source, IndexOutput os, byte buffer[])
- throws IOException
- {
- IndexInput is = null;
- try {
- long startPtr = os.getFilePointer();
+ /**
+   * Copies the contents of the file referenced by the given entry into the
+   * provided output stream.
+ */
+ private void copyFile(FileEntry source, IndexOutput os) throws IOException {
+ IndexInput is = directory.openInput(source.file);
+ try {
+ long startPtr = os.getFilePointer();
+ long length = is.length();
+ os.copyBytes(is, length);
- is = directory.openInput(source.file);
- long length = is.length();
- long remainder = length;
- int chunk = buffer.length;
+ if (checkAbort != null) {
+ checkAbort.work(length);
+ }
- while(remainder > 0) {
- int len = (int) Math.min(chunk, remainder);
- is.readBytes(buffer, 0, len, false);
- os.writeBytes(buffer, len);
- remainder -= len;
- if (checkAbort != null)
- // Roughly every 2 MB we will check if
- // it's time to abort
- checkAbort.work(80);
- }
+ // Verify that the output length diff is equal to original file
+ long endPtr = os.getFilePointer();
+ long diff = endPtr - startPtr;
+ if (diff != length)
+ throw new IOException("Difference in the output file offsets " + diff
+ + " does not match the original file length " + length);
- // Verify that remainder is 0
- if (remainder != 0)
- throw new IOException(
- "Non-zero remainder length after copying: " + remainder
- + " (id: " + source.file + ", length: " + length
- + ", buffer size: " + chunk + ")");
-
- // Verify that the output length diff is equal to original file
- long endPtr = os.getFilePointer();
- long diff = endPtr - startPtr;
- if (diff != length)
- throw new IOException(
- "Difference in the output file offsets " + diff
- + " does not match the original file length " + length);
-
- } finally {
- if (is != null) is.close();
- }
+ } finally {
+ is.close();
}
+ }
}
Index: lucene/src/java/org/apache/lucene/store/FSDirectory.java
===================================================================
--- lucene/src/java/org/apache/lucene/store/FSDirectory.java (revision 966616)
+++ lucene/src/java/org/apache/lucene/store/FSDirectory.java (working copy)
@@ -33,6 +33,8 @@
import static java.util.Collections.synchronizedSet;
import java.util.HashSet;
import java.util.Set;
+
+import org.apache.lucene.store.SimpleFSDirectory.SimpleFSIndexInput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.Constants;
@@ -126,6 +128,12 @@
protected final Set staleFiles = synchronizedSet(new HashSet()); // Files written, but not yet sync'ed
private int chunkSize = DEFAULT_READ_CHUNK_SIZE; // LUCENE-1566
+ /**
+   * Chunk size used when reading through the FileChannel API. If an attempt to
+   * read a large file is made without limiting the chunk size, an OOM may occur.
+ */
+ private static final long CHANNEL_CHUNK_SIZE = 1 << 21; // Use 2MB chunk size - LUCENE-2537
+
// returns the canonical version of the directory, creating it if it doesn't exist.
private static File getCanonicalPath(File file) throws IOException {
return new File(file.getCanonicalPath());
@@ -441,7 +449,7 @@
try {
input = new FileInputStream(new File(directory, src)).getChannel();
output = new FileOutputStream(new File(target.directory, dest)).getChannel();
- output.transferFrom(input, 0, input.size());
+ copy(input, output, input.size());
} catch (IOException ioe) {
priorException = ioe;
} finally {
@@ -451,6 +459,25 @@
super.copy(to, src, dest);
}
}
+
+ /**
+ * Copies the content of a given {@link FileChannel} to a destination one. The
+ * copy is done in chunks of 2MB because if transferFrom is used without a
+ * limit when copying a very large file, then an OOM may be thrown (depends on
+ * the state of the RAM in the machine, as well as the OS used). Performance
+ * measurements showed that chunk sizes larger than 2MB do not result in much
+ * faster file copy, therefore we limit the size to be safe with different
+ * file sizes and systems.
+ */
+ static void copy(FileChannel input, FileChannel output, long numBytes) throws IOException {
+ long pos = output.position();
+ long writeTo = numBytes + pos;
+ while (pos < writeTo) {
+ pos += output.transferFrom(input, pos, Math.min(CHANNEL_CHUNK_SIZE, writeTo - pos));
+ }
+    // transferFrom does not update the output channel's position; set it manually.
+ output.position(pos);
+ }
protected static class FSIndexOutput extends BufferedIndexOutput {
private final FSDirectory parent;
@@ -472,6 +499,28 @@
}
@Override
+ public void copyBytes(IndexInput input, long numBytes) throws IOException {
+ // Optimized copy only if the number of bytes to copy is larger than the
+    // buffer size, and the given IndexInput supports FileChannel copying.
+    // NOTE: the check below relies on NIOFSIndexInput extending SimpleFSIndexInput.
+    // If that changes in the future, we should update this check as well.
+ if (numBytes > BUFFER_SIZE && input instanceof SimpleFSIndexInput) {
+ // flush any bytes in the buffer
+ flush();
+ // do the optimized copy
+ FileChannel in = ((SimpleFSIndexInput) input).file.getChannel();
+ FileChannel out = file.getChannel();
+ copy(in, out, numBytes);
+      // Correct the position in super (BufferedIndexOutput) so that calls
+      // to getFilePointer will return the correct pointer.
+      // TODO: consider exposing a dedicated method for this instead of seek().
+ super.seek(out.position());
+ } else {
+ super.copyBytes(input, numBytes);
+ }
+ }
+
+ @Override
public void close() throws IOException {
// only close the file if it has not been closed yet
if (isOpen) {
Index: lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java
===================================================================
--- lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java (revision 966616)
+++ lucene/src/java/org/apache/lucene/store/SimpleFSDirectory.java (working copy)
@@ -125,7 +125,7 @@
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
"OutOfMemoryError likely caused by the Sun VM Bug described in "
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
- + "with a a value smaller than the current chunks size (" + chunkSize + ")");
+ + "with a value smaller than the current chunks size (" + chunkSize + ")");
outOfMemoryError.initCause(e);
throw outOfMemoryError;
}