Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java (revision 1416361)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java (working copy)
@@ -20,6 +20,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.GenerationReplacementsFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PostingsFormat;
@@ -44,6 +45,7 @@
// TODO: need a plain-text impl (using the above)
private final NormsFormat normsFormat = new SimpleTextNormsFormat();
private final LiveDocsFormat liveDocs = new SimpleTextLiveDocsFormat();
+ private final GenerationReplacementsFormat generationReplacements = new SimpleTextGenerationReplacementsFormat();
public SimpleTextCodec() {
super("SimpleText");
@@ -88,4 +90,9 @@
public LiveDocsFormat liveDocsFormat() {
return liveDocs;
}
+
+ @Override
+ public GenerationReplacementsFormat generationReplacementsFormat() {
+ return generationReplacements;
+ }
}
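
For reference (not part of the patch), wiring this codec into a writer would presumably
look like the following sketch; the directory and analyzer variables are placeholders:

  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
  conf.setCodec(new SimpleTextCodec());
  IndexWriter writer = new IndexWriter(directory, conf);
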
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextGenerationReplacementsFormat.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextGenerationReplacementsFormat.java (revision 0)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextGenerationReplacementsFormat.java (working copy)
@@ -0,0 +1,86 @@
+package org.apache.lucene.codecs.simpletext;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map.Entry;
+
+import org.apache.lucene.codecs.GenerationReplacementsFormat;
+import org.apache.lucene.index.FieldGenerationReplacements;
+import org.apache.lucene.index.SegmentInfoPerCommit;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.StringHelper;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class SimpleTextGenerationReplacementsFormat extends
+ GenerationReplacementsFormat {
+ final static BytesRef FGR_DOCCOUNT = new BytesRef(" number of documents ");
+ final static BytesRef FGR_DOC = new BytesRef(" doc ");
+ final static BytesRef FGR_GENERATION = new BytesRef(" generation ");
+
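+  // Illustrative on-disk layout, as consumed by readPersistedGeneration below
+  // (the doc/generation values and exact spacing here are examples only):
+  //
+  //    number of documents 2
+  //    doc 3
+  //    generation 1
+  //    doc 7
+  //    generation 2
+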
+ @Override
+ protected FieldGenerationReplacements readPersistedGeneration(IndexInput input)
+ throws IOException {
+ FieldGenerationReplacements reps = new FieldGenerationReplacements();
+
+ BytesRef scratch = new BytesRef();
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch, FGR_DOCCOUNT);
+ final int size = Integer.parseInt(readString(FGR_DOCCOUNT.length, scratch));
+
+ for (int i = 0; i < size; i++) {
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch, FGR_DOC);
+ final int doc = Integer.parseInt(readString(FGR_DOC.length, scratch));
+
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch, FGR_GENERATION);
+      final long generation = Long.parseLong(readString(FGR_GENERATION.length, scratch));
+
+ reps.set(doc, generation);
+ }
+
+ return reps;
+ }
+
+ private String readString(int offset, BytesRef scratch) {
+ return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8);
+ }
+
+ @Override
+ protected void persistGeneration(FieldGenerationReplacements reps,
+ IndexOutput output) throws IOException {
+ BytesRef scratch = new BytesRef();
+ SimpleTextUtil.write(output, FGR_DOCCOUNT);
+ SimpleTextUtil.write(output, Integer.toString(reps.size()), scratch);
+ SimpleTextUtil.writeNewline(output);
+
+ for (Entry
- * You need to implement {@link #visitDocument(int, StoredFieldVisitor)} to
+ * You need to implement {@link #visitDocument(int, StoredFieldVisitor, Set)} to
* read the stored fields for a document, implement {@link #clone()} (creating
* clones of any IndexInputs used, etc), and {@link #close()}
* @lucene.experimental
@@ -35,8 +36,9 @@
protected StoredFieldsReader() {
}
- /** Visit the stored fields for document
* Since each {@link DeleteSlice} maintains its own head and the list is only
* single linked the garbage collector takes care of pruning the list for us.
@@ -44,14 +44,15 @@
* Each DWPT as well as the global delete pool maintain their private
* DeleteSlice instance. In the DWPT case updating a slice is equivalent to
* atomically finishing the document. The slice update guarantees a "happens
- * before" relationship to all other updates in the same indexing session. When a
- * DWPT updates a document it:
+ * before" relationship to all other updates in the same indexing session. When
+ * a DWPT updates a document it:
*
* NOTE: extensions used by codecs are not
@@ -83,8 +83,9 @@
* @param base main part of the file name
* @param ext extension of the filename
* @param gen generation
+ * @param isUpdate whether the file is an update file or not
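+   *          (update files get an extra leading underscore; e.g. base "_0",
+   *          ext "fgr", gen 11 and isUpdate=true presumably yield "__0_b.fgr",
+   *          since generations are written in radix 36)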
*/
- public static String fileNameFromGeneration(String base, String ext, long gen) {
+ public static String fileNameFromGeneration(String base, String ext, long gen, boolean isUpdate) {
if (gen == -1) {
return null;
} else if (gen == 0) {
@@ -94,8 +95,11 @@
// The '6' part in the length is: 1 for '.', 1 for '_' and 4 as estimate
// to the gen length as string (hopefully an upper limit so SB won't
// expand in the middle.
- StringBuilder res = new StringBuilder(base.length() + 6 + ext.length())
- .append(base).append('_').append(Long.toString(gen, Character.MAX_RADIX));
+ StringBuilder res = new StringBuilder(base.length() + 6 + ext.length());
+ if (isUpdate) {
+ res.append('_');
+ }
+ res.append(base).append('_').append(generationString(gen));
if (ext.length() > 0) {
res.append('.').append(ext);
}
@@ -103,7 +107,36 @@
}
}
+ public static String generationString(long gen) {
+ return Long.toString(gen, Character.MAX_RADIX);
+ }
+
/**
+ * Computes the base name of an updated segment from base and generation. If
+   * the generation is <= 0, the file name is null; otherwise, the file name is
+ * <base>_upd_<gen>.
+ * The {@link OpenMode} option on
+ * {@link IndexWriterConfig#setOpenMode(OpenMode)} determines whether a new
+ * index is created, or whether an existing index is opened. Note that you can
+ * open an index with {@link OpenMode#CREATE} even while readers are using the
+ * index. The old readers will continue to search the "point in time" snapshot
+ * they had opened, and won't see the newly created index until they re-open. If
+ * {@link OpenMode#CREATE_OR_APPEND} is used IndexWriter will create a new index
+ * if there is not already an index at the provided path and otherwise open the
+ * existing index.
+ *
+ * In either case, documents are added with {@link #addDocument(IndexDocument)
+ * addDocument} and removed with {@link #deleteDocuments(Term)} or
+ * {@link #deleteDocuments(Query)}. A document can be updated with
+ * {@link #updateDocument(Term, IndexDocument) updateDocument} (which just
+ * deletes and then adds the entire document). When finished adding, deleting
+ * and updating documents, {@link #close() close} should be called.
+ *
+ * These changes are buffered in memory and periodically flushed to the
+ * {@link Directory} (during the above method calls). A flush is triggered when
+ * there are enough added documents since the last flush. Flushing is triggered
+ * either by RAM usage of the documents (see
+ * {@link IndexWriterConfig#setRAMBufferSizeMB}) or the number of added
+ * documents (see {@link IndexWriterConfig#setMaxBufferedDocs(int)}). The
+ * default is to flush when RAM usage hits
+ * {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} MB. For best indexing
+ * speed you should flush by RAM usage with a large RAM buffer. Additionally, if
+ * IndexWriter reaches the configured number of buffered deletes (see
+ * {@link IndexWriterConfig#setMaxBufferedDeleteTerms}) the deleted terms and
+ * queries are flushed and applied to existing segments. In contrast to the
+ * other flush options {@link IndexWriterConfig#setRAMBufferSizeMB} and
+ * {@link IndexWriterConfig#setMaxBufferedDocs(int)}, deleted terms won't
+ * trigger a segment flush. Note that flushing just moves the internal buffered
+ * state in IndexWriter into the index, but these changes are not visible to
+ * IndexReader until either {@link #commit()} or {@link #close} is called. A
+ * flush may also trigger one or more segment merges which by default run with a
+ * background thread so as not to block the addDocument calls (see below for changing the {@link MergeScheduler}).
+ *
+ * Opening an
+ * Expert:
+ * Expert:
+ * NOTE: if you hit an OutOfMemoryError then IndexWriter will quietly
+ * record this fact and block all future segment commits. This is a defensive
+ * measure in case any internal state (buffered documents and deletions) were
+ * corrupted. Any subsequent calls to {@link #commit()} will throw an
+ * IllegalStateException. The only course of action is to call {@link #close()},
+ * which internally will call {@link #rollback()}, to undo any changes to the
+ * index since the last commit. You can also just call {@link #rollback()}
+ * directly.
+ *
+ * NOTE: {@link IndexWriter} instances are completely thread safe,
+ * meaning multiple threads can call any of its methods, concurrently. If your
+ * application requires external synchronization, you should not
+ * synchronize on the
+ * NOTE: If you call
- The {@link OpenMode} option on
- {@link IndexWriterConfig#setOpenMode(OpenMode)} determines
- whether a new index is created, or whether an existing index is
- opened. Note that you can open an index with {@link OpenMode#CREATE}
- even while readers are using the index. The old readers will
- continue to search the "point in time" snapshot they had opened,
- and won't see the newly created index until they re-open. If
- {@link OpenMode#CREATE_OR_APPEND} is used IndexWriter will create a
- new index if there is not already an index at the provided path
- and otherwise open the existing index. In either case, documents are added with {@link #addDocument(IndexDocument)
- addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
- #deleteDocuments(Query)}. A document can be updated with {@link
- #updateDocument(Term, IndexDocument) updateDocument} (which just deletes
- and then adds the entire document). When finished adding, deleting
- and updating documents, {@link #close() close} should be called. These changes are buffered in memory and periodically
- flushed to the {@link Directory} (during the above method
- calls). A flush is triggered when there are enough added documents
- since the last flush. Flushing is triggered either by RAM usage of the
- documents (see {@link IndexWriterConfig#setRAMBufferSizeMB}) or the
- number of added documents (see {@link IndexWriterConfig#setMaxBufferedDocs(int)}).
- The default is to flush when RAM usage hits
- {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} MB. For
- best indexing speed you should flush by RAM usage with a
- large RAM buffer. Additionally, if IndexWriter reaches the configured number of
- buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms})
- the deleted terms and queries are flushed and applied to existing segments.
- In contrast to the other flush options {@link IndexWriterConfig#setRAMBufferSizeMB} and
- {@link IndexWriterConfig#setMaxBufferedDocs(int)}, deleted terms
- won't trigger a segment flush. Note that flushing just moves the
- internal buffered state in IndexWriter into the index, but
- these changes are not visible to IndexReader until either
- {@link #commit()} or {@link #close} is called. A flush may
- also trigger one or more segment merges which by default
- run with a background thread so as not to block the
- addDocument calls (see below
- for changing the {@link MergeScheduler}). Opening an Expert: Expert:
- NOTE: if you hit an
- OutOfMemoryError then IndexWriter will quietly record this
- fact and block all future segment commits. This is a
- defensive measure in case any internal state (buffered
- documents and deletions) were corrupted. Any subsequent
- calls to {@link #commit()} will throw an
- IllegalStateException. The only course of action is to
- call {@link #close()}, which internally will call {@link
- #rollback()}, to undo any changes to the index since the
- last commit. You can also just call {@link #rollback()}
- directly. NOTE: {@link
- IndexWriter} instances are completely thread
- safe, meaning multiple threads can call any of its
- methods, concurrently. If your application requires
- external synchronization, you should not
- synchronize on the NOTE: If you call
- Note that this is functionally equivalent to calling
- * {#flush} and then opening a new reader. But the turnaround time of this
- * method should be faster since it avoids the potentially
- * costly {@link #commit}. You must close the {@link IndexReader} returned by
- * this method once you are done using it. It's near real-time because there is no hard
- * guarantee on how quickly you can get a new reader after
- * making changes with IndexWriter. You'll have to
- * experiment in your situation to determine if it's
- * fast enough. As this is a new and experimental
- * feature, please report back on your findings so we can
- * learn, improve and iterate. The resulting reader supports {@link
- * DirectoryReader#openIfChanged}, but that call will simply forward
- * back to this method (though this may change in the
- * future). The very first time this method is called, this
- * writer instance will make every effort to pool the
- * readers that it opens for doing merges, applying
- * deletes, etc. This means additional resources (RAM,
- * file descriptors, CPU time) will be consumed. For lower latency on reopening a reader, you should
- * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
- * pre-warm a newly merged segment before it's committed
- * to the index. This is important for minimizing
- * index-to-search delay after a large merge. If an addIndexes* call is running in another thread,
- * then this reader will only search those segments from
- * the foreign index that have been successfully copied
- * over, so far NOTE: Once the writer is closed, any
- * outstanding readers may continue to be used. However,
- * if you attempt to reopen any of those readers, you'll
- * hit an {@link AlreadyClosedException}.
+ * Note that this is functionally equivalent to calling {#flush} and then
+ * opening a new reader. But the turnaround time of this method should be
+ * faster since it avoids the potentially costly {@link #commit}.
+ *
+ * You must close the {@link IndexReader} returned by this method once you are
+ * done using it.
+ *
+ * It's near real-time because there is no hard guarantee on how
+ * quickly you can get a new reader after making changes with IndexWriter.
+ * You'll have to experiment in your situation to determine if it's fast
+ * enough. As this is a new and experimental feature, please report back on
+ * your findings so we can learn, improve and iterate.
+ *
+ * The resulting reader supports {@link DirectoryReader#openIfChanged}, but
+ * that call will simply forward back to this method (though this may change
+ * in the future).
+ *
+ * The very first time this method is called, this writer instance will make
+ * every effort to pool the readers that it opens for doing merges, applying
+ * deletes, etc. This means additional resources (RAM, file descriptors, CPU
+ * time) will be consumed.
+ *
+ * For lower latency on reopening a reader, you should call
+ * {@link IndexWriterConfig#setMergedSegmentWarmer} to pre-warm a newly merged
+ * segment before it's committed to the index. This is important for
+ * minimizing index-to-search delay after a large merge.
+ *
+ * If an addIndexes* call is running in another thread, then this reader will
+ * only search those segments from the foreign index that have been
+ * successfully copied over, so far
+ *
+ * NOTE: Once the writer is closed, any outstanding readers may
+ * continue to be used. However, if you attempt to reopen any of those
+ * readers, you'll hit an {@link AlreadyClosedException}.
+ *
* Calls {@link #ensureOpen(boolean) ensureOpen(true)}.
- * @throws AlreadyClosedException if this IndexWriter is closed
+ *
+ * @throws AlreadyClosedException
+ * if this IndexWriter is closed
*/
protected final void ensureOpen() throws AlreadyClosedException {
ensureOpen(true);
}
-
+
final Codec codec; // for writing new segments
-
+
/**
* Constructs a new IndexWriter per the settings given in
*
* @param d
@@ -599,15 +625,15 @@
mergePolicy.setIndexWriter(this);
mergeScheduler = config.getMergeScheduler();
codec = config.getCodec();
-
+
bufferedDeletesStream = new BufferedDeletesStream(infoStream);
poolReaders = config.getReaderPooling();
-
+
writeLock = directory.makeLock(WRITE_LOCK_NAME);
-
+
if (!writeLock.obtain(config.getWriteLockTimeout())) // obtain write lock
- throw new LockObtainFailedException("Index locked for write: " + writeLock);
-
+ throw new LockObtainFailedException("Index locked for write: " + writeLock);
+
boolean success = false;
try {
OpenMode mode = config.getOpenMode();
@@ -620,15 +646,15 @@
// CREATE_OR_APPEND - create only if an index does not exist
create = !DirectoryReader.indexExists(directory);
}
-
+
// If index is too old, reading the segments will throw
// IndexFormatTooOldException.
segmentInfos = new SegmentInfos();
-
+
if (create) {
- // Try to read first. This is to allow create
+ // Try to read first. This is to allow create
// against an index that's currently open for
- // searching. In this case we write the next
+ // searching. In this case we write the next
// segments_N file with no segments:
try {
segmentInfos.read(directory);
@@ -636,48 +662,49 @@
} catch (IOException e) {
// Likely this means it's a fresh directory
}
-
+
// Record that we have a change (zero out all
// segments) pending:
changeCount++;
segmentInfos.changed();
} else {
segmentInfos.read(directory);
-
+
IndexCommit commit = config.getIndexCommit();
if (commit != null) {
// Swap out all segments, but, keep metadata in
// SegmentInfos, like version & generation, to
- // preserve write-once. This is important if
+ // preserve write-once. This is important if
// readers are open against the future commit
// points.
- if (commit.getDirectory() != directory)
- throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory");
+ if (commit.getDirectory() != directory) throw new IllegalArgumentException(
+ "IndexCommit's directory doesn't match my directory");
SegmentInfos oldInfos = new SegmentInfos();
oldInfos.read(directory, commit.getSegmentsFileName());
segmentInfos.replace(oldInfos);
changeCount++;
segmentInfos.changed();
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
+ infoStream.message("IW",
+ "init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
}
}
}
-
+
rollbackSegments = segmentInfos.createBackupSegmentInfos();
-
+
// start with previous field numbers, but new FieldInfos
globalFieldNumberMap = getFieldNumberMap();
- docWriter = new DocumentsWriter(codec, config, directory, this, globalFieldNumberMap, bufferedDeletesStream);
-
+ docWriter = new DocumentsWriter(codec, config, directory, this,
+ globalFieldNumberMap, bufferedDeletesStream);
+
// Default deleter (for backwards compatibility) is
// KeepOnlyLastCommitDeleter:
- synchronized(this) {
+ synchronized (this) {
deleter = new IndexFileDeleter(directory,
- config.getIndexDeletionPolicy(),
- segmentInfos, infoStream, this);
+ config.getIndexDeletionPolicy(), segmentInfos, infoStream, this);
}
-
+
if (deleter.startingCommitDeleted) {
// Deletion policy deleted the "head" commit point.
// We have to mark ourself as changed so that if we
@@ -686,18 +713,19 @@
changeCount++;
segmentInfos.changed();
}
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "init: create=" + create);
messageState();
}
-
+
success = true;
-
+
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "init: hit exception on init; releasing write lock");
+ infoStream.message("IW",
+ "init: hit exception on init; releasing write lock");
}
try {
writeLock.release();
@@ -708,102 +736,102 @@
}
}
}
-
+
private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
Directory cfsDir = null;
try {
if (info.getUseCompoundFile()) {
cfsDir = new CompoundFileDirectory(info.dir,
- IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION),
- IOContext.READONCE,
- false);
+ IndexFileNames.segmentFileName(info.name, "",
+ IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READONCE,
+ false);
} else {
cfsDir = info.dir;
}
- return info.getCodec().fieldInfosFormat().getFieldInfosReader().read(cfsDir,
- info.name,
- IOContext.READONCE);
+ return info.getCodec().fieldInfosFormat().getFieldInfosReader()
+ .read(cfsDir, info.name, IOContext.READONCE);
} finally {
if (info.getUseCompoundFile() && cfsDir != null) {
cfsDir.close();
}
}
}
-
+
/**
- * Loads or returns the already loaded the global field number map for this {@link SegmentInfos}.
- * If this {@link SegmentInfos} has no global field number map the returned instance is empty
+   * Loads or returns the already loaded global field number map for this
+   * {@link SegmentInfos}. If this {@link SegmentInfos} has no global field
+   * number map, the returned instance is empty.
*/
private FieldNumbers getFieldNumberMap() throws IOException {
- final FieldNumbers map = new FieldNumbers();
-
+ final FieldNumbers map = new FieldNumbers();
+
SegmentInfoPerCommit biggest = null;
- for(SegmentInfoPerCommit info : segmentInfos) {
- if (biggest == null || (info.info.getDocCount()-info.getDelCount()) > (biggest.info.getDocCount()-biggest.getDelCount())) {
+ for (SegmentInfoPerCommit info : segmentInfos) {
+ if (biggest == null
+ || (info.info.getDocCount() - info.getDelCount()) > (biggest.info
+ .getDocCount() - biggest.getDelCount())) {
biggest = info;
}
}
-
+
if (biggest != null) {
- for(FieldInfo fi : getFieldInfos(biggest.info)) {
+ for (FieldInfo fi : getFieldInfos(biggest.info)) {
map.addOrGet(fi.name, fi.number);
}
}
-
+
// TODO: we could also pull DV type of each field here,
// and use that to make sure new segment(s) don't change
// the type...
-
+
return map;
}
/**
- * Returns a {@link LiveIndexWriterConfig}, which can be used to query the IndexWriter
- * current settings, as well as modify "live" ones.
+ * Returns a {@link LiveIndexWriterConfig}, which can be used to query the
+ * IndexWriter current settings, as well as modify "live" ones.
*/
public LiveIndexWriterConfig getConfig() {
ensureOpen(false);
return config;
}
-
+
private void messageState() {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "\ndir=" + directory + "\n" +
- "index=" + segString() + "\n" +
- "version=" + Constants.LUCENE_VERSION + "\n" +
- config.toString());
+ infoStream.message("IW", "\ndir=" + directory + "\n" + "index="
+ + segString() + "\n" + "version=" + Constants.LUCENE_VERSION + "\n"
+ + config.toString());
}
}
-
+
/**
- * Commits all changes to an index, waits for pending merges
- * to complete, and closes all associated files.
+ * Commits all changes to an index, waits for pending merges to complete, and
+ * closes all associated files.
*
- * This is a "slow graceful shutdown" which may take a long time
- * especially if a big merge is pending: If you only want to close
- * resources use {@link #rollback()}. If you only want to commit
- * pending changes and close resources see {@link #close(boolean)}.
+ * This is a "slow graceful shutdown" which may take a long time especially if
+ * a big merge is pending: If you only want to close resources use
+ * {@link #rollback()}. If you only want to commit pending changes and close
+ * resources see {@link #close(boolean)}.
*
- * Note that this may be a costly
- * operation, so, try to re-use a single writer instead of
- * closing and opening a new one. See {@link #commit()} for
- * caveats about write caching done by some IO devices.
- *
- * If an Exception is hit during close, eg due to disk
- * full or some other reason, then both the on-disk index
- * and the internal state of the IndexWriter instance will
- * be consistent. However, the close will not be complete
- * even though part of it (flushing buffered documents)
- * may have succeeded, so the write lock will still be
- * held. If you can correct the underlying cause (eg free up
- * some disk space) then you can call close() again.
- * Failing that, if you want to force the write lock to be
- * released (dangerous, because you may then lose buffered
- * docs in the IndexWriter instance) then you can do
- * something like this:
+ * If an Exception is hit during close, eg due to disk full or some other
+ * reason, then both the on-disk index and the internal state of the
+ * IndexWriter instance will be consistent. However, the close will not be
+ * complete even though part of it (flushing buffered documents) may have
+ * succeeded, so the write lock will still be held.
+ *
+ * If you can correct the underlying cause (eg free up some disk space) then
+ * you can call close() again. Failing that, if you want to force the write
+ * lock to be released (dangerous, because you may then lose buffered docs in
+ * the IndexWriter instance) then you can do something like this:
+ * NOTE: if this method hits an OutOfMemoryError
-   * you should immediately close the writer, again. See above for details.
*
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40GenerationReplacementsFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40GenerationReplacementsFormat.java (revision 0)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40GenerationReplacementsFormat.java (working copy)
@@ -0,0 +1,60 @@
+package org.apache.lucene.codecs.lucene40;
+
+import java.io.IOException;
+import java.util.Map.Entry;
+
+import org.apache.lucene.codecs.GenerationReplacementsFormat;
+import org.apache.lucene.index.FieldGenerationReplacements;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Lucene40GenerationReplacementsFormat extends
+ GenerationReplacementsFormat {
+
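+  // On-disk layout (derived from the read path below): a vInt count of
+  // replacement entries, followed by one (vInt docDelta, vLong generation)
+  // pair per entry; doc IDs are delta-encoded in increasing order.
+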
+ @Override
+ protected FieldGenerationReplacements readPersistedGeneration(IndexInput input)
+ throws IOException {
+ final int size = input.readVInt();
+ FieldGenerationReplacements reps = new FieldGenerationReplacements();
+ int curr = 0;
+ for (int i = 0; i < size; i++) {
+ curr += input.readVInt();
+ reps.set(curr, input.readVLong());
+ }
+ return reps;
+ }
+
+ @Override
+ protected void persistGeneration(FieldGenerationReplacements reps,
+ IndexOutput output) throws IOException {
+ // write number of replacements
+ output.writeVInt(reps.size());
+
+ // write replacements
+ int prev = 0;
+    for (Entry
Index: lucene/core/src/java/org/apache/lucene/codecs/Codec.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/Codec.java (revision 1416361)
+++ lucene/core/src/java/org/apache/lucene/codecs/Codec.java (working copy)
@@ -86,6 +86,9 @@
/** Encodes/decodes live docs */
public abstract LiveDocsFormat liveDocsFormat();
+  /** Encodes/decodes field generation replacements */
+ public abstract GenerationReplacementsFormat generationReplacementsFormat();
+
/** looks up a codec by name */
public static Codec forName(String name) {
if (loader == null) {
Index: lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java (revision 1416361)
+++ lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java (working copy)
@@ -75,6 +75,11 @@
}
@Override
+ public GenerationReplacementsFormat generationReplacementsFormat() {
+ return delegate.generationReplacementsFormat();
+ }
+
+ @Override
public NormsFormat normsFormat() {
return delegate.normsFormat();
}
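
A minimal sketch (not part of the patch) of how a user codec could plug in the new
format through FilterCodec once this change is applied; the codec name "MyGenReplCodec"
and the Lucene40 delegate are illustrative assumptions only:

  import org.apache.lucene.codecs.FilterCodec;
  import org.apache.lucene.codecs.GenerationReplacementsFormat;
  import org.apache.lucene.codecs.lucene40.Lucene40Codec;
  import org.apache.lucene.codecs.lucene40.Lucene40GenerationReplacementsFormat;

  public class MyGenReplCodec extends FilterCodec {
    private final GenerationReplacementsFormat genReplacements =
        new Lucene40GenerationReplacementsFormat();

    public MyGenReplCodec() {
      super("MyGenReplCodec", new Lucene40Codec());
    }

    @Override
    public GenerationReplacementsFormat generationReplacementsFormat() {
      // only this format is overridden; FilterCodec delegates everything else
      return genReplacements;
    }
  }
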
Index: lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java (revision 0)
+++ lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java (working copy)
@@ -0,0 +1,168 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.index.FieldGenerationReplacements;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfoPerCommit;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Format for field replacements of a certain generation
+ *
+ * @lucene.experimental
+ */
+public abstract class GenerationReplacementsFormat {
+
+ /** Extension of generation replacements vectors */
+ static final String FIELD_GENERATION_REPLACEMENT_EXTENSION = "fgr";
+
+ /**
+ * Sole constructor. (For invocation by subclass constructors, typically
+ * implicit.)
+ */
+ protected GenerationReplacementsFormat() {}
+
+ /**
+ * Read field generation replacements. If no replacements exist return
+ * {@code null}.
+ */
+ public FieldGenerationReplacements readGenerationReplacements(String field,
+ SegmentInfoPerCommit info, IOContext context) throws IOException {
+ String fileName = getLastGenerationFileName(field, info.info.dir, info);
+ if (fileName == null) {
+ return null;
+ }
+
+ return internalReadGeneration(info.info.dir, fileName, context);
+ }
+
+ private FieldGenerationReplacements internalReadGeneration(Directory dir,
+ String fileName, IOContext context) throws IOException {
+ IndexInput input = dir.openInput(fileName, context);
+
+ boolean success = false;
+ try {
+ final FieldGenerationReplacements persistedGeneration = readPersistedGeneration(input);
+ success = true;
+ return persistedGeneration;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(input);
+ } else {
+ input.close();
+ }
+ }
+ }
+
+ private String getLastGenerationFileName(String field, Directory dir,
+ SegmentInfoPerCommit info) throws IOException {
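+    // Scan backwards from the segment's current update generation and return
+    // the most recent replacements file for this field that actually exists.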
+ for (long i = info.getUpdateGen(); i > 0; i--) {
+ final String fileName = IndexFileNames.segmentFileName(
+ IndexFileNames.fileNameFromGeneration(info.info.name, "", i, false),
+ field, FIELD_GENERATION_REPLACEMENT_EXTENSION);
+ if (dir.fileExists(fileName)) {
+ return fileName;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Read persisted field generation replacements from a given input.
+ */
+ protected abstract FieldGenerationReplacements readPersistedGeneration(
+ IndexInput input) throws IOException;
+
+ /**
+ * Persist field generation replacements. Use
+ * {@link SegmentInfoPerCommit#getNextUpdateGen()} to determine the generation
+   * of the replacements file you should write to.
+ */
+ public void writeGenerationReplacement(String field,
+ FieldGenerationReplacements reps, Directory dir,
+ SegmentInfoPerCommit info, IOContext context) throws IOException {
+ if (reps == null) {
+ // nothing new to write
+ return;
+ }
+
+ // load replacements from previous file
+ String prevFileName = getLastGenerationFileName(field, dir, info);
+ final FieldGenerationReplacements existing;
+ if (prevFileName != null) {
+ existing = internalReadGeneration(dir, prevFileName, context);
+ existing.merge(reps);
+ } else {
+ existing = reps;
+ }
+
+ final String nameWithGeneration = IndexFileNames.fileNameFromGeneration(
+ info.info.name, "", info.getNextUpdateGen(), false);
+ final String fileName = IndexFileNames.segmentFileName(nameWithGeneration,
+ field, FIELD_GENERATION_REPLACEMENT_EXTENSION);
+
+ final IndexOutput output = dir.createOutput(fileName, context);
+ boolean success = false;
+ try {
+      persistGeneration(existing, output);
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(output);
+ info.info.dir.deleteFile(fileName);
+ } else {
+ output.close();
+ if (prevFileName != null) {
+ // remove previous file
+ info.info.dir.deleteFile(prevFileName);
+ }
+ }
+ }
+ }
+
+ /**
+ * Persist field generation replacements to a given output.
+ */
+ protected abstract void persistGeneration(FieldGenerationReplacements reps,
+ IndexOutput output) throws IOException;
+
+ /**
+ * Records all files in use by this {@link SegmentInfoPerCommit} into the
+ * files argument.
+ */
+ public void files(SegmentInfoPerCommit info, Directory dir,
+ Collectionn */
- public abstract void visitDocument(int n, StoredFieldVisitor visitor) throws IOException;
+ /** Visit the stored fields for document n, ignoring certain
+ * fields. */
+ public abstract void visitDocument(int n, StoredFieldVisitor visitor, Set
*
+ *
+ * @param baseName
+ * base segment string
+ * @param gen
+ * update generation
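+   *          (e.g. baseName "_0" with gen 5 presumably yields "__0_5")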
+ */
+ public static String updatedSegmentFileNameFromGeneration(String baseName,
+ long gen) {
+ if (gen <= 0) {
+ return null;
+ } else {
+ assert gen > 0;
+ // The '10' part in the length is: 3 for '_', 3 for "upd" and 4 as
+ // estimate to the gen length as string (hopefully an upper limit so SB
+ // won't expand in the middle.
+ StringBuilder res = new StringBuilder(baseName.length() + 10).append('_')
+ .append(baseName).append('_').append(generationString(gen));
+ return res.toString();
+ }
+ }
+
+ /**
* Returns a file name that includes the given segment name, your own custom
* name and extension. The format of the filename is:
* <segmentName>(_<name>)(.<ext>).
@@ -201,5 +234,5 @@
// All files created by codecs much match this pattern (we
// check this in SegmentInfo.java):
- static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[a-z0-9]+(_.*)?\\..*");
+ static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[_]?[a-z0-9]+(_.*)?\\..*");
}
Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1420477)
+++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -34,7 +34,9 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.SegmentInfoWriter;
import org.apache.lucene.index.FieldInfos.FieldNumbers;
+import org.apache.lucene.index.FieldsUpdate.Operation;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MergePolicy.MergeTrigger;
import org.apache.lucene.index.MergeState.CheckAbort;
@@ -54,185 +56,188 @@
import org.apache.lucene.util.ThreadInterruptedException;
/**
- An IndexWriter creates and maintains an index.
+ * An IndexWriter creates and maintains an index.
+ *
+ * IndexWriter creates a lock file for the directory in
+ * use. Trying to open another IndexWriter on the same directory
+ * will lead to a {@link LockObtainFailedException}. The
+ * {@link LockObtainFailedException} is also thrown if an IndexReader on the
+ * same directory is used to delete documents from the index.
+ * IndexWriter allows an optional
+ * {@link IndexDeletionPolicy} implementation to be specified. You can use this
+ * to control when prior commits are deleted from the index. The default policy
+ * is {@link KeepOnlyLastCommitDeletionPolicy} which removes all prior commits
+ * as soon as a new commit is done (this matches behavior before 2.2). Creating
+ * your own policy can allow you to explicitly keep previous "point in time"
+ * commits alive in the index for some time, to allow readers to refresh to the
+ * new commit without having the old commit deleted out from under them. This is
+ * necessary on filesystems like NFS that do not support "delete on last
+ * close" semantics, which Lucene's "point in time" search normally relies on.
+ * IndexWriter allows you to separately change the
+ * {@link MergePolicy} and the {@link MergeScheduler}. The {@link MergePolicy}
+ * is invoked whenever there are changes to the segments in the index. Its role
+ * is to select which merges to do, if any, and return a
+ * {@link MergePolicy.MergeSpecification} describing the merges. The default is
+ * {@link LogByteSizeMergePolicy}. Then, the {@link MergeScheduler} is invoked
+ * with the requested merges and it decides when and how to run the merges. The
+ * default is {@link ConcurrentMergeScheduler}.
+ * IndexWriter instance as this may cause
+ * deadlock; use your own (non-Lucene) objects instead.
+ * Thread.interrupt() on a thread that's
+ * within IndexWriter, IndexWriter will try to catch this (eg, if it's in a
+ * wait() or Thread.sleep()), and will then throw the unchecked exception
+ * {@link ThreadInterruptedException} and clear the interrupt status on
+ * the thread.
+ * IndexWriter creates a lock file for the directory in use. Trying to open
- another IndexWriter on the same directory will lead to a
- {@link LockObtainFailedException}. The {@link LockObtainFailedException}
- is also thrown if an IndexReader on the same directory is used to delete documents
- from the index.IndexWriter allows an optional
- {@link IndexDeletionPolicy} implementation to be
- specified. You can use this to control when prior commits
- are deleted from the index. The default policy is {@link
- KeepOnlyLastCommitDeletionPolicy} which removes all prior
- commits as soon as a new commit is done (this matches
- behavior before 2.2). Creating your own policy can allow
- you to explicitly keep previous "point in time" commits
- alive in the index for some time, to allow readers to
- refresh to the new commit without having the old commit
- deleted out from under them. This is necessary on
- filesystems like NFS that do not support "delete on last
- close" semantics, which Lucene's "point in time" search
- normally relies on. IndexWriter allows you to separately change
- the {@link MergePolicy} and the {@link MergeScheduler}.
- The {@link MergePolicy} is invoked whenever there are
- changes to the segments in the index. Its role is to
- select which merges to do, if any, and return a {@link
- MergePolicy.MergeSpecification} describing the merges.
- The default is {@link LogByteSizeMergePolicy}. Then, the {@link
- MergeScheduler} is invoked with the requested merges and
- it decides when and how to run the merges. The default is
- {@link ConcurrentMergeScheduler}. IndexWriter instance as
- this may cause deadlock; use your own (non-Lucene) objects
- instead. Thread.interrupt() on a thread that's within
- IndexWriter, IndexWriter will try to catch this (eg, if
- it's in a wait() or Thread.sleep()), and will then throw
- the unchecked exception {@link ThreadInterruptedException}
- and clear the interrupt status on the thread.conf.
- * Note that the passed in {@link IndexWriterConfig} is
- * privately cloned; if you need to make subsequent "live"
- * changes to the configuration use {@link #getConfig}.
+ * Note that the passed in {@link IndexWriterConfig} is privately cloned; if
+ * you need to make subsequent "live" changes to the configuration use
+ * {@link #getConfig}.
*
* try {
* writer.close();
@@ -813,49 +841,55 @@
* }
* }
*
- *
- * after which, you must be certain not to use the writer
- * instance anymore.
+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer, again. See above for details. + *
+ * + * @throws IOException + * if there is a low-level IO error */ @Override public void close() throws IOException { close(true); } - + /** - * Closes the index with or without waiting for currently - * running merges to finish. This is only meaningful when - * using a MergeScheduler that runs merges in background - * threads. - * - *NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer, again. See above for details.
- * - *NOTE: it is dangerous to always call - * close(false), especially when IndexWriter is not open - * for very long, because this can result in "merge - * starvation" whereby long merges will never have a - * chance to finish. This will cause too many segments in - * your index over time.
- * - * @param waitForMerges if true, this call will block - * until all merges complete; else, it will ask all - * running merges to abort, wait until those merges have - * finished (which should be at most a few seconds), and - * then return. + * Closes the index with or without waiting for currently running merges to + * finish. This is only meaningful when using a MergeScheduler that runs + * merges in background threads. + * + *+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer, again. See above for details. + *
+ * + *+ * NOTE: it is dangerous to always call close(false), especially when + * IndexWriter is not open for very long, because this can result in "merge + * starvation" whereby long merges will never have a chance to finish. This + * will cause too many segments in your index over time. + *
+ * + * @param waitForMerges + * if true, this call will block until all merges complete; else, it + * will ask all running merges to abort, wait until those merges have + * finished (which should be at most a few seconds), and then return. */ public void close(boolean waitForMerges) throws IOException { - + + // commit pending updates + if (updatesPending) { + commitInternal(); + } + // Ensure that only one thread actually gets to do the // closing, and make sure no commit is also in progress: - synchronized(commitLock) { + synchronized (commitLock) { if (shouldClose()) { // If any methods have hit OutOfMemoryError, then abort // on close, in case the internal state of IndexWriter @@ -868,12 +902,12 @@ } } } - + // Returns true if this thread should attempt to close, or // false if IndexWriter is now closed; else, waits until // another thread finishes closing synchronized private boolean shouldClose() { - while(true) { + while (true) { if (!closed) { if (!closing) { closing = true; @@ -889,21 +923,24 @@ } } } - - private void closeInternal(boolean waitForMerges, boolean doFlush) throws IOException { + + private void closeInternal(boolean waitForMerges, boolean doFlush) + throws IOException { boolean interrupted = false; try { - + if (pendingCommit != null) { - throw new IllegalStateException("cannot close: prepareCommit was already called with no corresponding call to commit"); + throw new IllegalStateException( + "cannot close: prepareCommit was already called with no corresponding call to commit"); } - + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "now flush at close waitForMerges=" + waitForMerges); + infoStream.message("IW", "now flush at close waitForMerges=" + + waitForMerges); } - + docWriter.close(); - + try { // Only allow a new merge to be triggered if we are // going to wait for merges: @@ -915,9 +952,10 @@ } finally { try { - // clean up merge scheduler in all cases, although flushing may have failed: + // clean up merge scheduler in all cases, although flushing may have + // failed: interrupted = Thread.interrupted(); - + if (waitForMerges) { try { // Give merge scheduler last chance to run, in case @@ -927,12 +965,13 @@ // ignore any interruption, does not matter interrupted = true; if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "interrupted while waiting for final merges"); + infoStream.message("IW", + "interrupted while waiting for final merges"); } } } - synchronized(this) { + synchronized (this) { for (;;) { try { finishMerges(waitForMerges && !interrupted); @@ -943,7 +982,8 @@ // so it will not wait interrupted = true; if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "interrupted while waiting for merges to finish"); + infoStream.message("IW", + "interrupted while waiting for merges to finish"); } } } @@ -951,42 +991,44 @@ } } finally { - // shutdown policy, scheduler and all threads (this call is not interruptible): + // shutdown policy, scheduler and all threads (this call is not + // interruptible): IOUtils.closeWhileHandlingException(mergePolicy, mergeScheduler); } } - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "now call final commit()"); } - + if (doFlush) { commitInternal(); } - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "at close: " + segString()); } // used by assert below final DocumentsWriter oldWriter = docWriter; - synchronized(this) { + synchronized (this) { readerPool.dropAll(true); docWriter = null; deleter.close(); } - + if (writeLock != null) { - writeLock.release(); // 
release write lock + writeLock.release(); // release write lock writeLock = null; } - synchronized(this) { + synchronized (this) { closed = true; } - assert oldWriter.perThreadPool.numDeactivatedThreadStates() == oldWriter.perThreadPool.getMaxThreadStates(); + assert oldWriter.perThreadPool.numDeactivatedThreadStates() == oldWriter.perThreadPool + .getMaxThreadStates(); } catch (OutOfMemoryError oom) { handleOOM(oom, "closeInternal"); } finally { - synchronized(this) { + synchronized (this) { closing = false; notifyAll(); if (!closed) { @@ -999,54 +1041,54 @@ if (interrupted) Thread.currentThread().interrupt(); } } - + /** Returns the Directory used by this index. */ public Directory getDirectory() { return directory; } - + /** Returns the analyzer used by this index. */ public Analyzer getAnalyzer() { ensureOpen(); return analyzer; } - - /** Returns total number of docs in this index, including - * docs not yet flushed (still in the RAM buffer), - * not counting deletions. - * @see #numDocs */ + + /** + * Returns total number of docs in this index, including docs not yet flushed + * (still in the RAM buffer), not counting deletions. + * + * @see #numDocs + */ public synchronized int maxDoc() { ensureOpen(); int count; - if (docWriter != null) - count = docWriter.getNumDocs(); - else - count = 0; - + if (docWriter != null) count = docWriter.getNumDocs(); + else count = 0; + count += segmentInfos.totalDocCount(); return count; } - - /** Returns total number of docs in this index, including - * docs not yet flushed (still in the RAM buffer), and - * including deletions. NOTE: buffered deletions - * are not counted. If you really need these to be - * counted you should call {@link #commit()} first. - * @see #numDocs */ + + /** + * Returns total number of docs in this index, including docs not yet flushed + * (still in the RAM buffer), and including deletions. NOTE: buffered + * deletions are not counted. If you really need these to be counted you + * should call {@link #commit()} first. + * + * @see #numDocs + */ public synchronized int numDocs() { ensureOpen(); int count; - if (docWriter != null) - count = docWriter.getNumDocs(); - else - count = 0; - + if (docWriter != null) count = docWriter.getNumDocs(); + else count = 0; + for (final SegmentInfoPerCommit info : segmentInfos) { count += info.info.getDocCount() - numDeletedDocs(info); } return count; } - + /** * Returns true if this index has deletions (including buffered deletions). */ @@ -1065,164 +1107,195 @@ } return false; } - + /** * Adds a document to this index. - * - *Note that if an Exception is hit (for example disk full) - * then the index will be consistent, but this document - * may not have been added. Furthermore, it's possible - * the index will have one segment in non-compound format - * even when using compound files (when a merge has - * partially succeeded).
- * - *This method periodically flushes pending documents - * to the Directory (see above), and - * also periodically triggers segment merges in the index - * according to the {@link MergePolicy} in use.
- * - *Merges temporarily consume space in the - * directory. The amount of space required is up to 1X the - * size of all segments being merged, when no - * readers/searchers are open against the index, and up to - * 2X the size of all segments being merged when - * readers/searchers are open against the index (see - * {@link #forceMerge(int)} for details). The sequence of - * primitive merge operations performed is governed by the - * merge policy. - * - *
Note that each term in the document can be no longer - * than 16383 characters, otherwise an - * IllegalArgumentException will be thrown.
- * - *Note that it's possible to create an invalid Unicode - * string in java if a UTF16 surrogate pair is malformed. - * In this case, the invalid characters are silently - * replaced with the Unicode replacement character - * U+FFFD.
- * - *NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
- * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * + *+ * Note that if an Exception is hit (for example disk full) then the index + * will be consistent, but this document may not have been added. Furthermore, + * it's possible the index will have one segment in non-compound format even + * when using compound files (when a merge has partially succeeded). + *
+ * + *+ * This method periodically flushes pending documents to the Directory (see above), and also periodically triggers segment merges in + * the index according to the {@link MergePolicy} in use. + *
+ * + *+ * Merges temporarily consume space in the directory. The amount of space + * required is up to 1X the size of all segments being merged, when no + * readers/searchers are open against the index, and up to 2X the size of all + * segments being merged when readers/searchers are open against the index + * (see {@link #forceMerge(int)} for details). The sequence of primitive merge + * operations performed is governed by the merge policy. + * + *
+ * Note that each term in the document can be no longer than 16383 characters, + * otherwise an IllegalArgumentException will be thrown. + *
+ * + *+ * Note that it's possible to create an invalid Unicode string in java if a + * UTF16 surrogate pair is malformed. In this case, the invalid characters are + * silently replaced with the Unicode replacement character U+FFFD. + *
+ * + *+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
+ * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ public void addDocument(IndexDocument doc) throws IOException { addDocument(doc, analyzer); } - + /** * Adds a document to this index, using the provided analyzer instead of the * value of {@link #getAnalyzer()}. - * - *See {@link #addDocument(IndexDocument)} for details on - * index and IndexWriter state after an Exception, and - * flushing/merging temporary free space requirements.
- * - *NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
- * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * + *+ * See {@link #addDocument(IndexDocument)} for details on index and + * IndexWriter state after an Exception, and flushing/merging temporary free + * space requirements. + *
+ * + *+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
+ * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ - public void addDocument(IndexDocument doc, Analyzer analyzer) throws IOException { + public void addDocument(IndexDocument doc, Analyzer analyzer) + throws IOException { updateDocument(null, doc, analyzer); } - + /** - * Atomically adds a block of documents with sequentially - * assigned document IDs, such that an external reader - * will see all or none of the documents. - * - *WARNING: the index does not currently record - * which documents were added as a block. Today this is - * fine, because merging will preserve a block. The order of - * documents within a segment will be preserved, even when child - * documents within a block are deleted. Most search features - * (like result grouping and block joining) require you to - * mark documents; when these documents are deleted these - * search features will not work as expected. Obviously adding - * documents to an existing block will require you the reindex - * the entire block. - * - *
However it's possible that in the future Lucene may - * merge more aggressively re-order documents (for example, - * perhaps to obtain better index compression), in which case - * you may need to fully re-index your documents at that time. - * - *
See {@link #addDocument(IndexDocument)} for details on - * index and IndexWriter state after an Exception, and - * flushing/merging temporary free space requirements.
- * - *NOTE: tools that do offline splitting of an index - * (for example, IndexSplitter in contrib) or - * re-sorting of documents (for example, IndexSorter in - * contrib) are not aware of these atomically added documents - * and will likely break them up. Use such tools at your - * own risk! - * - *
NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
- * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * + * Atomically adds a block of documents with sequentially assigned document + * IDs, such that an external reader will see all or none of the documents. + * + *+ * WARNING: the index does not currently record which documents were + * added as a block. Today this is fine, because merging will preserve a + * block. The order of documents within a segment will be preserved, even when + * child documents within a block are deleted. Most search features (like + * result grouping and block joining) require you to mark documents; when + * these documents are deleted these search features will not work as + * expected. Obviously adding documents to an existing block will require you + * the reindex the entire block. + * + *
+   * However it's possible that in the future Lucene may merge more aggressively
+   * re-order documents (for example, perhaps to obtain better index
+   * compression), in which case you may need to fully re-index your documents
+   * at that time.
+   *
+   *
+   * See {@link #addDocument(IndexDocument)} for details on index and
+   * IndexWriter state after an Exception, and flushing/merging temporary free
+   * space requirements.
+   *
+   *
+   *
+   * NOTE: tools that do offline splitting of an index (for example,
+   * IndexSplitter in contrib) or re-sorting of documents (for example,
+   * IndexSorter in contrib) are not aware of these atomically added documents
+   * and will likely break them up. Use such tools at your own risk!
+   *
+   *
+   * NOTE: if this method hits an OutOfMemoryError you should immediately
+   * close the writer. See above for details.
+   *
+   *
+   * @throws CorruptIndexException
+   *           if the index is corrupt
+   * @throws IOException
+   *           if there is a low-level IO error
+   *
    * @lucene.experimental
    */
-  public void addDocuments(Iterable<? extends IndexDocument> docs) throws IOException {
+  public void addDocuments(Iterable<? extends IndexDocument> docs)
+      throws IOException {
     addDocuments(docs, analyzer);
   }
-
+  
   /**
-   * Atomically adds a block of documents, analyzed using the
-   * provided analyzer, with sequentially assigned document
-   * IDs, such that an external reader will see all or none
-   * of the documents.
-   *
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   *
+   * Atomically adds a block of documents, analyzed using the provided analyzer,
+   * with sequentially assigned document IDs, such that an external reader will
+   * see all or none of the documents.
+   *
+   * @throws CorruptIndexException
+   *           if the index is corrupt
+   * @throws IOException
+   *           if there is a low-level IO error
+   *
    * @lucene.experimental
    */
-  public void addDocuments(Iterable<? extends IndexDocument> docs, Analyzer analyzer) throws IOException {
+  public void addDocuments(Iterable<? extends IndexDocument> docs,
+      Analyzer analyzer) throws IOException {
     updateDocuments(null, docs, analyzer);
   }
-
+  
   /**
-   * Atomically deletes documents matching the provided
-   * delTerm and adds a block of documents with sequentially
-   * assigned document IDs, such that an external reader
-   * will see all or none of the documents.
-   *
+   * Atomically deletes documents matching the provided delTerm and adds a block
+   * of documents with sequentially assigned document IDs, such that an external
+   * reader will see all or none of the documents.
+   *
    * See {@link #addDocuments(Iterable)}.
-   *
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   *
+   *
+   * @throws CorruptIndexException
+   *           if the index is corrupt
+   * @throws IOException
+   *           if there is a low-level IO error
+   *
    * @lucene.experimental
    */
-  public void updateDocuments(Term delTerm, Iterable<? extends IndexDocument> docs) throws IOException {
-    updateDocuments(delTerm, docs, analyzer);
+  public void replaceDocuments(Term delTerm,
+      Iterable<? extends IndexDocument> docs) throws IOException {
+    replaceDocuments(delTerm, docs, analyzer);
   }
-
+  
   /**
-   * Atomically deletes documents matching the provided
-   * delTerm and adds a block of documents, analyzed using
-   * the provided analyzer, with sequentially
-   * assigned document IDs, such that an external reader
-   * will see all or none of the documents.
-   *
+   * Replaced by {@link #replaceDocuments(Term, Iterable)}.
+   *
+   * @deprecated use {@link #replaceDocuments(Term, Iterable)}.
+   */
+  @Deprecated
+  public void updateDocuments(Term delTerm,
+      Iterable<? extends IndexDocument> docs) throws IOException {
+    replaceDocuments(delTerm, docs, analyzer);
+  }
+  
+  /**
+   * Atomically deletes documents matching the provided delTerm and adds a block
+   * of documents, analyzed using the provided analyzer, with sequentially
+   * assigned document IDs, such that an external reader will see all or none of
+   * the documents.
+   *
    * See {@link #addDocuments(Iterable)}.
- * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + * * @lucene.experimental */ - public void updateDocuments(Term delTerm, Iterable extends IndexDocument> docs, Analyzer analyzer) throws IOException { + public void replaceDocuments(Term delTerm, + Iterable extends IndexDocument> docs, Analyzer analyzer) + throws IOException { ensureOpen(); try { boolean success = false; @@ -1244,17 +1317,121 @@ handleOOM(oom, "updateDocuments"); } } - + /** + * Replaced by {@link #replaceDocuments(Term, Iterable, Analyzer)}. + * + * @deprecated use {@link #replaceDocuments(Term, Iterable, Analyzer)}. + */ + @Deprecated + public void updateDocuments(Term delTerm, + Iterable extends IndexDocument> docs, Analyzer analyzer) + throws IOException { + replaceDocuments(delTerm, docs, analyzer); + } + + /** + * Update fields of documents matching the given term. + * + * @param term + * The term to match. + * @param operation + * defines whether the new fields are either: + *term.
- *
- * NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
- * - * @param term the term to identify the documents to be deleted - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * + *+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
+ * + * @param term + * the term to identify the documents to be deleted + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ public void deleteDocuments(Term term) throws IOException { ensureOpen(); @@ -1264,23 +1441,23 @@ handleOOM(oom, "deleteDocuments(Term)"); } } - - /** Expert: attempts to delete by document ID, as long as - * the provided reader is a near-real-time reader (from {@link - * DirectoryReader#open(IndexWriter,boolean)}). If the - * provided reader is an NRT reader obtained from this - * writer, and its segment has not been merged away, then - * the delete succeeds and this method returns true; else, it - * returns false the caller must then separately delete by - * Term or Query. - * - * NOTE: this method can only delete documents - * visible to the currently open NRT reader. If you need - * to delete documents indexed after opening the NRT - * reader you must use the other deleteDocument methods - * (e.g., {@link #deleteDocuments(Term)}). */ - public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException { - + + /** + * Expert: attempts to delete by document ID, as long as the provided reader + * is a near-real-time reader (from + * {@link DirectoryReader#open(IndexWriter,boolean)}). If the provided reader + * is an NRT reader obtained from this writer, and its segment has not been + * merged away, then the delete succeeds and this method returns true; else, + * it returns false the caller must then separately delete by Term or Query. + * + * NOTE: this method can only delete documents visible to the currently + * open NRT reader. If you need to delete documents indexed after opening the + * NRT reader you must use the other deleteDocument methods (e.g., + * {@link #deleteDocuments(Term)}). 
+ */ + public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) + throws IOException { + final AtomicReader reader; if (readerIn instanceof AtomicReader) { // Reader is already atomic: use the incoming docID: @@ -1294,25 +1471,27 @@ assert docID >= 0; assert docID < reader.maxDoc(); } - + if (!(reader instanceof SegmentReader)) { - throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders"); + throw new IllegalArgumentException( + "the reader must be a SegmentReader or composite reader containing only SegmentReaders"); } - + final SegmentInfoPerCommit info = ((SegmentReader) reader).getSegmentInfo(); - + // TODO: this is a slow linear search, but, number of // segments should be contained unless something is // seriously wrong w/ the index, so it should be a minor // cost: - + if (segmentInfos.indexOf(info) != -1) { ReadersAndLiveDocs rld = readerPool.get(info, false); if (rld != null) { - synchronized(bufferedDeletesStream) { + synchronized (bufferedDeletesStream) { rld.initWritableLiveDocs(); if (rld.delete(docID)) { - final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount(); + final int fullDelCount = rld.info.getDelCount() + + rld.getPendingDeleteCount(); if (fullDelCount == rld.info.info.getDocCount()) { // If a merge has already registered for this // segment, we leave it in the readerPool; the @@ -1324,36 +1503,38 @@ checkpoint(); } } - + // Must bump changeCount so if no other changes // happened, we still commit this change: changeCount++; } - //System.out.println(" yes " + info.info.name + " " + docID); + // System.out.println(" yes " + info.info.name + " " + docID); return true; } } else { - //System.out.println(" no rld " + info.info.name + " " + docID); + // System.out.println(" no rld " + info.info.name + " " + docID); } } else { - //System.out.println(" no seg " + info.info.name + " " + docID); + // System.out.println(" no seg " + info.info.name + " " + docID); } return false; } - + /** - * Deletes the document(s) containing any of the - * terms. All given deletes are applied and flushed atomically - * at the same time. - * - *NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
- * - * @param terms array of terms to identify the documents - * to be deleted - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * Deletes the document(s) containing any of the terms. All given deletes are + * applied and flushed atomically at the same time. + * + *+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
+ * + * @param terms + * array of terms to identify the documents to be deleted + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ public void deleteDocuments(Term... terms) throws IOException { ensureOpen(); @@ -1363,17 +1544,21 @@ handleOOM(oom, "deleteDocuments(Term..)"); } } - + /** * Deletes the document(s) matching the provided query. - * - *NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
- * - * @param query the query to identify the documents to be deleted - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * + *+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
+ * + * @param query + * the query to identify the documents to be deleted + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ public void deleteDocuments(Query query) throws IOException { ensureOpen(); @@ -1383,19 +1568,22 @@ handleOOM(oom, "deleteDocuments(Query)"); } } - + /** - * Deletes the document(s) matching any of the provided queries. - * All given deletes are applied and flushed atomically at the same time. - * - *NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
- * - * @param queries array of queries to identify the documents - * to be deleted - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * Deletes the document(s) matching any of the provided queries. All given + * deletes are applied and flushed atomically at the same time. + * + *+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
+ * + * @param queries + * array of queries to identify the documents to be deleted + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ public void deleteDocuments(Query... queries) throws IOException { ensureOpen(); @@ -1405,48 +1593,69 @@ handleOOM(oom, "deleteDocuments(Query..)"); } } - + /** - * Updates a document by first deleting the document(s) - * containingterm and then adding the new
- * document. The delete and then add are atomic as seen
- * by a reader on the same index (flush may happen only after
- * the add).
- *
- * NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
- * - * @param term the term to identify the document(s) to be - * deleted - * @param doc the document to be added - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * Updates a document by first deleting the document(s) containing + *term and then adding the new document. The delete and then add
+ * are atomic as seen by a reader on the same index (flush may happen only
+ * after the add).
+ *
+ * + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
+ * + * @param term + * the term to identify the document(s) to be deleted + * @param doc + * the document to be added + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ + public void replaceDocument(Term term, IndexDocument doc) throws IOException { + ensureOpen(); + replaceDocument(term, doc, getAnalyzer()); + } + + /** + * Replaced by {@link #replaceDocument(Term, IndexDocument)}. If you wish to + * update fields of existing documents use + * {@link #updateFields(Operation, Term, IndexDocument)}. + * + * @deprecated use {@link #replaceDocument(Term, IndexDocument)} or + * {@link #updateFields(Operation, Term, IndexDocument)}. + */ + @Deprecated public void updateDocument(Term term, IndexDocument doc) throws IOException { ensureOpen(); - updateDocument(term, doc, analyzer); + replaceDocument(term, doc, analyzer); } - + /** - * Updates a document by first deleting the document(s) - * containingterm and then adding the new
- * document. The delete and then add are atomic as seen
- * by a reader on the same index (flush may happen only after
- * the add).
- *
- * NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
- * - * @param term the term to identify the document(s) to be - * deleted - * @param doc the document to be added - * @param analyzer the analyzer to use when analyzing the document - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * Updates a document by first deleting the document(s) containing + *term and then adding the new document. The delete and then add
+ * are atomic as seen by a reader on the same index (flush may happen only
+ * after the add).
+ *
+ * + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
+ * + * @param term + * the term to identify the document(s) to be deleted + * @param doc + * the document to be added + * @param analyzer + * the analyzer to use when analyzing the document + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ - public void updateDocument(Term term, IndexDocument doc, Analyzer analyzer) + public void replaceDocument(Term term, IndexDocument doc, Analyzer analyzer) throws IOException { ensureOpen(); try { @@ -1462,7 +1671,7 @@ } } } - + if (anySegmentFlushed) { maybeMerge(MergeTrigger.SEGMENT_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS); } @@ -1470,22 +1679,37 @@ handleOOM(oom, "updateDocument"); } } - + + /** + * Replaced by {@link #replaceDocument(Term, IndexDocument, Analyzer)}. If you + * wish to update fields of existing documents use + * {@link #updateFields(Operation, Term, IndexDocument, Analyzer)}. + * + * @deprecated use {@link #replaceDocument(Term, IndexDocument, Analyzer)} or + * {@link #updateFields(Operation, Term, IndexDocument, Analyzer)} + * . + */ + @Deprecated + public void updateDocument(Term term, IndexDocument doc, Analyzer analyzer) + throws IOException { + replaceDocument(term, doc, analyzer); + } + // for test purpose - final synchronized int getSegmentCount(){ + final synchronized int getSegmentCount() { return segmentInfos.size(); } - + // for test purpose - final synchronized int getNumBufferedDocuments(){ + final synchronized int getNumBufferedDocuments() { return docWriter.getNumDocs(); } - + // for test purpose final synchronized CollectionThis is a horribly costly operation, especially when - * you pass a small {@code maxNumSegments}; usually you - * should only call this if the index is static (will no - * longer be changed).
- * - *Note that this requires up to 2X the index size free - * space in your Directory (3X if you're using compound - * file format). For example, if your index size is 10 MB - * then you need up to 20 MB free for this to complete (30 - * MB if you're using compound file format). Also, - * it's best to call {@link #commit()} afterwards, - * to allow IndexWriter to free up disk space.
- * - *If some but not all readers re-open while merging - * is underway, this will cause > 2X temporary - * space to be consumed as those new readers will then - * hold open the temporary segments at that time. It is - * best not to re-open readers while merging is running.
- * - *The actual temporary usage could be much less than - * these figures (it depends on many factors).
- * - *In general, once this completes, the total size of the - * index will be less than the size of the starting index. - * It could be quite a bit smaller (if there were many - * pending deletes) or just slightly smaller.
- * - *If an Exception is hit, for example - * due to disk full, the index will not be corrupted and no - * documents will be lost. However, it may have - * been partially merged (some segments were merged but - * not all), and it's possible that one of the segments in - * the index will be in non-compound format even when - * using compound file format. This will occur when the - * Exception is hit during conversion of the segment into - * compound format.
- * - *This call will merge those segments present in - * the index when the call started. If other threads are - * still adding documents and flushing segments, those - * newly created segments will not be merged unless you - * call forceMerge again.
- * - *NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
- * - *NOTE: if you call {@link #close(boolean)} - * with false, which aborts all running merges, - * then any thread still running this method might hit a - * {@link MergePolicy.MergeAbortedException}. - * - * @param maxNumSegments maximum number of segments left - * in the index after merging finishes + * Forces merge policy to merge segments until there are <= maxNumSegments. + * The actual merges to be executed are determined by the {@link MergePolicy}. * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + *
+ * This is a horribly costly operation, especially when you pass a small + * {@code maxNumSegments}; usually you should only call this if the index is + * static (will no longer be changed). + *
+ * + *+ * Note that this requires up to 2X the index size free space in your + * Directory (3X if you're using compound file format). For example, if your + * index size is 10 MB then you need up to 20 MB free for this to complete (30 + * MB if you're using compound file format). Also, it's best to call + * {@link #commit()} afterwards, to allow IndexWriter to free up disk space. + *
+ * + *+ * If some but not all readers re-open while merging is underway, this will + * cause > 2X temporary space to be consumed as those new readers will then + * hold open the temporary segments at that time. It is best not to re-open + * readers while merging is running. + *
+ * + *+ * The actual temporary usage could be much less than these figures (it + * depends on many factors). + *
+ * + *+ * In general, once this completes, the total size of the index will be less + * than the size of the starting index. It could be quite a bit smaller (if + * there were many pending deletes) or just slightly smaller. + *
+ * + *+ * If an Exception is hit, for example due to disk full, the index will not be + * corrupted and no documents will be lost. However, it may have been + * partially merged (some segments were merged but not all), and it's possible + * that one of the segments in the index will be in non-compound format even + * when using compound file format. This will occur when the Exception is hit + * during conversion of the segment into compound format. + *
+ * + *+ * This call will merge those segments present in the index when the call + * started. If other threads are still adding documents and flushing segments, + * those newly created segments will not be merged unless you call forceMerge + * again. + *
+ * + *+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
+ * + *+ * NOTE: if you call {@link #close(boolean)} with false, which + * aborts all running merges, then any thread still running this method might + * hit a {@link MergePolicy.MergeAbortedException}. + * + * @param maxNumSegments + * maximum number of segments left in the index after merging + * finishes + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error * @see MergePolicy#findMerges - * - */ + * + */ public void forceMerge(int maxNumSegments) throws IOException { forceMerge(maxNumSegments, true); } - - /** Just like {@link #forceMerge(int)}, except you can - * specify whether the call should block until - * all merging completes. This is only meaningful with a - * {@link MergeScheduler} that is able to run merges in - * background threads. - * - *
NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
+ + /** + * Just like {@link #forceMerge(int)}, except you can specify whether the call + * should block until all merging completes. This is only meaningful with a + * {@link MergeScheduler} that is able to run merges in background threads. + * + *+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
*/ public void forceMerge(int maxNumSegments, boolean doWait) throws IOException { ensureOpen(); - - if (maxNumSegments < 1) - throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments); - + + if (maxNumSegments < 1) throw new IllegalArgumentException( + "maxNumSegments must be >= 1; got " + maxNumSegments); + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "forceMerge: index now " + segString()); infoStream.message("IW", "now flush at forceMerge"); } - + flush(true, true); - - synchronized(this) { + + synchronized (this) { resetMergeExceptions(); segmentsToMerge.clear(); - for(SegmentInfoPerCommit info : segmentInfos) { + for (SegmentInfoPerCommit info : segmentInfos) { segmentsToMerge.put(info, Boolean.TRUE); } mergeMaxNumSegments = maxNumSegments; - + // Now mark all pending & running merges for forced // merge: - for(final MergePolicy.OneMerge merge : pendingMerges) { + for (final MergePolicy.OneMerge merge : pendingMerges) { merge.maxNumSegments = maxNumSegments; segmentsToMerge.put(merge.info, Boolean.TRUE); } - - for (final MergePolicy.OneMerge merge: runningMerges) { + + for (final MergePolicy.OneMerge merge : runningMerges) { merge.maxNumSegments = maxNumSegments; segmentsToMerge.put(merge.info, Boolean.TRUE); } } - + maybeMerge(MergeTrigger.EXPLICIT, maxNumSegments); - + if (doWait) { - synchronized(this) { - while(true) { - + synchronized (this) { + while (true) { + if (hitOOM) { - throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge"); + throw new IllegalStateException( + "this writer hit an OutOfMemoryError; cannot complete forceMerge"); } - + if (mergeExceptions.size() > 0) { // Forward any exceptions in background merge // threads to the current thread: final int size = mergeExceptions.size(); - for(int i=0;iNOTE: if you call {@link #close(boolean)} - * with false, which aborts all running merges, - * then any thread still running this method might hit a - * {@link MergePolicy.MergeAbortedException}. + + /** + * Just like {@link #forceMergeDeletes()}, except you can specify whether the + * call should block until the operation completes. This is only meaningful + * with a {@link MergeScheduler} that is able to run merges in background + * threads. + * + *
+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
+ * + *
+ * NOTE: if you call {@link #close(boolean)} with false, which
+ * aborts all running merges, then any thread still running this method might
+ * hit a {@link MergePolicy.MergeAbortedException}.
*/
- public void forceMergeDeletes(boolean doWait)
- throws IOException {
+ public void forceMergeDeletes(boolean doWait) throws IOException {
ensureOpen();
-
+
flush(true, true);
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "forceMergeDeletes: index now " + segString());
}
-
+
MergePolicy.MergeSpecification spec;
-
- synchronized(this) {
+
+ synchronized (this) {
spec = mergePolicy.findForcedDeletesMerges(segmentInfos);
if (spec != null) {
final int numMerges = spec.merges.size();
-        for(int i=0;i<numMerges;i++) {
To see how - * many deletions you have pending in your index, call - * {@link IndexReader#numDeletedDocs}.
- * - *NOTE: this method first flushes a new - * segment (if there are indexed documents), and applies - * all buffered deletes. - * - *
NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
+ * Forces merging of all segments that have deleted documents. The actual + * merges to be executed are determined by the {@link MergePolicy}. For + * example, the default {@link TieredMergePolicy} will only pick a segment if + * the percentage of deleted docs is over 10%. + * + *+ * This is often a horribly costly operation; rarely is it warranted. + *
+ * + *+ * To see how many deletions you have pending in your index, call + * {@link IndexReader#numDeletedDocs}. + *
+ * + *+ * NOTE: this method first flushes a new segment (if there are indexed + * documents), and applies all buffered deletes. + * + *
+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
*/ public void forceMergeDeletes() throws IOException { forceMergeDeletes(true); } - + /** - * Expert: asks the mergePolicy whether any merges are - * necessary now and if so, runs the requested merges and - * then iterate (test again if merges are needed) until no - * more merges are returned by the mergePolicy. - * - * Explicit calls to maybeMerge() are usually not - * necessary. The most common case is when merge policy - * parameters have changed. + * Expert: asks the mergePolicy whether any merges are necessary now and if + * so, runs the requested merges and then iterate (test again if merges are + * needed) until no more merges are returned by the mergePolicy. * + * Explicit calls to maybeMerge() are usually not necessary. The most common + * case is when merge policy parameters have changed. + * * This method will call the {@link MergePolicy} with * {@link MergeTrigger#EXPLICIT}. - * - *NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
+ * + *+ * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *
*/ public final void maybeMerge() throws IOException { maybeMerge(MergeTrigger.EXPLICIT, UNBOUNDED_MAX_MERGE_SEGMENTS); } - - private final void maybeMerge(MergeTrigger trigger, int maxNumSegments) throws IOException { + + private final void maybeMerge(MergeTrigger trigger, int maxNumSegments) + throws IOException { ensureOpen(false); updatePendingMerges(trigger, maxNumSegments); mergeScheduler.merge(this); } - - private synchronized void updatePendingMerges(MergeTrigger trigger, int maxNumSegments) - throws IOException { + + private synchronized void updatePendingMerges(MergeTrigger trigger, + int maxNumSegments) throws IOException { assert maxNumSegments == -1 || maxNumSegments > 0; assert trigger != null; if (stopMerges) { return; } - + // Do not start new merges if we've hit OOME if (hitOOM) { return; } - + final MergePolicy.MergeSpecification spec; if (maxNumSegments != UNBOUNDED_MAX_MERGE_SEGMENTS) { - assert trigger == MergeTrigger.EXPLICIT || trigger == MergeTrigger.MERGE_FINISHED : - "Expected EXPLICT or MERGE_FINISHED as trigger even with maxNumSegments set but was: " + trigger.name(); - spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge)); + assert trigger == MergeTrigger.EXPLICIT + || trigger == MergeTrigger.MERGE_FINISHED : "Expected EXPLICT or MERGE_FINISHED as trigger even with maxNumSegments set but was: " + + trigger.name(); + spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, + Collections.unmodifiableMap(segmentsToMerge)); if (spec != null) { final int numMerges = spec.merges.size(); - for(int i=0;i
+ * Do not alter the returned collection!
+ */
public synchronized Collection This method will drop all buffered documents and will
- * remove all segments from the index. This change will not be
- * visible until a {@link #commit()} has been called. This method
- * can be rolled back using {@link #rollback()}. NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ). NOTE: this method will forcefully abort all merges
- * in progress. If other threads are running {@link
- * #forceMerge}, {@link #addIndexes(IndexReader[])} or
- * {@link #forceMergeDeletes} methods, they may receive
- * {@link MergePolicy.MergeAbortedException}s.
+ *
+ *
+ * This method will drop all buffered documents and will remove all segments
+ * from the index. This change will not be visible until a {@link #commit()}
+ * has been called. This method can be rolled back using {@link #rollback()}.
+ *
+ * NOTE: this method is much faster than using deleteDocuments( new
+ * MatchAllDocsQuery() ).
+ *
+ * NOTE: this method will forcefully abort all merges in progress. If other
+ * threads are running {@link #forceMerge}, {@link #addIndexes(IndexReader[])}
+ * or {@link #forceMergeDeletes} methods, they may receive
+ * {@link MergePolicy.MergeAbortedException}s.
*/
public synchronized void deleteAll() throws IOException {
ensureOpen();
boolean success = false;
try {
-
+
// Abort any running merges
finishMerges(false);
-
+
// Remove any buffered docs
docWriter.abort();
-
+
// Remove all segments
segmentInfos.clear();
-
+
// Ask deleter to locate unreferenced files & remove them:
deleter.checkpoint(segmentInfos, false);
deleter.refresh();
-
+
// Don't bother saving any changes in our segmentInfos
readerPool.dropAll(false);
-
+
// Mark that the index has changed
++changeCount;
segmentInfos.changed();
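Illustrative usage (not part of the patch): a minimal sketch of the deleteAll()/commit()/rollback() contract described in the javadoc above, assuming an already-open IndexWriter and the usual Lucene imports; the helper name and the decision flag are made up.

  // Clears the index, but the change is invisible to readers until commit();
  // rollback() instead restores the last commit (and closes the writer).
  static void clearIndex(IndexWriter writer, boolean keepIt) throws IOException {
    writer.deleteAll();        // drops buffered docs, aborts merges, removes segments
    if (keepIt) {
      writer.commit();         // readers now see the empty index
    } else {
      writer.rollback();       // back to the previous commit; the writer is closed
    }
  }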
@@ -2060,50 +2303,53 @@
}
}
}
-
+
private synchronized void finishMerges(boolean waitForMerges) {
if (!waitForMerges) {
-
+
stopMerges = true;
-
+
// Abort all pending & running merges:
for (final MergePolicy.OneMerge merge : pendingMerges) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "now abort pending merge " + segString(merge.segments));
+ infoStream.message("IW", "now abort pending merge "
+ + segString(merge.segments));
}
merge.abort();
mergeFinish(merge);
}
pendingMerges.clear();
-
+
for (final MergePolicy.OneMerge merge : runningMerges) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "now abort running merge " + segString(merge.segments));
+ infoStream.message("IW", "now abort running merge "
+ + segString(merge.segments));
}
merge.abort();
}
-
+
// These merges periodically check whether they have
- // been aborted, and stop if so. We wait here to make
- // sure they all stop. It should not take very long
+ // been aborted, and stop if so. We wait here to make
+ // sure they all stop. It should not take very long
// because the merge threads periodically check if
// they are aborted.
- while(runningMerges.size() > 0) {
+ while (runningMerges.size() > 0) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "now wait for " + runningMerges.size() + " running merge/s to abort");
+ infoStream.message("IW", "now wait for " + runningMerges.size()
+ + " running merge/s to abort");
}
doWait();
}
-
+
stopMerges = false;
notifyAll();
-
+
assert 0 == mergingSegments.size();
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "all running merges have aborted");
}
-
+
} else {
// waitForMerges() will ensure any running addIndexes finishes.
// It's fine if a new one attempts to start because from our
@@ -2113,43 +2359,51 @@
waitForMerges();
}
}
-
+
/**
* Wait for any currently outstanding merges to finish.
- *
- * It is guaranteed that any merges started prior to calling this method
- * will have completed once this method completes.
+ * It is guaranteed that any merges started prior to calling this method will
+ * have completed once this method completes.
+ *
- * NOTE: the index in each {@link Directory} must not be
- * changed (opened by a writer) while this method is
- * running. This method does not acquire a write lock in
- * each input Directory, so it is up to the caller to
+ * This may be used to parallelize batch indexing. A large document collection
+ * can be broken into sub-collections. Each sub-collection can be indexed in
+ * parallel, on a different thread, process or machine. The complete index can
+ * then be created by merging sub-collection indexes with this method.
+ *
+ *
+ * NOTE: the index in each {@link Directory} must not be changed
+ * (opened by a writer) while this method is running. This method does not
+ * acquire a write lock in each input Directory, so it is up to the caller to
* enforce this.
- *
- * This method is transactional in how Exceptions are
- * handled: it does not commit a new segments_N file until
- * all indexes are added. This means if an Exception
- * occurs (for example disk full), then either no indexes
- * will have been added or they all will have been.
- *
- * Note that this requires temporary free space in the
- * {@link Directory} up to 2X the sum of all input indexes
- * (including the starting index). If readers/searchers
- * are open against the starting index, then temporary
- * free space required will be higher by the size of the
- * starting index (see {@link #forceMerge(int)} for details).
- *
+ *
*
+ * This method is transactional in how Exceptions are handled: it does not
+ * commit a new segments_N file until all indexes are added. This means if an
+ * Exception occurs (for example disk full), then either no indexes will have
+ * been added or they all will have been.
+ *
+ *
+ * Note that this requires temporary free space in the {@link Directory} up to
+ * 2X the sum of all input indexes (including the starting index). If
+ * readers/searchers are open against the starting index, then temporary free
+ * space required will be higher by the size of the starting index (see
+ * {@link #forceMerge(int)} for details).
+ *
+ *
* NOTE: this method only copies the segments of the incoming indexes
* and does not merge them. Therefore deleted documents are not removed and
* the new segments are not merged with the existing ones.
- *
- * This requires this index not be among those to be added.
- *
+ *
*
- * NOTE: if this method hits an OutOfMemoryError
- * you should immediately close the writer. See above for details.
- *
- * @throws CorruptIndexException if the index is corrupt
- * @throws IOException if there is a low-level IO error
+ * This requires this index not be among those to be added.
+ *
+ *
+ * NOTE: if this method hits an OutOfMemoryError you should immediately
+ * close the writer. See above for details.
+ *
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws IOException
+ * if there is a low-level IO error
*/
public void addIndexes(Directory... dirs) throws IOException {
ensureOpen();
-
+
noDupDirs(dirs);
-
+
try {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "flush at addIndexes(Directory...)");
}
-
+
flush(false, true);
-
+
List Expert: prepare for commit. This does the
- * first phase of 2-phase commit. This method does all
- * steps necessary to commit changes since this writer
- * was opened: flushes pending added and deleted docs,
- * syncs the index files, writes most of next segments_N
- * file. After calling this you must call either {@link
- * #commit()} to finish the commit, or {@link
- * #rollback()} to revert the commit and undo all changes
- * done since the writer was opened. You can also just call {@link #commit()} directly
- * without prepareCommit first in which case that method
- * will internally call prepareCommit.
- *
- * NOTE: if this method hits an OutOfMemoryError
- * you should immediately close the writer. See above for details.
+ * Expert: prepare for commit. This does the first phase of 2-phase commit.
+ * This method does all steps necessary to commit changes since this writer
+ * was opened: flushes pending added and deleted docs, syncs the index files,
+ * writes most of next segments_N file. After calling this you must call
+ * either {@link #commit()} to finish the commit, or {@link #rollback()} to
+ * revert the commit and undo all changes done since the writer was opened.
+ *
+ * You can also just call {@link #commit()} directly without prepareCommit
+ * first in which case that method will internally call prepareCommit.
+ *
+ *
+ * NOTE: if this method hits an OutOfMemoryError you should immediately
+ * close the writer. See above for details.
+ *
* NOTE: the map is cloned internally, therefore altering the map's
* contents after calling this method has no effect.
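Illustrative usage (not part of the patch): a sketch of the two-phase commit sequence described in the prepareCommit javadoc above. The second participant (TransactionLog) is a hypothetical stand-in for whatever external resource is being coordinated with the index.

  // Phase 1 prepares both participants; phase 2 makes the changes durable.
  // If anything fails in between, rollback() discards the prepared commit.
  static void twoPhaseCommit(IndexWriter writer, TransactionLog txLog)
      throws IOException {
    writer.prepareCommit();    // flush, sync files, write the pending segments_N
    try {
      txLog.prepare();         // hypothetical external participant
      writer.commit();         // finish the prepared commit
      txLog.commit();
    } catch (IOException e) {
      writer.rollback();       // undoes the prepared commit; the writer is closed
      throw e;
    }
  }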
@@ -2707,34 +2994,34 @@
// Used only by commit and prepareCommit, below; lock
// order is commitLock -> IW
private final Object commitLock = new Object();
-
+
/**
- * Commits all pending changes (added & deleted
- * documents, segment merges, added
- * indexes, etc.) to the index, and syncs all referenced
- * index files, such that a reader will see the changes
- * and the index updates will survive an OS or machine
- * crash or power loss. Note that this does not wait for
- * any running background merges to finish. This may be a
- * costly operation, so you should test the cost in your
- * application and do it only when really necessary. Note that this operation calls Directory.sync on
- * the index files. That call should not return until the
- * file contents & metadata are on stable storage. For
- * FSDirectory, this calls the OS's fsync. But, beware:
- * some hardware devices may in fact cache writes even
- * during fsync, and return before the bits are actually
- * on stable storage, to give the appearance of faster
- * performance. If you have such a device, and it does
- * not have a battery backup (for example) then on power
- * loss it may still lose data. Lucene cannot guarantee
- * consistency on such devices. NOTE: if this method hits an OutOfMemoryError
- * you should immediately close the writer. See above for details.
+ * Commits all pending changes (added & deleted documents, segment merges,
+ * added indexes, etc.) to the index, and syncs all referenced index files,
+ * such that a reader will see the changes and the index updates will survive
+ * an OS or machine crash or power loss. Note that this does not wait for any
+ * running background merges to finish. This may be a costly operation, so you
+ * should test the cost in your application and do it only when really
+ * necessary.
+ *
+ * Note that this operation calls Directory.sync on the index files. That call
+ * should not return until the file contents & metadata are on stable storage.
+ * For FSDirectory, this calls the OS's fsync. But, beware: some hardware
+ * devices may in fact cache writes even during fsync, and return before the
+ * bits are actually on stable storage, to give the appearance of faster
+ * performance. If you have such a device, and it does not have a battery
+ * backup (for example) then on power loss it may still lose data. Lucene
+ * cannot guarantee consistency on such devices.
+ *
+ * NOTE: if this method hits an OutOfMemoryError you should immediately
+ * close the writer. See above for details.
+ *
- * Caution: this should only be used by failure recovery code,
- * when it is known that no other process nor thread is in fact
- * currently accessing this index.
+ * Caution: this should only be used by failure recovery code, when it is
+ * known that no other process nor thread is in fact currently accessing this
+ * index.
*/
public static void unlock(Directory directory) throws IOException {
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
}
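Illustrative usage (not part of the patch): the failure-recovery pattern the unlock caution above refers to, safe only when no other process or thread can be writing to the index; the index path is a placeholder.

  // Only safe when it is certain that no other IndexWriter is alive.
  static void forceUnlock(File indexPath) throws IOException {
    Directory dir = FSDirectory.open(indexPath);
    try {
      if (IndexWriter.isLocked(dir)) {
        IndexWriter.unlock(dir);   // forcibly releases write.lock
      }
    } finally {
      dir.close();
    }
  }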
-
- /** If {@link DirectoryReader#open(IndexWriter,boolean)} has
- * been called (ie, this writer is in near real-time
- * mode), then after a merge completes, this class can be
- * invoked to warm the reader on the newly merged
- * segment, before the merge commits. This is not
- * required for near real-time search, but will reduce
- * search latency on opening a new near real-time reader
- * after a merge completes.
- *
+
+ /**
+ * If {@link DirectoryReader#open(IndexWriter,boolean)} has been called (ie,
+ * this writer is in near real-time mode), then after a merge completes, this
+ * class can be invoked to warm the reader on the newly merged segment, before
+ * the merge commits. This is not required for near real-time search, but will
+ * reduce search latency on opening a new near real-time reader after a merge
+ * completes.
+ *
* @lucene.experimental
- *
- * NOTE: warm is called before any deletes have
- * been carried over to the merged segment. */
+ *
+ *
+ * NOTE: warm is called before any deletes have
+ * been carried over to the merged segment.
+ */
public static abstract class IndexReaderWarmer {
-
- /** Sole constructor. (For invocation by subclass
- * constructors, typically implicit.) */
- protected IndexReaderWarmer() {
- }
-
- /** Invoked on the {@link AtomicReader} for the newly
- * merged segment, before that segment is made visible
- * to near-real-time readers. */
+
+ /**
+ * Sole constructor. (For invocation by subclass constructors, typically
+ * implicit.)
+ */
+ protected IndexReaderWarmer() {}
+
+ /**
+ * Invoked on the {@link AtomicReader} for the newly merged segment, before
+ * that segment is made visible to near-real-time readers.
+ */
public abstract void warm(AtomicReader reader) throws IOException;
}
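Illustrative usage (not part of the patch): a sketch of registering an IndexReaderWarmer so newly merged segments are warmed before near-real-time readers see them. setMergedSegmentWarmer on IndexWriterConfig is assumed to be the registration hook, and the warmed field name is made up.

  static IndexWriterConfig withWarmer(Version matchVersion, Analyzer analyzer) {
    IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer);
    conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
      @Override
      public void warm(AtomicReader reader) throws IOException {
        reader.terms("id");   // touch whatever structures searches will need
      }
    });
    return conf;
  }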
-
+
private void handleOOM(OutOfMemoryError oom, String location) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit OutOfMemoryError inside " + location);
@@ -4062,67 +4460,73 @@
hitOOM = true;
throw oom;
}
-
- // Used only by assert for testing. Current points:
- // startDoFlush
- // startCommitMerge
- // startStartCommit
- // midStartCommit
- // midStartCommit2
- // midStartCommitSuccess
- // finishStartCommit
- // startCommitMergeDeletes
- // startMergeInit
- // DocumentsWriter.ThreadState.init start
+
+ // Used only by assert for testing. Current points:
+ // startDoFlush
+ // startCommitMerge
+ // startStartCommit
+ // midStartCommit
+ // midStartCommit2
+ // midStartCommitSuccess
+ // finishStartCommit
+ // startCommitMergeDeletes
+ // startMergeInit
+ // DocumentsWriter.ThreadState.init start
boolean testPoint(String name) {
return true;
}
-
+
synchronized boolean nrtIsCurrent(SegmentInfos infos) {
- //System.out.println("IW.nrtIsCurrent " + (infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any()));
+ // System.out.println("IW.nrtIsCurrent " + (infos.version ==
+ // segmentInfos.version && !docWriter.anyChanges() &&
+ // !bufferedDeletesStream.any()));
ensureOpen();
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "nrtIsCurrent: infoVersion matches: " + (infos.version == segmentInfos.version) + " DW changes: " + docWriter.anyChanges() + " BD changes: "+bufferedDeletesStream.any());
-
+ infoStream.message("IW",
+ "nrtIsCurrent: infoVersion matches: "
+ + (infos.version == segmentInfos.version) + " DW changes: "
+ + docWriter.anyChanges() + " BD changes: "
+ + bufferedDeletesStream.any());
+
}
- return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
+ return infos.version == segmentInfos.version && !docWriter.anyChanges()
+ && !bufferedDeletesStream.any();
}
-
+
synchronized boolean isClosed() {
return closed;
}
-
- /** Expert: remove any index files that are no longer
- * used.
- *
- * IndexWriter normally deletes unused files itself,
- * during indexing. However, on Windows, which disallows
- * deletion of open files, if there is a reader open on
- * the index then those files cannot be deleted. This is
- * fine, because IndexWriter will periodically retry
- * the deletion. However, IndexWriter doesn't try that often: only
- * on open, close, flushing a new segment, and finishing
- * a merge. If you don't do any of these actions with your
- * IndexWriter, you'll see the unused files linger. If
- * that's a problem, call this method to delete them
- * (once you've closed the open readers that were
- * preventing their deletion).
- *
- * In addition, you can call this method to delete
- * unreferenced index commits. This might be useful if you
- * are using an {@link IndexDeletionPolicy} which holds
- * onto index commits until some criteria are met, but those
- * commits are no longer needed. Otherwise, those commits will
- * be deleted the next time commit() is called.
+
+ /**
+ * Expert: remove any index files that are no longer used.
+ *
+ *
+ * IndexWriter normally deletes unused files itself, during indexing. However,
+ * on Windows, which disallows deletion of open files, if there is a reader
+ * open on the index then those files cannot be deleted. This is fine, because
+ * IndexWriter will periodically retry the deletion.
+ *
+ * However, IndexWriter doesn't try that often: only on open, close, flushing
+ * a new segment, and finishing a merge. If you don't do any of these actions
+ * with your IndexWriter, you'll see the unused files linger. If that's a
+ * problem, call this method to delete them (once you've closed the open
+ * readers that were preventing their deletion).
+ *
+ *
+ * In addition, you can call this method to delete unreferenced index commits.
+ * This might be useful if you are using an {@link IndexDeletionPolicy} which
+ * holds onto index commits until some criteria are met, but those commits are
+ * no longer needed. Otherwise, those commits will be deleted the next time
+ * commit() is called.
*/
public synchronized void deleteUnusedFiles() throws IOException {
ensureOpen(false);
deleter.deletePendingFiles();
deleter.revisitPolicy();
}
-
+
// Called by DirectoryReader.doClose
synchronized void deletePendingFiles() throws IOException {
deleter.deletePendingFiles();
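Illustrative usage (not part of the patch): retrying file deletion once the NRT reader that was pinning old files is closed, as the deleteUnusedFiles javadoc above suggests (mainly relevant on Windows); an open writer is assumed.

  static void searchThenCleanUp(IndexWriter writer) throws IOException {
    DirectoryReader reader = DirectoryReader.open(writer, true);
    try {
      // ... run searches against the near-real-time reader ...
    } finally {
      reader.close();
      writer.deleteUnusedFiles();  // files the closed reader held open can now go
    }
  }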
@@ -4132,25 +4536,50 @@
* NOTE: this method creates a compound file for all files returned by
* info.files(). While, generally, this may include separate norms and
* deletion files, this SegmentInfo must not reference such files when this
- * method is called, because they are not allowed within a compound file.
+ * method is called, because they are not allowed within a compound file. The
+ * value of updateGen for a base segment must be negative.
*/
- static final Collection
- * Instances pointing to the same segment (but with different deletes, etc)
- * may share the same core data.
+ * Instances pointing to the same segment (but with different deletes, etc) may
+ * share the same core data.
+ *
* @lucene.experimental
*/
public final class SegmentReader extends AtomicReader {
-
+
private final SegmentInfoPerCommit si;
private final Bits liveDocs;
-
+
// Normally set to si.docCount - si.delDocCount, unless we
// were created as an NRT reader from IW, in which case IW
// tells us the docCount:
private final int numDocs;
-
+
final SegmentCoreReaders core;
-
+
+ private SegmentCoreReaders[] updates;
+ private final IOContext context;
+ private Fields fields;
+ private FieldInfos fieldInfos;
+ private StoredFieldsReader fieldsReader;
+ private TermVectorsReader termVectorsReader;
+ private Map
- * This listener is called only once all SegmentReaders
- * sharing the same core are closed. At this point it
- * is safe for apps to evict this reader from any caches
- * keyed on {@link #getCoreCacheKey}. This is the same
- * interface that {@link FieldCache} uses, internally,
- * to evict entries.
+ * Constructing an {@link UpdatesIndexDocument} also updates the containing
+ * {@link UpdatedSegmentData}'s analyzer and its
+ * {@link FieldGenerationReplacements} vectors for the relevant fields.
+ */
+ private class UpdatesIndexDocument implements IndexDocument {
+
+ MapIndexWriter without committing
- * any changes that have occurred since the last commit
- * (or since it was opened, if commit hasn't been called).
- * This removes any temporary files that had been created,
- * after which the state of the index will be the same as
- * it was when commit() was last called or when this
- * writer was first opened. This also clears a previous
- * call to {@link #prepareCommit}.
- * @throws IOException if there is a low-level IO error
+ * Close the IndexWriter without committing any changes that have
+ * occurred since the last commit (or since it was opened, if commit hasn't
+ * been called). This removes any temporary files that had been created, after
+ * which the state of the index will be the same as it was when commit() was
+ * last called or when this writer was first opened. This also clears a
+ * previous call to {@link #prepareCommit}.
+ *
+ * @throws IOException
+ * if there is a low-level IO error
*/
@Override
public void rollback() throws IOException {
ensureOpen();
-
+
// Ensure that only one thread actually gets to do the
// closing, and make sure no commit is also in progress:
- synchronized(commitLock) {
+ synchronized (commitLock) {
if (shouldClose()) {
rollbackInternal();
}
}
}
-
+
private void rollbackInternal() throws IOException {
-
+
boolean success = false;
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "rollback");
}
-
try {
- synchronized(this) {
+ synchronized (this) {
finishMerges(false);
stopMerges = true;
}
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "rollback: done finish merges");
}
-
+
// Must pre-close these two, in case they increment
// changeCount so that we can then set it to false
// before calling closeInternal
mergePolicy.close();
mergeScheduler.close();
-
+
bufferedDeletesStream.clear();
- docWriter.close(); // mark it as closed first to prevent subsequent indexing actions/flushes
+ docWriter.close(); // mark it as closed first to prevent subsequent
+ // indexing actions/flushes
docWriter.abort();
- synchronized(this) {
-
+ synchronized (this) {
+
if (pendingCommit != null) {
pendingCommit.rollbackCommit(directory);
deleter.decRef(pendingCommit);
pendingCommit = null;
notifyAll();
}
-
+
// Don't bother saving any changes in our segmentInfos
readerPool.dropAll(false);
-
+
// Keep the same segmentInfos instance but replace all
- // of its SegmentInfo instances. This is so the next
+ // of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write
// once").
segmentInfos.rollbackSegmentInfos(rollbackSegments);
- if (infoStream.isEnabled("IW") ) {
- infoStream.message("IW", "rollback: infos=" + segString(segmentInfos));
+ if (infoStream.isEnabled("IW")) {
+ infoStream
+ .message("IW", "rollback: infos=" + segString(segmentInfos));
}
-
assert testPoint("rollback before checkpoint");
-
+
// Ask deleter to locate unreferenced files & remove
// them:
deleter.checkpoint(segmentInfos, false);
deleter.refresh();
-
+
lastCommitChangeCount = changeCount;
}
-
+
success = true;
} catch (OutOfMemoryError oom) {
handleOOM(oom, "rollbackInternal");
} finally {
- synchronized(this) {
+ synchronized (this) {
if (!success) {
closing = false;
notifyAll();
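Illustrative usage (not part of the patch): discarding a half-applied batch with rollback(), using the replaceDocument method this patch introduces; the id-term-to-document map and the helper name are assumptions.

  static void applyBatch(IndexWriter writer, Map<Term, IndexDocument> batch)
      throws IOException {
    boolean done = false;
    try {
      for (Map.Entry<Term, IndexDocument> e : batch.entrySet()) {
        writer.replaceDocument(e.getKey(), e.getValue());
      }
      writer.commit();
      done = true;
    } finally {
      if (!done) {
        writer.rollback();   // index returns to the last commit; the writer is closed
      }
    }
  }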
@@ -2005,47 +2244,51 @@
}
}
}
-
+
closeInternal(false, false);
}
-
+
/**
* Delete all documents in the index.
- *
- * true iff the index in the named directory is
- * currently locked.
- * @param directory the directory to check for a lock
- * @throws IOException if there is a low-level IO error
+ * Returns true iff the index in the named directory is currently
+ * locked.
+ *
+ * @param directory
+ * the directory to check for a lock
+ * @throws IOException
+ * if there is a low-level IO error
*/
public static boolean isLocked(Directory directory) throws IOException {
return directory.makeLock(WRITE_LOCK_NAME).isLocked();
}
-
+
/**
* Forcibly unlocks the index in the named directory.
*