* As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
* supplementary characters in strings and char arrays provided as compound word
* dictionaries.
+ *
+ * As of 4.4, {@link CompoundWordTokenFilterBase} doesn't update offsets.
*
*/
public abstract class CompoundWordTokenFilterBase extends TokenFilter {
@@ -58,7 +58,8 @@
* The default for maximal length of subwords that get propagated to the output of this filter
*/
public static final int DEFAULT_MAX_SUBWORD_SIZE = 15;
-
+
+ protected final Version matchVersion;
protected final CharArraySet dictionary;
protected final LinkedList<CompoundToken> tokens;
protected final int minWordSize;
@@ -82,7 +83,7 @@
protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(input);
-
+ this.matchVersion = matchVersion;
this.tokens=new LinkedList<CompoundToken>();
if (minWordSize < 0) {
throw new IllegalArgumentException("minWordSize cannot be negative");
@@ -156,7 +157,8 @@
int startOff = CompoundWordTokenFilterBase.this.offsetAtt.startOffset();
int endOff = CompoundWordTokenFilterBase.this.offsetAtt.endOffset();
- if (endOff - startOff != CompoundWordTokenFilterBase.this.termAtt.length()) {
+ if (matchVersion.onOrAfter(Version.LUCENE_44) ||
+ endOff - startOff != CompoundWordTokenFilterBase.this.termAtt.length()) {
// if length by start + end offsets doesn't match the term text then assume
// this is a synonym and don't adjust the offsets.
this.startOffset = startOff;
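
Note: the LUCENE_44 branch above changes offset handling so that decompounded subtokens keep the offsets of the whole original token. A minimal sketch of that assumed behavior, using the DictionaryCompoundWordTokenFilter subclass; the class name and the tiny two-entry dictionary are illustrative only, not part of this patch:

    import java.io.StringReader;
    import java.util.Arrays;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public class CompoundOffsetsSketch {
      public static void main(String[] args) throws Exception {
        CharArraySet dict = new CharArraySet(Version.LUCENE_44,
            Arrays.asList("Rind", "fleisch"), true);
        TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_44,
            new StringReader("Rindfleisch"));
        ts = new DictionaryCompoundWordTokenFilter(Version.LUCENE_44, ts, dict);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          // with LUCENE_44, the subtokens "Rind" and "fleisch" are expected to
          // report the offsets of the original token, i.e. [0,11), not slices of it
          System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
        }
        ts.end();
        ts.close();
      }
    }
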
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (revision 1481938)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (working copy)
@@ -24,6 +24,7 @@
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.Version;
/**
@@ -43,11 +44,12 @@
private int tokStart;
private int tokEnd; // only used if the length changed before this filter
private int savePosIncr;
- private boolean isFirstToken = true;
+ private int savePosLen;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
@@ -88,7 +90,8 @@
curGramSize = minGram;
tokStart = offsetAtt.startOffset();
tokEnd = offsetAtt.endOffset();
- savePosIncr = posIncrAtt.getPositionIncrement();
+ savePosIncr += posIncrAtt.getPositionIncrement();
+ savePosLen = posLenAtt.getPositionLength();
}
}
if (curGramSize <= maxGram) { // if we have hit the end of our n-gram size range, quit
@@ -98,16 +101,14 @@
offsetAtt.setOffset(tokStart, tokEnd);
// first ngram gets increment, others don't
if (curGramSize == minGram) {
- // Leave the first token position increment at the cleared-attribute value of 1
- if ( ! isFirstToken) {
- posIncrAtt.setPositionIncrement(savePosIncr);
- }
+ posIncrAtt.setPositionIncrement(savePosIncr);
+ savePosIncr = 0;
} else {
posIncrAtt.setPositionIncrement(0);
}
+ posLenAtt.setPositionLength(savePosLen);
termAtt.copyBuffer(curTermBuffer, 0, curGramSize);
curGramSize++;
- isFirstToken = false;
return true;
}
}
@@ -119,6 +120,6 @@
public void reset() throws IOException {
super.reset();
curTermBuffer = null;
- isFirstToken = true;
+ savePosIncr = 0;
}
}
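
With the accumulating savePosIncr above, position increments of tokens that are too short to produce any grams carry over to the next emitted gram, so positions are preserved instead of being reset to the default of 1 for the first output token. A small sketch of the assumed post-patch behavior; the class name and sample text are illustrative only:

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
    import org.apache.lucene.util.Version;

    public class EdgeNGramPositionsSketch {
      public static void main(String[] args) throws Exception {
        TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_44, new StringReader("a abc"));
        ts = new EdgeNGramTokenFilter(Version.LUCENE_44, ts, 2, 3);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          // "a" is shorter than minGram=2 and emits no gram, but its increment is
          // accumulated, so "ab" is expected with posIncr=2 and "abc" with posIncr=0
          System.out.println(term + " posIncr=" + posIncr.getPositionIncrement());
        }
        ts.end();
        ts.close();
      }
    }
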
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java (revision 1481938)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java (working copy)
@@ -26,6 +26,7 @@
import java.nio.charset.CodingErrorAction;
import java.text.ParseException;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -48,9 +49,18 @@
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
* <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
* format="solr" ignoreCase="false" expand="true"
- * tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
+ * tokenizerFactory="solr.WhitespaceTokenizerFactory"
+ * [optional tokenizer factory parameters]/>
* </analyzer>
* </fieldType>
+ *
+ *
+ * An optional param name prefix of "tokenizerFactory." may be used for any
+ * init params that the SynonymFilterFactory needs to pass to the specified
* TokenizerFactory. If the TokenizerFactory expects an init param with
+ * the same name as an init param used by the SynonymFilterFactory, the prefix
+ * is mandatory.
+ *
*/
public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private final boolean ignoreCase;
@@ -58,19 +68,27 @@
private final String synonyms;
private final String format;
private final boolean expand;
+ private final Map<String,String> tokArgs = new HashMap<String,String>();
private SynonymMap map;
public SynonymFilterFactory(Map<String,String> args) {
super(args);
ignoreCase = getBoolean(args, "ignoreCase", false);
+ synonyms = require(args, "synonyms");
+ format = get(args, "format");
+ expand = getBoolean(args, "expand", true);
+
tokenizerFactory = get(args, "tokenizerFactory");
if (tokenizerFactory != null) {
assureMatchVersion();
+ tokArgs.put("luceneMatchVersion", getLuceneMatchVersion().toString());
+ for (Iterator<String> itr = args.keySet().iterator(); itr.hasNext();) {
+ String key = itr.next();
+ tokArgs.put(key.replaceAll("^tokenizerFactory\\.",""), args.get(key));
+ itr.remove();
+ }
}
- synonyms = require(args, "synonyms");
- format = get(args, "format");
- expand = getBoolean(args, "expand", true);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -159,11 +177,9 @@
// (there are no tests for this functionality)
private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws IOException {
- Map<String,String> args = new HashMap<String,String>();
- args.put("luceneMatchVersion", getLuceneMatchVersion().toString());
Class<? extends TokenizerFactory> clazz = loader.findClass(cname, TokenizerFactory.class);
try {
- TokenizerFactory tokFactory = clazz.getConstructor(Map.class).newInstance(args);
+ TokenizerFactory tokFactory = clazz.getConstructor(Map.class).newInstance(tokArgs);
if (tokFactory instanceof ResourceLoaderAware) {
((ResourceLoaderAware) tokFactory).inform(loader);
}
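
A minimal sketch of driving the new "tokenizerFactory." prefix handling programmatically; the resource loader and the synonyms.txt location are assumptions here, and only the prefix stripping itself comes from this patch:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.lucene.analysis.pattern.PatternTokenizerFactory;
    import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
    import org.apache.lucene.analysis.util.ClasspathResourceLoader;
    import org.apache.lucene.util.Version;

    public class SynonymFactoryArgsSketch {
      public static void main(String[] args) throws Exception {
        Map<String,String> synArgs = new HashMap<String,String>();
        synArgs.put("luceneMatchVersion", Version.LUCENE_44.toString());
        synArgs.put("synonyms", "synonyms.txt");
        synArgs.put("tokenizerFactory", PatternTokenizerFactory.class.getName());
        // prefixed params are stripped of "tokenizerFactory." and handed to
        // the PatternTokenizerFactory when it is instantiated in inform()
        synArgs.put("tokenizerFactory.pattern", "(.*)");
        synArgs.put("tokenizerFactory.group", "0");
        SynonymFilterFactory factory = new SynonymFilterFactory(synArgs);
        // assumes a synonyms.txt resource is resolvable from this classpath location
        factory.inform(new ClasspathResourceLoader(SynonymFactoryArgsSketch.class));
      }
    }
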
Index: lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
===================================================================
--- lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (revision 1481938)
+++ lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (working copy)
@@ -76,7 +76,6 @@
org.apache.lucene.analysis.payloads.NumericPayloadTokenFilterFactory
org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilterFactory
org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterFactory
-org.apache.lucene.analysis.position.PositionFilterFactory
org.apache.lucene.analysis.pt.PortugueseLightStemFilterFactory
org.apache.lucene.analysis.pt.PortugueseMinimalStemFilterFactory
org.apache.lucene.analysis.pt.PortugueseStemFilterFactory
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (revision 1481938)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (working copy)
@@ -151,12 +151,12 @@
"fiol", "fodral", "Basfiolsfodralmakaregesäll", "Bas", "fiol",
"fodral", "makare", "gesäll", "Skomakare", "Sko", "makare",
"Vindrutetorkare", "Vind", "rute", "torkare", "Vindrutetorkarblad",
- "Vind", "rute", "blad", "abba" }, new int[] { 0, 0, 3, 8, 8, 11, 17,
- 17, 20, 24, 24, 28, 33, 33, 39, 44, 44, 49, 54, 54, 58, 62, 69, 69, 72,
- 77, 84, 84, 87, 92, 98, 104, 111, 111, 114, 121, 121, 125, 129, 137,
- 137, 141, 151, 156 }, new int[] { 7, 3, 7, 16, 11, 16, 23, 20, 23, 32,
- 28, 32, 43, 39, 43, 53, 49, 53, 68, 58, 62, 68, 83, 72, 76, 83, 110,
- 87, 91, 98, 104, 110, 120, 114, 120, 136, 125, 129, 136, 155, 141, 145,
+ "Vind", "rute", "blad", "abba" }, new int[] { 0, 0, 0, 8, 8, 8, 17,
+ 17, 17, 24, 24, 24, 33, 33, 33, 44, 44, 44, 54, 54, 54, 54, 69, 69, 69,
+ 69, 84, 84, 84, 84, 84, 84, 111, 111, 111, 121, 121, 121, 121, 137,
+ 137, 137, 137, 156 }, new int[] { 7, 7, 7, 16, 16, 16, 23, 23, 23, 32,
+ 32, 32, 43, 43, 43, 53, 53, 53, 68, 68, 68, 68, 83, 83, 83, 83, 110,
+ 110, 110, 110, 110, 110, 120, 120, 120, 136, 136, 136, 136, 155, 155, 155,
155, 160 }, new int[] { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1,
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
0, 0, 0, 1 });
@@ -174,8 +174,8 @@
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
assertTokenStreamContents(tf, new String[] { "Basfiolsfodralmakaregesäll", "Bas",
- "fiolsfodral", "fodral", "makare", "gesäll" }, new int[] { 0, 0, 3, 8,
- 14, 20 }, new int[] { 26, 3, 14, 14, 20, 26 }, new int[] { 1, 0, 0, 0,
+ "fiolsfodral", "fodral", "makare", "gesäll" }, new int[] { 0, 0, 0, 0,
+ 0, 0 }, new int[] { 26, 26, 26, 26, 26, 26 }, new int[] { 1, 0, 0, 0,
0, 0 });
}
@@ -194,8 +194,8 @@
assertTokenStreamContents(tf,
new String[] { "abcdef", "ab", "cd", "ef" },
- new int[] { 0, 0, 2, 4},
- new int[] { 6, 2, 4, 6},
+ new int[] { 0, 0, 0, 0},
+ new int[] { 6, 6, 6, 6},
new int[] { 1, 0, 0, 0}
);
}
@@ -216,8 +216,8 @@
// since "d" is shorter than the minimum subword size, it should not be added to the token stream
assertTokenStreamContents(tf,
new String[] { "abcdefg", "abc", "efg" },
- new int[] { 0, 0, 4},
- new int[] { 7, 3, 7},
+ new int[] { 0, 0, 0},
+ new int[] { 7, 7, 7},
new int[] { 1, 0, 0}
);
}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (revision 1481938)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (working copy)
@@ -59,29 +59,21 @@
import org.apache.lucene.analysis.cjk.CJKBigramFilter;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
-import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.analysis.hunspell.HunspellDictionaryTest;
import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
-import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
-import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilter;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
-import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
-import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
-import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
-import org.apache.lucene.analysis.ngram.Lucene43NGramTokenizer;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
import org.apache.lucene.analysis.payloads.IdentityEncoder;
import org.apache.lucene.analysis.payloads.PayloadEncoder;
-import org.apache.lucene.analysis.position.PositionFilter;
import org.apache.lucene.analysis.snowball.TestSnowball;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.synonym.SynonymMap;
@@ -172,10 +164,6 @@
for (Class<?> c : Arrays.<Class<?>>asList(
ReversePathHierarchyTokenizer.class,
PathHierarchyTokenizer.class,
- HyphenationCompoundWordTokenFilter.class,
- DictionaryCompoundWordTokenFilter.class,
- // TODO: corrumpts graphs (offset consistency check):
- PositionFilter.class,
// TODO: it seems to mess up offsets!?
WikipediaTokenizer.class,
// TODO: doesn't handle graph inputs
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (revision 1481938)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (working copy)
@@ -17,19 +17,24 @@
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Random;
+
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.position.PositionFilter;
+import org.apache.lucene.analysis.shingle.ShingleFilter;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.util.Version;
-import java.io.Reader;
-import java.io.StringReader;
-import java.util.Random;
-
/**
* Tests {@link EdgeNGramTokenFilter} for correctness.
*/
@@ -101,9 +106,39 @@
false);
}
+ private static class PositionFilter extends TokenFilter {
+
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private boolean started;
+
+ PositionFilter(final TokenStream input) {
+ super(input);
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ if (started) {
+ posIncrAtt.setPositionIncrement(0);
+ } else {
+ started = true;
+ }
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ started = false;
+ }
+ }
+
public void testFirstTokenPositionIncrement() throws Exception {
TokenStream ts = new MockTokenizer(new StringReader("a abc"), MockTokenizer.WHITESPACE, false);
- ts = new PositionFilter(ts, 0); // All but first token will get 0 position increment
+ ts = new PositionFilter(ts); // All but first token will get 0 position increment
EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, 2, 3);
// The first token "a" will not be output, since it's smaller than the mingram size of 2.
// The second token on input to EdgeNGramTokenFilter will have position increment of 0,
@@ -155,4 +190,19 @@
};
checkAnalysisConsistency(random, a, random.nextBoolean(), "");
}
+
+ public void testGraphs() throws IOException {
+ TokenStream tk = new LetterTokenizer(Version.LUCENE_44, new StringReader("abc d efgh ij klmno p q"));
+ tk = new ShingleFilter(tk);
+ tk = new EdgeNGramTokenFilter(Version.LUCENE_44, tk, 7, 10);
+ tk.reset();
+ assertTokenStreamContents(tk,
+ new String[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" },
+ new int[] { 6,11,11,14 },
+ new int[] { 13,19,19,21 },
+ new int[] { 3,1,0,1 },
+ new int[] { 2,2,2,2 },
+ 23
+ );
+ }
}
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java (revision 1481938)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java (working copy)
@@ -19,11 +19,15 @@
import java.io.Reader;
import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.synonym.SynonymFilter;
+import org.apache.lucene.analysis.pattern.PatternTokenizerFactory;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
public class TestSynonymFilterFactory extends BaseTokenStreamFactoryTestCase {
@@ -59,4 +63,53 @@
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
}
+
+ static final String TOK_SYN_ARG_VAL = "argument";
+ static final String TOK_FOO_ARG_VAL = "foofoofoo";
+
+ /** Test that we can parse TokenizerFactory's arguments */
+ public void testTokenizerFactoryArguments() throws Exception {
+ final String clazz = PatternTokenizerFactory.class.getName();
+ TokenFilterFactory factory = null;
+
+ // simple arg form
+ factory = tokenFilterFactory("Synonym",
+ "synonyms", "synonyms.txt",
+ "tokenizerFactory", clazz,
+ "pattern", "(.*)",
+ "group", "0");
+ assertNotNull(factory);
+ // prefix
+ factory = tokenFilterFactory("Synonym",
+ "synonyms", "synonyms.txt",
+ "tokenizerFactory", clazz,
+ "tokenizerFactory.pattern", "(.*)",
+ "tokenizerFactory.group", "0");
+ assertNotNull(factory);
+
+ // sanity check that sub-PatternTokenizerFactory fails w/o pattern
+ try {
+ factory = tokenFilterFactory("Synonym",
+ "synonyms", "synonyms.txt",
+ "tokenizerFactory", clazz);
+ fail("tokenizerFactory should have complained about missing pattern arg");
+ } catch (Exception expected) {
+ // :NOOP:
+ }
+
+ // sanity check that sub-PatternTokenizerFactory fails on an unexpected param
+ try {
+ factory = tokenFilterFactory("Synonym",
+ "synonyms", "synonyms.txt",
+ "tokenizerFactory", clazz,
+ "tokenizerFactory.pattern", "(.*)",
+ "tokenizerFactory.bogusbogusbogus", "bogus",
+ "tokenizerFactory.group", "0");
+ fail("tokenizerFactory should have complained about missing pattern arg");
+ } catch (Exception expected) {
+ // :NOOP:
+ }
+ }
}
+
+
Index: lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java
===================================================================
--- lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java (revision 1481938)
+++ lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java (working copy)
@@ -44,7 +44,7 @@
try {
is = getResource(FST_FILENAME_SUFFIX);
is = new BufferedInputStream(is);
- fst = new FST<Long>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton(true));
+ fst = new FST<Long>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton());
} catch (IOException ioe) {
priorE = ioe;
} finally {
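
PositiveIntOutputs.getSingleton() loses its boolean argument in this patch; the same no-arg call recurs in the codec and doc-values files below. A self-contained sketch of building and querying a small FST with the singleton (class name, keys, and values are made up for illustration):

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.IntsRef;
    import org.apache.lucene.util.fst.Builder;
    import org.apache.lucene.util.fst.FST;
    import org.apache.lucene.util.fst.PositiveIntOutputs;
    import org.apache.lucene.util.fst.Util;

    public class PositiveIntOutputsSketch {
      public static void main(String[] args) throws Exception {
        PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); // no boolean argument
        Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
        IntsRef scratch = new IntsRef();
        // inputs must be added in sorted order
        builder.add(Util.toIntsRef(new BytesRef("cat"), scratch), 5L);
        builder.add(Util.toIntsRef(new BytesRef("dog"), scratch), 7L);
        FST<Long> fst = builder.finish();
        System.out.println(Util.get(fst, new BytesRef("cat"))); // 5
      }
    }
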
Index: lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
===================================================================
--- lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java (revision 1481938)
+++ lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java (working copy)
@@ -88,7 +88,7 @@
List<String> data = new ArrayList<String>(featureEntries.size());
List<int[]> segmentations = new ArrayList<int[]>(featureEntries.size());
- PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
+ PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, fstOutput);
IntsRef scratch = new IntsRef();
long ord = 0;
Index: lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
===================================================================
--- lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java (revision 1481938)
+++ lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java (working copy)
@@ -131,7 +131,7 @@
System.out.println(" encode...");
- PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
+ PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, PackedInts.DEFAULT, true, 15);
IntsRef scratch = new IntsRef();
long ord = -1; // first ord will be 0
Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java (revision 1481938)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java (working copy)
@@ -44,7 +44,7 @@
* @lucene.experimental */
public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
- private final PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton(true);
+ private final PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
private int indexDivisor;
// Closed if indexLoaded is true:
@@ -199,7 +199,7 @@
if (indexDivisor > 1) {
// subsample
final IntsRef scratchIntsRef = new IntsRef();
- final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
BytesRefFSTEnum.InputOutput<Long> result;
Index: lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java (revision 1481938)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java (working copy)
@@ -235,7 +235,7 @@
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
this.fieldInfo = fieldInfo;
- fstOutputs = PositiveIntOutputs.getSingleton(true);
+ fstOutputs = PositiveIntOutputs.getSingleton();
fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, fstOutputs);
indexStart = out.getFilePointer();
////System.out.println("VGW: field=" + fieldInfo.name);
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (revision 1481938)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (working copy)
@@ -513,7 +513,7 @@
}
private void loadTerms() throws IOException {
- PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
+ PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java (working copy)
@@ -165,9 +165,6 @@
if (docID < 0 || docID >= maxDoc) {
throw new IllegalArgumentException("docID out of range [0-" + maxDoc + "]: " + docID);
}
- if (docBases.length == 0) {
- return -1;
- }
final int block = block(docID);
final int relativeChunk = relativeChunk(block, docID - docBases[block]);
return startPointers[block] + relativeStartPointer(block, relativeChunk);
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java (working copy)
@@ -242,8 +242,8 @@
if (payloads) {
tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
}
- for (int i = 0; i < bufferedIndex; i++) {
- if (offsets) {
+ if (offsets) {
+ for (int i = 0; i < bufferedIndex; i++) {
tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
lastOffset = offsetEndBuffer[i];
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html (working copy)
@@ -372,13 +372,7 @@
Limitations
-
-When referring to term numbers, Lucene's current implementation uses a Java
-int to hold the term index, which means the
-maximum number of unique terms in any single index segment is ~2.1 billion
-times the term index interval (default 128) = ~274 billion. This is technically
-not a limitation of the index file format, just of Lucene's current
-implementation.
-
-Similarly, Lucene uses a Java int to refer to
+
+Lucene uses a Java int to refer to
document numbers, and the index file format uses an Int32
on-disk to store document numbers. This is a limitation
of both the index file format and the current implementation. Eventually these
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (working copy)
@@ -161,7 +161,7 @@
*
SkipFPDelta determines the position of this term's SkipData within the .doc
* file. In particular, it is the length of the TermFreq data.
* SkipDelta is only stored if DocFreq is not smaller than SkipMinimum
- * (i.e. 8 in Lucene41PostingsFormat).
+ * (i.e. 128 in Lucene41PostingsFormat).
*
SingletonDocID is an optimization when a term only appears in one document. In this case, instead
* of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the
* single document ID is written to the term dictionary.
-When referring to term numbers, Lucene's current implementation uses a Java
-int to hold the term index, which means the
-maximum number of unique terms in any single index segment is ~2.1 billion
-times the term index interval (default 128) = ~274 billion. This is technically
-not a limitation of the index file format, just of Lucene's current
-implementation.
-
-Similarly, Lucene uses a Java int to refer to
+
+Lucene uses a Java int to refer to
document numbers, and the index file format uses an Int32
on-disk to store document numbers. This is a limitation
of both the index file format and the current implementation. Eventually these
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (working copy)
@@ -245,7 +245,7 @@
meta.writeVInt(field.number);
meta.writeByte(FST);
meta.writeLong(data.getFilePointer());
- PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
IntsRef scratch = new IntsRef();
long ord = 0;
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (working copy)
@@ -278,7 +278,7 @@
instance = fstInstances.get(field.number);
if (instance == null) {
data.seek(entry.offset);
- instance = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
+ instance = new FST<Long>(data, PositiveIntOutputs.getSingleton());
fstInstances.put(field.number, instance);
}
}
@@ -352,7 +352,7 @@
instance = fstInstances.get(field.number);
if (instance == null) {
data.seek(entry.offset);
- instance = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
+ instance = new FST<Long>(data, PositiveIntOutputs.getSingleton());
fstInstances.put(field.number, instance);
}
}
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html (working copy)
@@ -384,13 +384,7 @@
Limitations
-
-When referring to term numbers, Lucene's current implementation uses a Java
-int to hold the term index, which means the
-maximum number of unique terms in any single index segment is ~2.1 billion
-times the term index interval (default 128) = ~274 billion. This is technically
-not a limitation of the index file format, just of Lucene's current
-implementation.
-
-Similarly, Lucene uses a Java int to refer to
+
+Lucene uses a Java int to refer to
document numbers, and the index file format uses an Int32
on-disk to store document numbers. This is a limitation
of both the index file format and the current implementation. Eventually these
Index: lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java (working copy)
@@ -36,9 +36,12 @@
protected StoredFieldsReader() {
}
- /** Visit the stored fields for document n, ignoring certain
- * fields. */
- public abstract void visitDocument(int n, StoredFieldVisitor visitor, Set<String> ignoreFields) throws IOException;
+ /**
+ * Visit the stored fields for document n, ignoring certain
+ * fields.
+ */
+ public abstract void visitDocument(int n, StoredFieldVisitor visitor,
+ Set<String> ignoreFields) throws IOException;
@Override
public abstract StoredFieldsReader clone();
Index: lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (working copy)
@@ -21,9 +21,7 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
-import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import java.util.SortedSet;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
@@ -86,12 +84,16 @@
assert packet.anyDeletes() || packet.anyUpdates();
assert checkDeleteStats();
assert packet.delGen() < nextGen;
- assert deletes.isEmpty() || deletes.get(deletes.size()-1).delGen() < packet.delGen() : "Delete packets must be in order";
+ assert deletes.isEmpty()
+ || deletes.get(deletes.size() - 1).delGen() < packet.delGen() : "Delete packets must be in order";
deletes.add(packet);
numTerms.addAndGet(packet.numTermDeletes);
bytesUsed.addAndGet(packet.bytesUsed);
if (infoStream.isEnabled("BD")) {
- infoStream.message("BD", "push deletes " + packet + " delGen=" + packet.delGen() + " packetCount=" + deletes.size() + " totBytesUsed=" + bytesUsed.get());
+ infoStream.message("BD",
+ "push deletes " + packet + " delGen=" + packet.delGen()
+ + " packetCount=" + deletes.size() + " totBytesUsed="
+ + bytesUsed.get());
}
assert checkDeleteStats();
return packet.delGen();
@@ -169,25 +171,51 @@
List<SegmentInfoPerCommit> infos2 = new ArrayList<SegmentInfoPerCommit>();
infos2.addAll(infos);
Collections.sort(infos2, sortSegInfoByDelGen);
-
+
+ boolean anyNewDeletes = false;
+ List<SegmentInfoPerCommit> allDeleted = new ArrayList<SegmentInfoPerCommit>();
+ // go through packets forward and apply deletes and updates
+ anyNewDeletes |= handleUpdates(readerPool, infos2);
+ // go through packets backwards and apply deletes
+ anyNewDeletes |= handleDeletes(readerPool, infos2, allDeleted);
+
+ // mark all advanced segment infos
+ for (SegmentInfoPerCommit info : infos2) {
+ info.setBufferedDeletesGen(gen);
+ }
+
+ assert checkDeleteStats();
+ if (infoStream.isEnabled("BD")) {
+ infoStream.message("BD",
+ "applyDeletes took " + (System.currentTimeMillis() - t0) + " msec");
+ }
+ // assert infos != segmentInfos || !any() : "infos=" + infos +
+ // " segmentInfos=" + segmentInfos + " any=" + any;
+
+ if (allDeleted.size() == 0) {
+ allDeleted = null;
+ }
+
+ return new ApplyDeletesResult(anyNewDeletes, gen, allDeleted);
+ }
+
+ private boolean handleDeletes(IndexWriter.ReaderPool readerPool,
+ List<SegmentInfoPerCommit> infos2, List<SegmentInfoPerCommit> allDeleted) throws IOException {
CoalescedDeletes coalescedDeletes = null;
boolean anyNewDeletes = false;
-
- int infosIDX = infos2.size()-1;
- int delIDX = deletes.size()-1;
-
- List allDeleted = null;
- Set advanced = null;
-
+
+ int infosIDX = infos2.size() - 1;
+ int delIDX = deletes.size() - 1;
+
while (infosIDX >= 0) {
//System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);
final FrozenBufferedDeletes packet = delIDX >= 0 ? deletes.get(delIDX) : null;
final SegmentInfoPerCommit info = infos2.get(infosIDX);
final long segGen = info.getBufferedDeletesGen();
-
+
if (packet != null && packet.anyDeletes() && segGen < packet.delGen()) {
- //System.out.println(" coalesce");
+ // System.out.println(" coalesce");
if (coalescedDeletes == null) {
coalescedDeletes = new CoalescedDeletes();
}
@@ -203,10 +231,12 @@
}
delIDX--;
- } else if (packet != null && packet.anyDeletes() && segGen == packet.delGen()) {
- assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen;
- //System.out.println(" eq");
-
+ } else if (packet != null && packet.anyDeletes()
+ && segGen == packet.delGen()) {
+ assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet gen="
+ + segGen;
+ // System.out.println(" eq");
+
// Lock order: IW -> BD -> RP
assert readerPool.infoIsLive(info);
final ReadersAndLiveDocs rld = readerPool.get(info, true);
@@ -230,17 +260,20 @@
rld.release(reader);
readerPool.release(rld);
}
- anyNewDeletes |= delCount > 0;
-
+ if (delCount > 0) {
+ anyNewDeletes = true;
+ }
+
if (segAllDeletes) {
- if (allDeleted == null) {
- allDeleted = new ArrayList();
- }
allDeleted.add(info);
}
if (infoStream.isEnabled("BD")) {
- infoStream.message("BD", "seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
+ infoStream.message("BD", "seg=" + info + " segGen=" + segGen
+ + " segDeletes=[" + packet + "]; coalesced deletes=["
+ + (coalescedDeletes == null ? "null" : coalescedDeletes)
+ + "] newDelCount=" + delCount
+ + (segAllDeletes ? " 100% deleted" : ""));
}
if (coalescedDeletes == null) {
@@ -254,11 +287,6 @@
*/
delIDX--;
infosIDX--;
- if (advanced == null) {
- advanced = new HashSet();
- }
- advanced.add(info);
-
} else if (packet != null && !packet.anyDeletes() && packet.anyUpdates()) {
// ignore updates only packets
delIDX--;
@@ -282,68 +310,58 @@
rld.release(reader);
readerPool.release(rld);
}
- anyNewDeletes |= delCount > 0;
-
+ if (delCount > 0) {
+ anyNewDeletes = true;
+ }
if (segAllDeletes) {
- if (allDeleted == null) {
- allDeleted = new ArrayList();
- }
allDeleted.add(info);
}
if (infoStream.isEnabled("BD")) {
- infoStream.message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
+ infoStream.message("BD", "seg=" + info + " segGen=" + segGen
+ + " coalesced deletes=["
+ + (coalescedDeletes == null ? "null" : coalescedDeletes)
+ + "] newDelCount=" + delCount
+ + (segAllDeletes ? " 100% deleted" : ""));
}
- if (advanced == null) {
- advanced = new HashSet();
}
- advanced.add(info);
- }
infosIDX--;
}
}
+ return anyNewDeletes;
+ }
+
+ private boolean handleUpdates(IndexWriter.ReaderPool readerPool,
+ List<SegmentInfoPerCommit> infos2)
+ throws IOException {
+ boolean anyNewDeletes = false;
- // go through deletes forward and apply updates
- for (SegmentInfoPerCommit updateInfo : infos2) {
- final long updateSegGen = updateInfo.getBufferedDeletesGen();
+ for (SegmentInfoPerCommit info : infos2) {
+ final long segGen = info.getBufferedDeletesGen();
- for (FrozenBufferedDeletes updatePacket : deletes) {
- if (updatePacket.anyUpdates() && updateSegGen <= updatePacket.delGen()) {
- assert readerPool.infoIsLive(updateInfo);
+ for (int delIdx = 0; delIdx < deletes.size(); delIdx++) {
+ FrozenBufferedDeletes packet = deletes.get(delIdx);
+ assert readerPool.infoIsLive(info);
+ if (segGen <= packet.delGen() && packet.anyUpdates()) {
// we need to reopen the reader every time, to include previous
- // updates when applying new ones
- final ReadersAndLiveDocs rld = readerPool.get(updateInfo, true);
+ // changes when applying new ones
+ final ReadersAndLiveDocs rld = readerPool.get(info, true);
final SegmentReader reader = rld.getReader(IOContext.READ);
- final boolean exactGen = updateSegGen == updatePacket.delGen();
try {
- anyNewDeletes |= applyTermUpdates(updatePacket.allUpdates, rld,
- reader, exactGen);
+ final boolean exactGen = (segGen == packet.delGen());
+ if (applyTermUpdates(packet.allUpdates, rld, reader, exactGen)) {
+ anyNewDeletes = true;
+ }
} finally {
rld.release(reader);
readerPool.release(rld);
}
- if (advanced == null) {
- advanced = new HashSet();
- }
- advanced.add(updateInfo);
}
}
+
}
-
- if (advanced != null) {
- for (SegmentInfoPerCommit info : advanced) {
- info.setBufferedDeletesGen(gen);
- }
- }
-
- assert checkDeleteStats();
- if (infoStream.isEnabled("BD")) {
- infoStream.message("BD", "applyDeletes took " + (System.currentTimeMillis()-t0) + " msec");
- }
- // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;
-
- return new ApplyDeletesResult(anyNewDeletes, gen, allDeleted);
+ return anyNewDeletes;
}
synchronized long getNextGen() {
@@ -467,7 +485,7 @@
return delCount;
}
-
+
private synchronized boolean applyTermUpdates(
SortedSet packetUpdates, ReadersAndLiveDocs rld,
SegmentReader reader, boolean exactSegment) throws IOException {
@@ -478,9 +496,9 @@
}
assert checkDeleteTerm(null);
-
+
UpdatedSegmentData updatedSegmentData = new UpdatedSegmentData(reader,
- packetUpdates, exactSegment);
+ packetUpdates, exactSegment, infoStream);
if (updatedSegmentData.hasUpdates()) {
rld.setLiveUpdates(updatedSegmentData);
@@ -489,7 +507,7 @@
return false;
}
-
+
public static class QueryAndLimit {
public final Query query;
public final int limit;
Index: lucene/core/src/java/org/apache/lucene/index/BufferedUpdates.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/BufferedUpdates.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/BufferedUpdates.java (working copy)
@@ -17,8 +17,9 @@
* limitations under the License.
*/
-import java.util.SortedSet;
-import java.util.TreeSet;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
@@ -37,7 +38,7 @@
class BufferedUpdates {
final AtomicInteger numTermUpdates = new AtomicInteger();
- final SortedFieldsUpdates terms = new SortedFieldsUpdates();
+ final ConcurrentSkipListMap<Term,List<FieldsUpdate>> terms = new ConcurrentSkipListMap<Term,List<FieldsUpdate>>();
public static final Integer MAX_INT = Integer.valueOf(Integer.MAX_VALUE);
@@ -73,21 +74,11 @@
}
}
- public void addTerm(Term term, FieldsUpdate update) {
- SortedSet current = terms.get(term);
- //if (current != null && update.docIDUpto < current.peek().docIDUpto) {
- // Only record the new number if it's greater than the
- // current one. This is important because if multiple
- // threads are replacing the same doc at nearly the
- // same time, it's possible that one thread that got a
- // higher docID is scheduled before the other
- // threads. If we blindly replace than we can
- // incorrectly get both docs indexed.
- //return;
- //}
+ public synchronized void addTerm(Term term, FieldsUpdate update) {
+ List<FieldsUpdate> current = terms.get(term);
if (current == null) {
- current = new TreeSet();
+ current = new ArrayList<FieldsUpdate>(1);
terms.put(term, current);
bytesUsed.addAndGet(BufferedDeletes.BYTES_PER_DEL_TERM
+ term.bytes.length
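
The BufferedUpdates change above swaps the per-term sorted set for a ConcurrentSkipListMap of arrival-ordered lists and synchronizes addTerm. A schematic sketch of that accumulation pattern; FieldsUpdate is internal to this branch, so a placeholder value type stands in for it:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.ConcurrentSkipListMap;
    import org.apache.lucene.index.Term;

    public class TermUpdateBufferSketch {
      // stand-in for the branch-internal FieldsUpdate class
      static final class Update {}

      private final ConcurrentSkipListMap<Term,List<Update>> terms =
          new ConcurrentSkipListMap<Term,List<Update>>();

      // mirrors the shape of the synchronized addTerm(Term, FieldsUpdate) above:
      // updates for the same term are appended in arrival order
      public synchronized void addTerm(Term term, Update update) {
        List<Update> current = terms.get(term);
        if (current == null) {
          current = new ArrayList<Update>(1);
          terms.put(term, current);
        }
        current.add(update);
      }
    }
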
Index: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (working copy)
@@ -30,7 +30,8 @@
import org.apache.lucene.codecs.BlockTreeTermsReader;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsFormat; // javadocs
+import org.apache.lucene.document.FieldType; // for javadocs
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
@@ -43,8 +44,6 @@
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.StringHelper;
-// javadocs
-// for javadocs
/**
* Basic tool and API to check the health of an index and
@@ -464,11 +463,11 @@
if (onlySegments != null) {
result.partial = true;
- if (infoStream != null)
+ if (infoStream != null) {
infoStream.print("\nChecking only these segments:");
- for (String s : onlySegments) {
- if (infoStream != null)
+ for (String s : onlySegments) {
infoStream.print(" " + s);
+ }
}
result.segmentsChecked.addAll(onlySegments);
msg(infoStream, ":");
Index: lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java (working copy)
@@ -38,10 +38,12 @@
void update(FrozenBufferedDeletes in) {
iterables.add(in.termsIterable());
-
- for(int queryIdx=0;queryIdx<in.queries.length;queryIdx++) {
fieldsToFlush, SegmentWriteState state) throws IOException;
/** Called when an aborting exception is hit */
Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (working copy)
@@ -35,80 +35,96 @@
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
import org.apache.lucene.index.FieldInfos.FieldNumbers;
import org.apache.lucene.index.FieldsUpdate.Operation;
+import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MergeInfo;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.InfoStream;
+import org.apache.lucene.util.MutableBits;
/**
- * This class accepts multiple added documents and directly writes segment
- * files.
- *
- * Each added document is passed to the {@link DocConsumer}, which in turn
- * processes the document and interacts with other consumers in the indexing
- * chain. Certain consumers, like {@link StoredFieldsConsumer} and
- * {@link TermVectorsConsumer}, digest a document and immediately write bytes to
- * the "doc store" files (ie, they do not consume RAM per document, except while
- * they are processing the document).
- *
- * Other consumers, eg {@link FreqProxTermsWriter} and {@link NormsConsumer},
- * buffer bytes in RAM and flush only when a new segment is produced.
- *
- * Once we have used our allowed RAM buffer, or the number of added docs is
- * large enough (in the case we are flushing by doc count instead of RAM usage),
- * we create a real segment and flush it to the Directory.
- *
+ * This class accepts multiple added documents and directly
+ * writes segment files.
+ *
+ * Each added document is passed to the {@link DocConsumer},
+ * which in turn processes the document and interacts with
+ * other consumers in the indexing chain. Certain
+ * consumers, like {@link StoredFieldsConsumer} and {@link
+ * TermVectorsConsumer}, digest a document and
+ * immediately write bytes to the "doc store" files (ie,
+ * they do not consume RAM per document, except while they
+ * are processing the document).
+ *
+ * Other consumers, eg {@link FreqProxTermsWriter} and
+ * {@link NormsConsumer}, buffer bytes in RAM and flush only
+ * when a new segment is produced.
+
+ * Once we have used our allowed RAM buffer, or the number
+ * of added docs is large enough (in the case we are
+ * flushing by doc count instead of RAM usage), we create a
+ * real segment and flush it to the Directory.
+ *
* Threads:
- *
- * Multiple threads are allowed into addDocument at once. There is an initial
- * synchronized call to getThreadState which allocates a ThreadState for this
- * thread. The same thread will get the same ThreadState over time (thread
- * affinity) so that if there are consistent patterns (for example each thread
- * is indexing a different content source) then we make better use of RAM. Then
- * processDocument is called on that ThreadState without synchronization (most
- * of the "heavy lifting" is in this call). Finally the synchronized
- * "finishDocument" is called to flush changes to the directory.
- *
- * When flush is called by IndexWriter we forcefully idle all threads and flush
- * only once they are all idle. This means you can call flush with a given
- * thread even while other threads are actively adding/deleting documents.
- *
- *
+ *
+ * Multiple threads are allowed into addDocument at once.
+ * There is an initial synchronized call to getThreadState
+ * which allocates a ThreadState for this thread. The same
+ * thread will get the same ThreadState over time (thread
+ * affinity) so that if there are consistent patterns (for
+ * example each thread is indexing a different content
+ * source) then we make better use of RAM. Then
+ * processDocument is called on that ThreadState without
+ * synchronization (most of the "heavy lifting" is in this
+ * call). Finally the synchronized "finishDocument" is
+ * called to flush changes to the directory.
+ *
+ * When flush is called by IndexWriter we forcefully idle
+ * all threads and flush only once they are all idle. This
+ * means you can call flush with a given thread even while
+ * other threads are actively adding/deleting documents.
+ *
+ *
* Exceptions:
- *
- * Because this class directly updates in-memory posting lists, and flushes
- * stored fields and term vectors directly to files in the directory, there are
- * certain limited times when an exception can corrupt this state. For example,
- * a disk full while flushing stored fields leaves this file in a corrupt state.
- * Or, an OOM exception while appending to the in-memory posting lists can
- * corrupt that posting list. We call such exceptions "aborting exceptions". In
- * these cases we must call abort() to discard all docs added since the last
- * flush.
- *
- * All other exceptions ("non-aborting exceptions") can still partially update
- * the index structures. These updates are consistent, but, they represent only
- * a part of the document seen up until the exception was hit. When this
- * happens, we immediately mark the document as deleted so that the document is
- * always atomically ("all or none") added to the index.
+ *
+ * Because this class directly updates in-memory posting
+ * lists, and flushes stored fields and term vectors
+ * directly to files in the directory, there are certain
+ * limited times when an exception can corrupt this state.
+ * For example, a disk full while flushing stored fields
+ * leaves this file in a corrupt state. Or, an OOM
+ * exception while appending to the in-memory posting lists
+ * can corrupt that posting list. We call such exceptions
+ * "aborting exceptions". In these cases we must call
+ * abort() to discard all docs added since the last flush.
+ *
+ * All other exceptions ("non-aborting exceptions") can
+ * still partially update the index structures. These
+ * updates are consistent, but, they represent only a part
+ * of the document seen up until the exception was hit.
+ * When this happens, we immediately mark the document as
+ * deleted so that the document is always atomically ("all
+ * or none") added to the index.
*/
final class DocumentsWriter {
Directory directory;
-
+
private volatile boolean closed;
-
+
final InfoStream infoStream;
Similarity similarity;
-
+
List newFiles;
-
+
final IndexWriter indexWriter;
-
+
private AtomicInteger numDocsInRAM = new AtomicInteger(0);
+ private AtomicInteger numUpdates = new AtomicInteger(0);
// TODO: cut over to BytesRefHash in BufferedDeletes
volatile DocumentsWriterDeleteQueue deleteQueue = new DocumentsWriterDeleteQueue();
@@ -120,20 +136,17 @@
* #anyChanges() & #flushAllThreads
*/
private volatile boolean pendingChangesInCurrentFullFlush;
-
- private Collection abortedFiles; // List of files that were written
- // before last abort()
-
+
+ private Collection<String> abortedFiles; // List of files that were written before last abort()
+
final IndexingChain chain;
-
+
final DocumentsWriterPerThreadPool perThreadPool;
final FlushPolicy flushPolicy;
final DocumentsWriterFlushControl flushControl;
final Codec codec;
-
- DocumentsWriter(Codec codec, LiveIndexWriterConfig config,
- Directory directory, IndexWriter writer, FieldNumbers globalFieldNumbers,
+ DocumentsWriter(Codec codec, LiveIndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumbers globalFieldNumbers,
BufferedDeletesStream bufferedDeletesStream) {
this.codec = codec;
this.directory = directory;
@@ -148,7 +161,7 @@
flushPolicy.init(this);
flushControl = new DocumentsWriterFlushControl(this, config);
}
-
+
synchronized void deleteQueries(final Query... queries) throws IOException {
deleteQueue.addDelete(queries);
flushControl.doOnDelete();
@@ -156,7 +169,7 @@
applyAllDeletes(deleteQueue);
}
}
-
+
// TODO: we could check w/ FreqProxTermsWriter: if the
// term doesn't exist, don't bother buffering into the
// per-DWPT map (but still must go into the global map)
@@ -168,49 +181,47 @@
applyAllDeletes(deleteQueue);
}
}
-
+
DocumentsWriterDeleteQueue currentDeleteSession() {
return deleteQueue;
}
- private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue)
- throws IOException {
+ private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException {
if (deleteQueue != null && !flushControl.isFullFlush()) {
ticketQueue.addDeletesAndPurge(this, deleteQueue);
}
indexWriter.applyAllDeletes();
indexWriter.flushCount.incrementAndGet();
}
-
+
/** Returns how many docs are currently buffered in RAM. */
int getNumDocs() {
return numDocsInRAM.get();
}
-
+
Collection<String> abortedFiles() {
return abortedFiles;
}
-
+
private void ensureOpen() throws AlreadyClosedException {
if (closed) {
throw new AlreadyClosedException("this IndexWriter is closed");
}
}
-
- /**
- * Called if we hit an exception at a bad time (when updating the index files)
- * and must discard all currently buffered docs. This resets our state,
- * discarding any docs added since last flush.
- */
+
+ /** Called if we hit an exception at a bad time (when
+ * updating the index files) and must discard all
+ * currently buffered docs. This resets our state,
+ * discarding any docs added since last flush. */
synchronized void abort() {
boolean success = false;
-
+
try {
deleteQueue.clear();
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "abort");
}
-
+
final int limit = perThreadPool.getActiveThreadState();
for (int i = 0; i < limit; i++) {
final ThreadState perThread = perThreadPool.getThreadState(i);
@@ -235,58 +246,110 @@
success = true;
} finally {
if (infoStream.isEnabled("DW")) {
- infoStream.message("DW", "done abort; abortedFiles=" + abortedFiles
- + " success=" + success);
+ infoStream.message("DW", "done abort; abortedFiles=" + abortedFiles + " success=" + success);
}
}
}
+ synchronized void lockAndAbortAll() {
+ assert indexWriter.holdsFullFlushLock();
+ if (infoStream.isEnabled("DW")) {
+ infoStream.message("DW", "lockAndAbortAll");
+ }
+ boolean success = false;
+ try {
+ deleteQueue.clear();
+ final int limit = perThreadPool.getMaxThreadStates();
+ for (int i = 0; i < limit; i++) {
+ final ThreadState perThread = perThreadPool.getThreadState(i);
+ perThread.lock();
+ if (perThread.isActive()) { // we might be closed or
+ try {
+ perThread.dwpt.abort();
+ } finally {
+ perThread.dwpt.checkAndResetHasAborted();
+ flushControl.doOnAbort(perThread);
+ }
+ }
+ }
+ deleteQueue.clear();
+ flushControl.abortPendingFlushes();
+ flushControl.waitForFlush();
+ success = true;
+ } finally {
+ if (infoStream.isEnabled("DW")) {
+ infoStream.message("DW", "finished lockAndAbortAll success=" + success);
+ }
+ if (!success) {
+ // if something happens here we unlock all states again
+ unlockAllAfterAbortAll();
+ }
+ }
+ }
+
+ final synchronized void unlockAllAfterAbortAll() {
+ assert indexWriter.holdsFullFlushLock();
+ if (infoStream.isEnabled("DW")) {
+ infoStream.message("DW", "unlockAll");
+ }
+ final int limit = perThreadPool.getMaxThreadStates();
+ for (int i = 0; i < limit; i++) {
+ try {
+ final ThreadState perThread = perThreadPool.getThreadState(i);
+ if (perThread.isHeldByCurrentThread()) {
+ perThread.unlock();
+ }
+ } catch(Throwable e) {
+ if (infoStream.isEnabled("DW")) {
+ infoStream.message("DW", "unlockAll: could not unlock state: " + i + " msg:" + e.getMessage());
+ }
+ // ignore & keep on unlocking
+ }
+ }
+ }
+
boolean anyChanges() {
if (infoStream.isEnabled("DW")) {
- infoStream.message("DW",
- "anyChanges? numDocsInRam=" + numDocsInRAM.get() + " deletes="
- + anyDeletions() + " hasTickets:" + ticketQueue.hasTickets()
- + " pendingChangesInFullFlush: "
- + pendingChangesInCurrentFullFlush);
+ infoStream.message("DW", "anyChanges? numDocsInRam=" + numDocsInRAM.get()
+ + " deletes=" + anyDeletions() + " hasTickets:"
+ + ticketQueue.hasTickets() + " pendingChangesInFullFlush: "
+ + pendingChangesInCurrentFullFlush);
}
/*
- * changes are either in a DWPT or in the deleteQueue. yet if we currently
- * flush deletes and / or dwpt there could be a window where all changes are
- * in the ticket queue before they are published to the IW. ie we need to
- * check if the ticket queue has any tickets.
+ * changes are either in a DWPT or in the deleteQueue.
+ * yet if we currently flush deletes and / or dwpt there
+ * could be a window where all changes are in the ticket queue
+ * before they are published to the IW. ie we need to check if the
+ * ticket queue has any tickets.
*/
- return numDocsInRAM.get() != 0 || anyDeletions()
- || ticketQueue.hasTickets() || pendingChangesInCurrentFullFlush;
+ return numDocsInRAM.get() != 0 || anyDeletions() || ticketQueue.hasTickets() || pendingChangesInCurrentFullFlush;
}
public int getBufferedDeleteTermsSize() {
return deleteQueue.getBufferedDeleteTermsSize();
}
-
- // for testing
+
+ //for testing
public int getNumBufferedDeleteTerms() {
return deleteQueue.numGlobalTermDeletes();
}
-
+
public boolean anyDeletions() {
return deleteQueue.anyChanges();
}
-
+
void close() {
closed = true;
flushControl.setClosed();
}
-
+
private boolean preUpdate() throws IOException {
ensureOpen();
boolean maybeMerge = false;
if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
// Help out flushing any queued DWPTs so we can un-stall:
if (infoStream.isEnabled("DW")) {
- infoStream
- .message(
- "DW",
- "DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)");
+ infoStream.message("DW", "DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)");
}
do {
// Try pick up pending threads here if possible
@@ -295,58 +358,52 @@
// Don't push the delete here since the update could fail!
maybeMerge |= doFlush(flushingDWPT);
}
-
+
if (infoStream.isEnabled("DW")) {
if (flushControl.anyStalledThreads()) {
- infoStream.message("DW",
- "WARNING DocumentsWriter has stalled threads; waiting");
+ infoStream.message("DW", "WARNING DocumentsWriter has stalled threads; waiting");
}
}
flushControl.waitIfStalled(); // block if stalled
- } while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs try
- // help flushing
-
+ } while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs try help flushing
+
if (infoStream.isEnabled("DW")) {
- infoStream
- .message("DW",
- "continue indexing after helping out flushing DocumentsWriter is healthy");
+ infoStream.message("DW", "continue indexing after helping out flushing DocumentsWriter is healthy");
}
}
return maybeMerge;
}
-
- private boolean postUpdate(DocumentsWriterPerThread flushingDWPT,
- boolean maybeMerge) throws IOException {
+
+ private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, boolean maybeMerge) throws IOException {
if (flushControl.doApplyAllDeletes()) {
applyAllDeletes(deleteQueue);
}
if (flushingDWPT != null) {
maybeMerge |= doFlush(flushingDWPT);
} else {
- final DocumentsWriterPerThread nextPendingFlush = flushControl
- .nextPendingFlush();
+ final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
if (nextPendingFlush != null) {
maybeMerge |= doFlush(nextPendingFlush);
}
}
-
+
return maybeMerge;
}
-
-  boolean updateDocuments(final Iterable<? extends IndexDocument> docs,
- final Analyzer analyzer, final Term delTerm) throws IOException {
+
+  boolean updateDocuments(final Iterable<? extends IndexDocument> docs, final Analyzer analyzer,
+ final Term delTerm) throws IOException {
boolean maybeMerge = preUpdate();
-
+
final ThreadState perThread = flushControl.obtainAndLock();
final DocumentsWriterPerThread flushingDWPT;
try {
if (!perThread.isActive()) {
ensureOpen();
- assert false : "perThread is not active but we are still open";
+ assert false: "perThread is not active but we are still open";
}
-
+
final DocumentsWriterPerThread dwpt = perThread.dwpt;
try {
final int docCount = dwpt.updateDocuments(docs, analyzer, delTerm);
@@ -361,30 +418,29 @@
} finally {
perThread.unlock();
}
-
+
return postUpdate(flushingDWPT, maybeMerge);
}
-
+
boolean updateDocument(final IndexDocument doc, final Analyzer analyzer,
final Term delTerm) throws IOException {
-
+
boolean maybeMerge = preUpdate();
-
+
final ThreadState perThread = flushControl.obtainAndLock();
-
+
final DocumentsWriterPerThread flushingDWPT;
try {
-
+
if (!perThread.isActive()) {
ensureOpen();
- throw new IllegalStateException(
- "perThread is not active but we are still open");
+ throw new IllegalStateException("perThread is not active but we are still open");
}
-
+
final DocumentsWriterPerThread dwpt = perThread.dwpt;
try {
- dwpt.updateDocument(doc, analyzer, delTerm);
+ dwpt.updateDocument(doc, analyzer, delTerm);
numDocsInRAM.incrementAndGet();
} finally {
if (dwpt.checkAndResetHasAborted()) {
@@ -421,7 +477,7 @@
      // create new fields update, which should affect previous docs in the
// current segment
FieldsUpdate fieldsUpdate = new FieldsUpdate(term, operation, fields,
- analyzer, numDocsInRAM.get() - 1, System.currentTimeMillis());
+ analyzer, numDocsInRAM.get() - 1, numUpdates.addAndGet(1));
// invert the given fields and store in RAMDirectory
dwpt.invertFieldsUpdate(fieldsUpdate, globalFieldNumberMap);
dwpt.updateFields(term, fieldsUpdate);
@@ -573,10 +629,9 @@
      * might miss deleting documents in 'A'.
*/
try {
- // Each flush is assigned a ticket in the order they acquire the
- // ticketQueue lock
+ // Each flush is assigned a ticket in the order they acquire the ticketQueue lock
ticket = ticketQueue.addFlushTicket(flushingDWPT);
-
+
// flush concurrently without locking
final FlushedSegment newSegment = flushingDWPT.flush();
if (newSegment == null) {
@@ -590,8 +645,7 @@
} finally {
if (!success && ticket != null) {
// In the case of a failure make sure we are making progress and
- // apply all the deletes since the segment flush failed since the
- // flush
+ // apply all the deletes since the segment flush failed since the flush
// ticket could hold global deletes see FlushTicket#canPublish()
ticketQueue.markTicketFailed(ticket);
}
@@ -600,38 +654,35 @@
* Now we are done and try to flush the ticket queue if the head of the
* queue has already finished the flush.
*/
- if (ticketQueue.getTicketCount() >= perThreadPool
- .getActiveThreadState()) {
+ if (ticketQueue.getTicketCount() >= perThreadPool.getActiveThreadState()) {
// This means there is a backlog: the one
// thread in innerPurge can't keep up with all
- // other threads flushing segments. In this case
+ // other threads flushing segments. In this case
// we forcefully stall the producers.
ticketQueue.forcePurge(this);
} else {
ticketQueue.tryPurge(this);
}
-
+
} finally {
flushControl.doAfterFlush(flushingDWPT);
flushingDWPT.checkAndResetHasAborted();
indexWriter.flushCount.incrementAndGet();
indexWriter.doAfterFlush();
}
-
+
flushingDWPT = flushControl.nextPendingFlush();
}
-
+
// If deletes alone are consuming > 1/2 our RAM
// buffer, force them all to apply now. This is to
// prevent too-frequent flushing of a long tail of
// tiny segments:
final double ramBufferSizeMB = indexWriter.getConfig().getRAMBufferSizeMB();
- if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH
- && flushControl.getDeleteBytesUsed() > (1024 * 1024 * ramBufferSizeMB / 2)) {
+ if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
+ flushControl.getDeleteBytesUsed() > (1024*1024*ramBufferSizeMB/2)) {
if (infoStream.isEnabled("DW")) {
- infoStream.message("DW", "force apply deletes bytesUsed="
- + flushControl.getDeleteBytesUsed() + " vs ramBuffer="
- + (1024 * 1024 * ramBufferSizeMB));
+ infoStream.message("DW", "force apply deletes bytesUsed=" + flushControl.getDeleteBytesUsed() + " vs ramBuffer=" + (1024*1024*ramBufferSizeMB));
}
applyAllDeletes(deleteQueue);
}
@@ -639,8 +690,9 @@
return actualFlushes > 0;
}
- void finishFlush(FlushedSegment newSegment,
- FrozenBufferedDeletes bufferedDeletes) throws IOException {
+
+ void finishFlush(FlushedSegment newSegment, FrozenBufferedDeletes bufferedDeletes)
+ throws IOException {
// Finish the flushed segment and publish it to IndexWriter
if (newSegment == null) {
assert bufferedDeletes != null;
@@ -648,15 +700,14 @@
&& (bufferedDeletes.anyDeletes() || bufferedDeletes.anyUpdates())) {
indexWriter.publishFrozenDeletes(bufferedDeletes);
if (infoStream.isEnabled("DW")) {
- infoStream.message("DW", "flush: push buffered deletes: "
- + bufferedDeletes);
+ infoStream.message("DW", "flush: push buffered deletes: " + bufferedDeletes);
}
}
} else {
- publishFlushedSegment(newSegment, bufferedDeletes);
+ publishFlushedSegment(newSegment, bufferedDeletes);
}
}
-
+
final void subtractFlushedNumDocs(int numFlushed) {
int oldValue = numDocsInRAM.get();
while (!numDocsInRAM.compareAndSet(oldValue, oldValue - numFlushed)) {
@@ -666,62 +717,55 @@
/**
* Publishes the flushed segment, segment private deletes (if any) and its
- * associated global delete (if present) to IndexWriter. The actual publishing
- * operation is synced on IW -> BDS so that the {@link SegmentInfo}'s delete
- * generation is always GlobalPacket_deleteGeneration + 1
+ * associated global delete (if present) to IndexWriter. The actual
+ * publishing operation is synced on IW -> BDS so that the {@link SegmentInfo}'s
+ * delete generation is always GlobalPacket_deleteGeneration + 1
*/
- private void publishFlushedSegment(FlushedSegment newSegment,
- FrozenBufferedDeletes globalPacket) throws IOException {
+ private void publishFlushedSegment(FlushedSegment newSegment, FrozenBufferedDeletes globalPacket)
+ throws IOException {
assert newSegment != null;
assert newSegment.segmentInfo != null;
final FrozenBufferedDeletes segmentDeletes = newSegment.segmentDeletes;
- // System.out.println("FLUSH: " + newSegment.segmentInfo.info.name);
+ //System.out.println("FLUSH: " + newSegment.segmentInfo.info.name);
if (infoStream.isEnabled("DW")) {
- infoStream.message("DW", "publishFlushedSegment seg-private deletes="
- + segmentDeletes);
+ infoStream.message("DW", "publishFlushedSegment seg-private deletes=" + segmentDeletes);
}
+
if (segmentDeletes != null && infoStream.isEnabled("DW")) {
- infoStream.message("DW", "flush: push buffered seg private deletes: "
- + segmentDeletes);
+ infoStream.message("DW", "flush: push buffered seg private deletes: " + segmentDeletes);
}
// now publish!
- indexWriter.publishFlushedSegment(newSegment.segmentInfo, segmentDeletes,
- globalPacket);
+ indexWriter.publishFlushedSegment(newSegment.segmentInfo, segmentDeletes, globalPacket);
}
// for asserts
private volatile DocumentsWriterDeleteQueue currentFullFlushDelQueue = null;
-
+
// for asserts
- private synchronized boolean setFlushingDeleteQueue(
- DocumentsWriterDeleteQueue session) {
+ private synchronized boolean setFlushingDeleteQueue(DocumentsWriterDeleteQueue session) {
currentFullFlushDelQueue = session;
return true;
}
/*
* FlushAllThreads is synced by IW fullFlushLock. Flushing all threads is a
- * two stage operation; the caller must ensure (in try/finally) that
- * finishFlush is called after this method, to release the flush lock in
- * DWFlushControl
+ * two stage operation; the caller must ensure (in try/finally) that finishFlush
+ * is called after this method, to release the flush lock in DWFlushControl
*/
- final boolean flushAllThreads() throws IOException {
+ final boolean flushAllThreads()
+ throws IOException {
final DocumentsWriterDeleteQueue flushingDeleteQueue;
if (infoStream.isEnabled("DW")) {
- infoStream.message("DW", Thread.currentThread().getName()
- + " startFullFlush");
+ infoStream.message("DW", Thread.currentThread().getName() + " startFullFlush");
}
synchronized (this) {
pendingChangesInCurrentFullFlush = anyChanges();
flushingDeleteQueue = deleteQueue;
- /*
- * Cutover to a new delete queue. This must be synced on the flush control
+ /* Cutover to a new delete queue. This must be synced on the flush control
* otherwise a new DWPT could sneak into the loop with an already flushing
- * delete queue
- */
- flushControl.markForFullFlush(); // swaps the delQueue synced on
- // FlushControl
+ * delete queue */
+ flushControl.markForFullFlush(); // swaps the delQueue synced on FlushControl
assert setFlushingDeleteQueue(flushingDeleteQueue);
}
assert currentFullFlushDelQueue != null;
@@ -735,15 +779,10 @@
anythingFlushed |= doFlush(flushingDWPT);
}
// If a concurrent flush is still in flight wait for it
- flushControl.waitForFlush();
- if (!anythingFlushed && flushingDeleteQueue.anyChanges()) { // apply
- // deletes if
- // we did not
- // flush any
- // document
+ flushControl.waitForFlush();
+ if (!anythingFlushed && flushingDeleteQueue.anyChanges()) { // apply deletes if we did not flush any document
if (infoStream.isEnabled("DW")) {
- infoStream.message("DW", Thread.currentThread().getName()
- + ": flush naked frozen global deletes");
+ infoStream.message("DW", Thread.currentThread().getName() + ": flush naked frozen global deletes");
}
ticketQueue.addDeletesAndPurge(this, flushingDeleteQueue);
} else {
@@ -759,8 +798,7 @@
final void finishFullFlush(boolean success) {
try {
if (infoStream.isEnabled("DW")) {
- infoStream.message("DW", Thread.currentThread().getName()
- + " finishFullFlush success=" + success);
+ infoStream.message("DW", Thread.currentThread().getName() + " finishFullFlush success=" + success);
}
assert setFlushingDeleteQueue(null);
if (success) {
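
Before moving on to DocumentsWriterFlushControl, a standalone sketch (not Lucene code) of the locking discipline the new lockAndAbortAll()/unlockAllAfterAbortAll() pair appears to implement: take every per-thread lock before aborting buffered state, and make sure the unlock side runs even when aborting one state throws. Plain ReentrantLocks stand in for ThreadState; all names below are illustrative.

import java.util.concurrent.locks.ReentrantLock;

public class LockAllSketch {
  private final ReentrantLock[] states = new ReentrantLock[4]; // stands in for the ThreadState pool

  public LockAllSketch() {
    for (int i = 0; i < states.length; i++) {
      states[i] = new ReentrantLock();
    }
  }

  void lockAndAbortAll() {
    boolean success = false;
    try {
      for (ReentrantLock state : states) {
        state.lock();                // every state gets locked, active or not
        // ... abort the per-thread buffer here ...
      }
      success = true;
    } finally {
      if (!success) {
        unlockAllAfterAbortAll();    // on failure, do not leave states locked behind
      }
    }
  }

  void unlockAllAfterAbortAll() {
    for (int i = 0; i < states.length; i++) {
      try {
        if (states[i].isHeldByCurrentThread()) {
          states[i].unlock();
        }
      } catch (Throwable t) {
        // ignore and keep unlocking the remaining states, as the patch does
      }
    }
  }

  // Hypothetical caller, e.g. a rollback/deleteAll path that must discard all buffered docs.
  void rollbackBufferedState() {
    lockAndAbortAll();
    try {
      // ... discard shared state while all indexing threads are blocked ...
    } finally {
      unlockAllAfterAbortAll();
    }
  }
}
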
Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java (working copy)
@@ -240,6 +240,7 @@
}
public synchronized void waitForFlush() {
+    assert !Thread.holdsLock(this.documentsWriter.indexWriter) : "IW lock should never be held when waiting on flush";
while (flushingWriters.size() != 0) {
try {
this.wait();
@@ -606,9 +607,10 @@
for (DocumentsWriterPerThread dwpt : flushQueue) {
try {
dwpt.abort();
- doAfterFlush(dwpt);
} catch (Throwable ex) {
// ignore - keep on aborting the flush queue
+ } finally {
+ doAfterFlush(dwpt);
}
}
for (BlockedFlush blockedFlush : blockedFlushes) {
@@ -616,9 +618,10 @@
flushingWriters
.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
blockedFlush.dwpt.abort();
- doAfterFlush(blockedFlush.dwpt);
} catch (Throwable ex) {
// ignore - keep on aborting the blocked queue
+ } finally {
+ doAfterFlush(blockedFlush.dwpt);
}
}
} finally {
Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java (working copy)
@@ -274,7 +274,7 @@
* given ord.
*/
ThreadState getThreadState(int ord) {
- assert ord < numThreadStatesActive;
+ //assert ord < numThreadStatesActive;
return threadStates[ord];
}
Index: lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java (working copy)
@@ -47,7 +47,7 @@
   final Set<String> replacedFields;
final Analyzer analyzer;
final int docIdUpto;
- final long timeStamp;
+ final int updateNumber;
IndexDocument fields;
Directory directory;
@@ -64,11 +64,13 @@
* The fields to use in the update operation.
* @param analyzer
* The analyzer to use in the update.
- * @param docIDUpto
- * Document ID of the last document added before this field update
+ * @param docIdUpto
+ * The doc ID of the last document added before this update.
+ * @param updateNumber
+ * The running number of this update for the current segment.
*/
public FieldsUpdate(Term term, Operation operation, IndexDocument fields,
- Analyzer analyzer, int docIDUpto, long timeStamp) {
+ Analyzer analyzer, int docIdUpto, int updateNumber) {
this.term = term;
this.fields = fields;
this.operation = operation;
@@ -84,8 +86,8 @@
}
}
this.analyzer = analyzer;
- this.docIdUpto = docIDUpto;
- this.timeStamp = timeStamp;
+ this.docIdUpto = docIdUpto;
+ this.updateNumber = updateNumber;
}
/**
@@ -100,23 +102,20 @@
this.replacedFields = other.replacedFields;
this.analyzer = other.analyzer;
this.docIdUpto = other.docIdUpto;
- this.timeStamp = other.timeStamp;
+ this.updateNumber = other.updateNumber;
this.directory = other.directory;
this.segmentInfo = other.segmentInfo;
}
- /* Order FrieldsUpdate by increasing docIDUpto */
@Override
public int compareTo(FieldsUpdate other) {
- int diff = this.docIdUpto - other.docIdUpto;
- if (diff == 0) {
- if (this.timeStamp < other.timeStamp) {
- return -1;
- } else if (this.timeStamp > other.timeStamp) {
- return 1;
- }
- }
- return diff;
+ return this.updateNumber - other.updateNumber;
}
-
+
+ @Override
+ public String toString() {
+ return "FieldsUpdate [term=" + term + ", operation=" + operation
+ + ", docIdUpto=" + docIdUpto + ", updateNumber=" + updateNumber + "]";
+ }
+
}
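
The switch from a wall-clock timeStamp to a running update number is about ordering: two updates recorded within the same millisecond tie on (docIdUpto, timeStamp), so a sorted set can silently collapse them, whereas numUpdates.addAndGet(1) (an AtomicInteger the patch increments in DocumentsWriter) gives a strict total order. A standalone sketch of the difference; all names here are illustrative, not Lucene code.

import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;

public class UpdateOrderSketch {
  static final AtomicInteger numUpdates = new AtomicInteger();

  static class Update implements Comparable<Update> {
    final int docIdUpto;
    final long timeStamp;
    final int updateNumber;
    Update(int docIdUpto, long timeStamp, int updateNumber) {
      this.docIdUpto = docIdUpto;
      this.timeStamp = timeStamp;
      this.updateNumber = updateNumber;
    }
    @Override
    public int compareTo(Update other) {
      return this.updateNumber - other.updateNumber; // the new, strict ordering
    }
  }

  public static void main(String[] args) {
    long now = System.currentTimeMillis();
    Update a = new Update(5, now, numUpdates.addAndGet(1));
    Update b = new Update(5, now, numUpdates.addAndGet(1)); // same doc, same millisecond

    TreeSet<Update> updates = new TreeSet<Update>();
    updates.add(a);
    updates.add(b);
    // The old (docIdUpto, timeStamp) comparison would report 1 here; the counter keeps both.
    System.out.println("updates kept: " + updates.size()); // prints 2
  }
}
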
Index: lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (working copy)
@@ -359,12 +359,12 @@
assert !writeOffsets || writePositions;
     final Map<Term,Integer> segDeletes;
- if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
+ if (state.hasDeletesWithoutUpdates() && state.segDeletes.terms.size() > 0) {
segDeletes = state.segDeletes.terms;
} else {
segDeletes = null;
}
-
+
final int[] termIDs = termsHashPerField.sortPostings(termComp);
final int numTerms = termsHashPerField.bytesHash.size();
final BytesRef text = new BytesRef();
@@ -476,7 +476,7 @@
if (state.liveDocs == null) {
state.liveDocs = docState.docWriter.codec.liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
}
- if (state.liveDocs.get(docID)) {
+ if (state.hasDeletesWithoutUpdates() && state.liveDocs.get(docID)) {
state.delCountOnFlush++;
state.liveDocs.clear(docID);
}
Index: lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (working copy)
@@ -55,13 +55,17 @@
   // a segment private deletes. in that case it should
// only have Queries
- // An sorted set of updates
+ // A sorted set of updates
   final SortedSet<FieldsUpdate> allUpdates;
- public FrozenBufferedDeletes(BufferedDeletes deletes, BufferedUpdates updates, boolean isSegmentPrivate) {
+ public FrozenBufferedDeletes(BufferedDeletes deletes,
+ BufferedUpdates updates, boolean isSegmentPrivate) {
this.isSegmentPrivate = isSegmentPrivate;
int localBytesUsed = 0;
+
+ // freeze deletes
if (deletes != null) {
+ // arrange terms and queries in arrays
assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private package should only have del queries";
Term termsArray[] = deletes.terms.keySet().toArray(
new Term[deletes.terms.size()]);
@@ -97,10 +101,10 @@
allUpdates = null;
} else {
allUpdates = new TreeSet<>();
-      for (SortedSet<FieldsUpdate> list : updates.terms.values()) {
+      for (List<FieldsUpdate> list : updates.terms.values()) {
allUpdates.addAll(list);
}
- localBytesUsed += 100;
+ localBytesUsed += updates.bytesUsed.get();
}
bytesUsed = localBytesUsed;
Index: lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java (working copy)
@@ -242,7 +242,7 @@
    * All files created by codecs must match this pattern (checked in
* SegmentInfo).
*/
- public static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[a-z0-9]+(_.*)?\\..*");
+ public static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[_]?[a-z0-9]+(_.*)?\\..*");
/** Returns true if the file denotes an updated segment. */
public static boolean isUpdatedSegmentFile(String file) {
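
The widened CODEC_FILE_PATTERN accepts an optional second leading underscore, presumably so the double-underscore names used for updated-segment files (compare the isUpdatedSegmentFile helper above) still count as codec files; treat that intent as an assumption. A quick standalone check of what actually changes, with made-up file names:

import java.util.regex.Pattern;

public class CodecFilePatternCheck {
  static final Pattern OLD_PATTERN = Pattern.compile("_[a-z0-9]+(_.*)?\\..*");
  static final Pattern NEW_PATTERN = Pattern.compile("_[_]?[a-z0-9]+(_.*)?\\..*");

  public static void main(String[] args) {
    String[] names = { "_0.cfs", "_3_1.del", "__0_1.fnm" };
    for (String name : names) {
      System.out.println(name
          + " old=" + OLD_PATTERN.matcher(name).matches()
          + " new=" + NEW_PATTERN.matcher(name).matches());
    }
    // _0.cfs     old=true   new=true
    // _3_1.del   old=true   new=true
    // __0_1.fnm  old=false  new=true  <- only the widened pattern allows the double underscore
  }
}
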
Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -31,6 +31,7 @@
import java.util.Locale;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.Analyzer;
@@ -57,133 +58,132 @@
import org.apache.lucene.util.ThreadInterruptedException;
/**
- * An IndexWriter creates and maintains an index.
- *
- *
- * The {@link OpenMode} option on
- * {@link IndexWriterConfig#setOpenMode(OpenMode)} determines whether a new
- * index is created, or whether an existing index is opened. Note that you can
- * open an index with {@link OpenMode#CREATE} even while readers are using the
- * index. The old readers will continue to search the "point in time" snapshot
- * they had opened, and won't see the newly created index until they re-open. If
- * {@link OpenMode#CREATE_OR_APPEND} is used IndexWriter will create a new index
- * if there is not already an index at the provided path and otherwise open the
- * existing index.
- *
- *
- *
- * In either case, documents are added with {@link #addDocument(IndexDocument)
- * addDocument} and removed with {@link #deleteDocuments(Term)} or
- * {@link #deleteDocuments(Query)}. A document can be updated with
- * {@link #updateDocument(Term, IndexDocument) updateDocument} (which just
- * deletes and then adds the entire document). When finished adding, deleting
- * and updating documents, {@link #close() close} should be called.
- *
- *
- *
- *
- * These changes are buffered in memory and periodically flushed to the
- * {@link Directory} (during the above method calls). A flush is triggered when
- * there are enough added documents since the last flush. Flushing is triggered
- * either by RAM usage of the documents (see
- * {@link IndexWriterConfig#setRAMBufferSizeMB}) or the number of added
- * documents (see {@link IndexWriterConfig#setMaxBufferedDocs(int)}). The
- * default is to flush when RAM usage hits
- * {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} MB. For best indexing
- * speed you should flush by RAM usage with a large RAM buffer. Additionally, if
- * IndexWriter reaches the configured number of buffered deletes (see
- * {@link IndexWriterConfig#setMaxBufferedDeleteTerms}) the deleted terms and
- * queries are flushed and applied to existing segments. In contrast to the
- * other flush options {@link IndexWriterConfig#setRAMBufferSizeMB} and
- * {@link IndexWriterConfig#setMaxBufferedDocs(int)}, deleted terms won't
- * trigger a segment flush. Note that flushing just moves the internal buffered
- * state in IndexWriter into the index, but these changes are not visible to
- * IndexReader until either {@link #commit()} or {@link #close} is called. A
- * flush may also trigger one or more segment merges which by default run with a
- * background thread so as not to block the addDocument calls (see below for changing the {@link MergeScheduler}).
- *
- *
- *
- * Opening an IndexWriter creates a lock file for the directory in
- * use. Trying to open another IndexWriter on the same directory
- * will lead to a {@link LockObtainFailedException}. The
- * {@link LockObtainFailedException} is also thrown if an IndexReader on the
- * same directory is used to delete documents from the index.
- *
- *
- *
- *
- * Expert: IndexWriter allows an optional
- * {@link IndexDeletionPolicy} implementation to be specified. You can use this
- * to control when prior commits are deleted from the index. The default policy
- * is {@link KeepOnlyLastCommitDeletionPolicy} which removes all prior commits
- * as soon as a new commit is done (this matches behavior before 2.2). Creating
- * your own policy can allow you to explicitly keep previous "point in time"
- * commits alive in the index for some time, to allow readers to refresh to the
- * new commit without having the old commit deleted out from under them. This is
- * necessary on filesystems like NFS that do not support "delete on last
- * close" semantics, which Lucene's "point in time" search normally relies on.
- *
- *
- *
- *
- * Expert: IndexWriter allows you to separately change the
- * {@link MergePolicy} and the {@link MergeScheduler}. The {@link MergePolicy}
- * is invoked whenever there are changes to the segments in the index. Its role
- * is to select which merges to do, if any, and return a
- * {@link MergePolicy.MergeSpecification} describing the merges. The default is
- * {@link LogByteSizeMergePolicy}. Then, the {@link MergeScheduler} is invoked
- * with the requested merges and it decides when and how to run the merges. The
- * default is {@link ConcurrentMergeScheduler}.
- *
- *
- *
- *
- * NOTE: if you hit an OutOfMemoryError then IndexWriter will quietly
- * record this fact and block all future segment commits. This is a defensive
- * measure in case any internal state (buffered documents and deletions) were
- * corrupted. Any subsequent calls to {@link #commit()} will throw an
- * IllegalStateException. The only course of action is to call {@link #close()},
- * which internally will call {@link #rollback()}, to undo any changes to the
- * index since the last commit. You can also just call {@link #rollback()}
- * directly.
- *
- *
- *
- *
- * NOTE: {@link IndexWriter} instances are completely thread safe,
- * meaning multiple threads can call any of its methods, concurrently. If your
- * application requires external synchronization, you should not
- * synchronize on the IndexWriter instance as this may cause
- * deadlock; use your own (non-Lucene) objects instead.
- *
- *
- *
- * NOTE: If you call Thread.interrupt() on a thread that's
- * within IndexWriter, IndexWriter will try to catch this (eg, if it's in a
- * wait() or Thread.sleep()), and will then throw the unchecked exception
- * {@link ThreadInterruptedException} and clear the interrupt status on
- * the thread.
- *
- */
+ An IndexWriter creates and maintains an index.
+
+  The {@link OpenMode} option on
+ {@link IndexWriterConfig#setOpenMode(OpenMode)} determines
+ whether a new index is created, or whether an existing index is
+ opened. Note that you can open an index with {@link OpenMode#CREATE}
+ even while readers are using the index. The old readers will
+ continue to search the "point in time" snapshot they had opened,
+ and won't see the newly created index until they re-open. If
+ {@link OpenMode#CREATE_OR_APPEND} is used IndexWriter will create a
+ new index if there is not already an index at the provided path
+ and otherwise open the existing index.
+
+
+  In either case, documents are added with {@link #addDocument(IndexDocument)
+ addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
+ #deleteDocuments(Query)}. A document can be updated with {@link
+ #updateDocument(Term, IndexDocument) updateDocument} (which just deletes
+ and then adds the entire document). When finished adding, deleting
+ and updating documents, {@link #close() close} should be called.
+
+
+
+  These changes are buffered in memory and periodically
+ flushed to the {@link Directory} (during the above method
+ calls). A flush is triggered when there are enough added documents
+ since the last flush. Flushing is triggered either by RAM usage of the
+ documents (see {@link IndexWriterConfig#setRAMBufferSizeMB}) or the
+ number of added documents (see {@link IndexWriterConfig#setMaxBufferedDocs(int)}).
+ The default is to flush when RAM usage hits
+ {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} MB. For
+ best indexing speed you should flush by RAM usage with a
+ large RAM buffer. Additionally, if IndexWriter reaches the configured number of
+ buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms})
+ the deleted terms and queries are flushed and applied to existing segments.
+ In contrast to the other flush options {@link IndexWriterConfig#setRAMBufferSizeMB} and
+ {@link IndexWriterConfig#setMaxBufferedDocs(int)}, deleted terms
+ won't trigger a segment flush. Note that flushing just moves the
+ internal buffered state in IndexWriter into the index, but
+ these changes are not visible to IndexReader until either
+ {@link #commit()} or {@link #close} is called. A flush may
+ also trigger one or more segment merges which by default
+ run with a background thread so as not to block the
+ addDocument calls (see below
+ for changing the {@link MergeScheduler}).
+
+
+  Opening an IndexWriter creates a lock file for the directory in use. Trying to open
+ another IndexWriter on the same directory will lead to a
+ {@link LockObtainFailedException}. The {@link LockObtainFailedException}
+ is also thrown if an IndexReader on the same directory is used to delete documents
+ from the index.
+
+
+
+  Expert: IndexWriter allows an optional
+ {@link IndexDeletionPolicy} implementation to be
+ specified. You can use this to control when prior commits
+ are deleted from the index. The default policy is {@link
+ KeepOnlyLastCommitDeletionPolicy} which removes all prior
+ commits as soon as a new commit is done (this matches
+ behavior before 2.2). Creating your own policy can allow
+ you to explicitly keep previous "point in time" commits
+ alive in the index for some time, to allow readers to
+ refresh to the new commit without having the old commit
+ deleted out from under them. This is necessary on
+ filesystems like NFS that do not support "delete on last
+ close" semantics, which Lucene's "point in time" search
+ normally relies on.
+
+
+  Expert:
+ IndexWriter allows you to separately change
+ the {@link MergePolicy} and the {@link MergeScheduler}.
+ The {@link MergePolicy} is invoked whenever there are
+ changes to the segments in the index. Its role is to
+ select which merges to do, if any, and return a {@link
+ MergePolicy.MergeSpecification} describing the merges.
+ The default is {@link LogByteSizeMergePolicy}. Then, the {@link
+ MergeScheduler} is invoked with the requested merges and
+ it decides when and how to run the merges. The default is
+ {@link ConcurrentMergeScheduler}.
+
+
+  NOTE: if you hit an
+ OutOfMemoryError then IndexWriter will quietly record this
+ fact and block all future segment commits. This is a
+ defensive measure in case any internal state (buffered
+ documents and deletions) were corrupted. Any subsequent
+ calls to {@link #commit()} will throw an
+ IllegalStateException. The only course of action is to
+ call {@link #close()}, which internally will call {@link
+ #rollback()}, to undo any changes to the index since the
+ last commit. You can also just call {@link #rollback()}
+ directly.
+
+
+  NOTE: {@link
+ IndexWriter} instances are completely thread
+ safe, meaning multiple threads can call any of its
+ methods, concurrently. If your application requires
+ external synchronization, you should not
+ synchronize on the IndexWriter instance as
+ this may cause deadlock; use your own (non-Lucene) objects
+ instead.
+
+
+  NOTE: If you call
+ Thread.interrupt() on a thread that's within
+ IndexWriter, IndexWriter will try to catch this (eg, if
+ it's in a wait() or Thread.sleep()), and will then throw
+ the unchecked exception {@link ThreadInterruptedException}
+ and clear the interrupt status on the thread.
+*/
+
/*
- * Clarification: Check Points (and commits) IndexWriter writes new index files
- * to the directory without writing a new segments_N file which references these
- * new files. It also means that the state of the in memory SegmentInfos object
- * is different than the most recent segments_N file written to the directory.
- *
- * Each time the SegmentInfos is changed, and matches the (possibly modified)
- * directory files, we have a new "check point". If the modified/new
- * SegmentInfos is written to disk - as a new (generation of) segments_N file -
- * this check point is also an IndexCommit.
- *
- * A new checkpoint always replaces the previous checkpoint and becomes the new
- * "front" of the index. This allows the IndexFileDeleter to delete files that
- * are referenced only by stale checkpoints. (files that were created since the
- * last commit, but are no longer referenced by the "front" of the index). For
- * this, IndexFileDeleter keeps track of the last non commit checkpoint.
+ * Clarification: Check Points (and commits)
+ * IndexWriter writes new index files to the directory without writing a new segments_N
+ * file which references these new files. It also means that the state of
+ * the in memory SegmentInfos object is different than the most recent
+ * segments_N file written to the directory.
+ *
+ * Each time the SegmentInfos is changed, and matches the (possibly
+ * modified) directory files, we have a new "check point".
+ * If the modified/new SegmentInfos is written to disk - as a new
+ * (generation of) segments_N file - this check point is also an
+ * IndexCommit.
+ *
+ * A new checkpoint always replaces the previous checkpoint and
+ * becomes the new "front" of the index. This allows the IndexFileDeleter
+ * to delete files that are referenced only by stale checkpoints.
+ * (files that were created since the last commit, but are no longer
+ * referenced by the "front" of the index). For this, IndexFileDeleter
+ * keeps track of the last non commit checkpoint.
*/
public class IndexWriter implements Closeable, TwoPhaseCommit {
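
The checkpoint-vs-commit clarification above can be observed from the outside: buffered changes never surface as a new commit until commit() writes a new segments_N file. A small sketch against the public 4.x API of the time; RAMDirectory and StandardAnalyzer are just convenient stand-ins, and nothing here is part of the patch.

import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class CheckpointVsCommit {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_44, new StandardAnalyzer(Version.LUCENE_44)));
    writer.commit();                               // writes the first segments_N
    System.out.println(commits(dir));              // 1

    Document doc = new Document();
    doc.add(new TextField("body", "hello world", Field.Store.NO));
    writer.addDocument(doc);                       // buffered only: no new segments_N yet
    System.out.println(commits(dir));              // still 1

    writer.commit();                               // the current checkpoint becomes an IndexCommit
    System.out.println(commits(dir));              // 1 again: the default deletion policy
                                                   // removes the previous commit
    writer.close();
    dir.close();
  }

  static int commits(Directory dir) throws Exception {
    List<IndexCommit> commits = DirectoryReader.listCommits(dir);
    return commits.size();
  }
}
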
@@ -193,7 +193,7 @@
* Name of the write lock in the index.
*/
public static final String WRITE_LOCK_NAME = "write.lock";
-
+
/** Key for the source of a segment in the {@link SegmentInfo#getDiagnostics() diagnostics}. */
public static final String SOURCE = "source";
/** Source of a segment which results from a merge of other segments. */
@@ -204,50 +204,50 @@
public static final String SOURCE_ADDINDEXES_READERS = "addIndexes(IndexReader...)";
/**
- * Absolute hard maximum length for a term, in bytes once encoded as UTF8. If
- * a term arrives from the analyzer longer than this length, it is skipped and
- * a message is printed to infoStream, if set (see
- * {@link IndexWriterConfig#setInfoStream(InfoStream)}).
+ * Absolute hard maximum length for a term, in bytes once
+ * encoded as UTF8. If a term arrives from the analyzer
+ * longer than this length, it is skipped and a message is
+ * printed to infoStream, if set (see {@link
+ * IndexWriterConfig#setInfoStream(InfoStream)}).
*/
public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8;
volatile private boolean hitOOM;
-
- private final Directory directory; // where this index resides
- private final Analyzer analyzer; // how to analyze text
-
- private volatile long changeCount; // increments every time a change is
- // completed
+
+ private final Directory directory; // where this index resides
+ private final Analyzer analyzer; // how to analyze text
+
+ private volatile long changeCount; // increments every time a change is completed
private long lastCommitChangeCount; // last changeCount that was committed
-
- private List rollbackSegments; // list of segmentInfo we
- // will fallback to if
- // the commit fails
-
- volatile SegmentInfos pendingCommit; // set when a commit is pending (after
- // prepareCommit() & before commit())
+
+  private List<SegmentInfoPerCommit> rollbackSegments;      // list of segmentInfo we will fallback to if the commit fails
+
+ volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
volatile long pendingCommitChangeCount;
+
+ volatile AtomicBoolean deletesPending; // set when there are pending deletes
+ // to be flushed before adding updates
   private Collection<String> filesToCommit;
-
- final SegmentInfos segmentInfos; // the segments
+
+ final SegmentInfos segmentInfos; // the segments
final FieldNumbers globalFieldNumberMap;
-
+
private DocumentsWriter docWriter;
final IndexFileDeleter deleter;
-
+
// used by forceMerge to note those needing merging
   private Map<SegmentInfoPerCommit,Boolean> segmentsToMerge = new HashMap<SegmentInfoPerCommit,Boolean>();
private int mergeMaxNumSegments;
-
+
private Lock writeLock;
-
+
private volatile boolean closed;
private volatile boolean closing;
-
+
// Holds all SegmentInfo instances currently involved in
// merges
   private HashSet<SegmentInfoPerCommit> mergingSegments = new HashSet<SegmentInfoPerCommit>();
-
+
private MergePolicy mergePolicy;
private final MergeScheduler mergeScheduler;
   private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
@@ -255,106 +255,95 @@
   private List<MergePolicy.OneMerge> mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
private long mergeGen;
private boolean stopMerges;
-
+
final AtomicInteger flushCount = new AtomicInteger();
final AtomicInteger flushDeletesCount = new AtomicInteger();
-
+
final ReaderPool readerPool = new ReaderPool();
final BufferedDeletesStream bufferedDeletesStream;
-
- private boolean updatesPending;
-
+
// This is a "write once" variable (like the organic dye
// on a DVD-R that may or may not be heated by a laser and
// then cooled to permanently record the event): it's
// false, until getReader() is called for the first time,
// at which point it's switched to true and never changes
- // back to false. Once this is true, we hold open and
+ // back to false. Once this is true, we hold open and
// reuse SegmentReader instances internally for applying
// deletes, doing merges, and reopening near real-time
// readers.
private volatile boolean poolReaders;
-
+
// The instance that was passed to the constructor. It is saved only in order
// to allow users to query an IndexWriter settings.
private final LiveIndexWriterConfig config;
-
+
DirectoryReader getReader() throws IOException {
return getReader(true);
}
-
+
/**
- * Expert: returns a readonly reader, covering all committed as well as
- * un-committed changes to the index. This provides "near real-time"
- * searching, in that changes made during an IndexWriter session can be
- * quickly made available for searching without closing the writer nor calling
- * {@link #commit}.
- *
- *
- * Note that this is functionally equivalent to calling {#flush} and then
- * opening a new reader. But the turnaround time of this method should be
- * faster since it avoids the potentially costly {@link #commit}.
- *
- *
- *
- * You must close the {@link IndexReader} returned by this method once you are
- * done using it.
- *
- *
- *
- * It's near real-time because there is no hard guarantee on how
- * quickly you can get a new reader after making changes with IndexWriter.
- * You'll have to experiment in your situation to determine if it's fast
- * enough. As this is a new and experimental feature, please report back on
- * your findings so we can learn, improve and iterate.
- *
- *
- *
- * The resulting reader supports {@link DirectoryReader#openIfChanged}, but
- * that call will simply forward back to this method (though this may change
- * in the future).
- *
- *
- *
- * The very first time this method is called, this writer instance will make
- * every effort to pool the readers that it opens for doing merges, applying
- * deletes, etc. This means additional resources (RAM, file descriptors, CPU
- * time) will be consumed.
- *
- *
- *
- * For lower latency on reopening a reader, you should call
- * {@link IndexWriterConfig#setMergedSegmentWarmer} to pre-warm a newly merged
- * segment before it's committed to the index. This is important for
- * minimizing index-to-search delay after a large merge.
- *
- *
- *
- * If an addIndexes* call is running in another thread, then this reader will
- * only search those segments from the foreign index that have been
- * successfully copied over, so far
- *
- * .
- *
- *
- * NOTE: Once the writer is closed, any outstanding readers may
- * continue to be used. However, if you attempt to reopen any of those
- * readers, you'll hit an {@link AlreadyClosedException}.
- *
- *
+ * Expert: returns a readonly reader, covering all
+ * committed as well as un-committed changes to the index.
+ * This provides "near real-time" searching, in that
+ * changes made during an IndexWriter session can be
+ * quickly made available for searching without closing
+ * the writer nor calling {@link #commit}.
+ *
+ *
+   * Note that this is functionally equivalent to calling
+ * {#flush} and then opening a new reader. But the turnaround time of this
+ * method should be faster since it avoids the potentially
+ * costly {@link #commit}.
+ *
+ *
+   * You must close the {@link IndexReader} returned by
+ * this method once you are done using it.
+ *
+ *
+   * It's near real-time because there is no hard
+ * guarantee on how quickly you can get a new reader after
+ * making changes with IndexWriter. You'll have to
+ * experiment in your situation to determine if it's
+ * fast enough. As this is a new and experimental
+ * feature, please report back on your findings so we can
+ * learn, improve and iterate.
+ *
+ *
+   * The resulting reader supports {@link
+ * DirectoryReader#openIfChanged}, but that call will simply forward
+ * back to this method (though this may change in the
+ * future).
+ *
+ *
+   * The very first time this method is called, this
+ * writer instance will make every effort to pool the
+ * readers that it opens for doing merges, applying
+ * deletes, etc. This means additional resources (RAM,
+ * file descriptors, CPU time) will be consumed.
+ *
+ *
+   * For lower latency on reopening a reader, you should
+ * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
+ * pre-warm a newly merged segment before it's committed
+ * to the index. This is important for minimizing
+ * index-to-search delay after a large merge.
+ *
+ *
+   * If an addIndexes* call is running in another thread,
+ * then this reader will only search those segments from
+ * the foreign index that have been successfully copied
+   * over, so far.
+ *
+ *
+   * NOTE: Once the writer is closed, any
+ * outstanding readers may continue to be used. However,
+ * if you attempt to reopen any of those readers, you'll
+ * hit an {@link AlreadyClosedException}.
+ *
* @lucene.experimental
- *
- * @return IndexReader that covers entire index plus all changes made so far
- * by this IndexWriter instance
- *
- * @throws IOException
- * If there is a low-level I/O error
+ *
+ * @return IndexReader that covers entire index plus all
+ * changes made so far by this IndexWriter instance
+ *
+ * @throws IOException If there is a low-level I/O error
*/
DirectoryReader getReader(boolean applyAllDeletes) throws IOException {
ensureOpen();
-
+
final long tStart = System.currentTimeMillis();
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "flush at getReader");
}
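
The getReader javadoc in this hunk spells out the near-real-time contract; here is a usage sketch that goes through the public DirectoryReader entry points (which forward to getReader). It is written against the 4.x API of the time and is not part of the patch.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class NrtReaderSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_44, new StandardAnalyzer(Version.LUCENE_44)));

    Document doc = new Document();
    doc.add(new TextField("body", "near real time", Field.Store.NO));
    writer.addDocument(doc);

    // NRT reader: sees the uncommitted document without commit() or close().
    DirectoryReader reader = DirectoryReader.open(writer, true);
    System.out.println("numDocs=" + reader.numDocs());   // 1

    writer.addDocument(doc);
    // openIfChanged forwards back to the writer and returns a fresh reader, or null if nothing changed.
    DirectoryReader newReader = DirectoryReader.openIfChanged(reader, writer, true);
    if (newReader != null) {
      reader.close();
      reader = newReader;
    }
    System.out.println("numDocs=" + reader.numDocs());   // 2

    reader.close();
    writer.close();
    dir.close();
  }
}
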
@@ -366,10 +355,11 @@
doBeforeFlush();
boolean anySegmentFlushed = false;
/*
- * for releasing a NRT reader we must ensure that DW doesn't add any
- * segments or deletes until we are done with creating the NRT
- * DirectoryReader. We release the two stage full flush after we are done
- * opening the directory reader!
+ * for releasing a NRT reader we must ensure that
+ * DW doesn't add any segments or deletes until we are
+ * done with creating the NRT DirectoryReader.
+ * We release the two stage full flush after we are done opening the
+ * directory reader!
*/
boolean success2 = false;
try {
@@ -422,27 +412,26 @@
}
return r;
}
-
- /**
- * Holds shared SegmentReader instances. IndexWriter uses SegmentReaders for
- * 1) applying deletes, 2) doing merges, 3) handing out a real-time reader.
- * This pool reuses instances of the SegmentReaders in all these places if it
- * is in "near real-time mode" (getReader() has been called on this instance).
- */
-
+
+ /** Holds shared SegmentReader instances. IndexWriter uses
+ * SegmentReaders for 1) applying deletes, 2) doing
+ * merges, 3) handing out a real-time reader. This pool
+ * reuses instances of the SegmentReaders in all these
+ * places if it is in "near real-time mode" (getReader()
+ * has been called on this instance). */
+
class ReaderPool {
     private final Map<SegmentInfoPerCommit,ReadersAndLiveDocs> readerMap = new HashMap<SegmentInfoPerCommit,ReadersAndLiveDocs>();
-
+
// used only by asserts
public synchronized boolean infoIsLive(SegmentInfoPerCommit info) {
int idx = segmentInfos.indexOf(info);
- assert idx != -1 : "info=" + info + " isn't live";
- assert segmentInfos.info(idx) == info : "info=" + info
- + " doesn't match live info in segmentInfos";
+ assert idx != -1: "info=" + info + " isn't live";
+ assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
return true;
}
-
+
public synchronized void drop(SegmentInfoPerCommit info) throws IOException {
final ReadersAndLiveDocs rld = readerMap.get(info);
if (rld != null) {
@@ -451,7 +440,7 @@
rld.dropReaders();
}
}
-
+
public synchronized boolean anyPendingDeletes() {
for(ReadersAndLiveDocs rld : readerMap.values()) {
if (rld.getPendingDeleteCount() != 0) {
@@ -463,10 +452,10 @@
}
public synchronized void release(ReadersAndLiveDocs rld) throws IOException {
-
+
// Matches incRef in get:
rld.decRef();
-
+
// Pool still holds a ref:
assert rld.refCount() >= 1;
@@ -482,15 +471,14 @@
           // created new _X_N.del file.
deleter.checkpoint(segmentInfos, false);
}
-
+
rld.dropReaders();
readerMap.remove(rld.info);
}
}
-
- /**
- * Remove all our references to readers, and commits any pending changes.
- */
+
+ /** Remove all our references to readers, and commits
+ * any pending changes. */
synchronized void dropAll(boolean doSave) throws IOException {
Throwable priorE = null;
       final Iterator<Map.Entry<SegmentInfoPerCommit,ReadersAndLiveDocs>> it = readerMap.entrySet().iterator();
@@ -510,13 +498,13 @@
priorE = t;
}
}
-
+
// Important to remove as-we-go, not with .clear()
// in the end, in case we hit an exception;
// otherwise we could over-decref if close() is
// called again:
it.remove();
-
+
// NOTE: it is allowed that these decRefs do not
// actually close the SRs; this happens when a
// near real-time reader is kept open after the
@@ -534,12 +522,12 @@
throw new RuntimeException(priorE);
}
}
-
+
/**
- * Commit live docs changes for the segment readers for the provided infos.
- *
- * @throws IOException
- * If there is a low-level I/O error
+ * Commit live docs changes for the segment readers for
+ * the provided infos.
+ *
+ * @throws IOException If there is a low-level I/O error
*/
public synchronized void commit(SegmentInfos infos) throws IOException {
for (SegmentInfoPerCommit info : infos) {
@@ -556,17 +544,16 @@
}
}
}
-
+
/**
- * Obtain a ReadersAndLiveDocs instance from the readerPool. If create is
- * true, you must later call {@link #release(ReadersAndLiveDocs)}.
+ * Obtain a ReadersAndLiveDocs instance from the
+ * readerPool. If create is true, you must later call
+ * {@link #release(ReadersAndLiveDocs)}.
*/
- public synchronized ReadersAndLiveDocs get(SegmentInfoPerCommit info,
- boolean create) {
-
- assert info.info.dir == directory : "info.dir=" + info.info.dir + " vs "
- + directory;
-
+ public synchronized ReadersAndLiveDocs get(SegmentInfoPerCommit info, boolean create) {
+
+ assert info.info.dir == directory: "info.dir=" + info.info.dir + " vs " + directory;
+
ReadersAndLiveDocs rld = readerMap.get(info);
if (rld == null) {
if (!create) {
@@ -576,15 +563,14 @@
// Steal initial reference:
readerMap.put(info, rld);
} else {
- assert rld.info == info : "rld.info=" + rld.info + " info=" + info
- + " isLive?=" + infoIsLive(rld.info) + " vs " + infoIsLive(info);
+ assert rld.info == info: "rld.info=" + rld.info + " info=" + info + " isLive?=" + infoIsLive(rld.info) + " vs " + infoIsLive(info);
}
-
+
if (create) {
// Return ref to caller:
rld.incRef();
}
-
+
assert noDups();
return rld;
@@ -601,22 +587,23 @@
return true;
}
}
-
+
/**
- * Obtain the number of deleted docs for a pooled reader. If the reader isn't
- * being pooled, the segmentInfo's delCount is returned.
+ * Obtain the number of deleted docs for a pooled reader.
+ * If the reader isn't being pooled, the segmentInfo's
+ * delCount is returned.
*/
public int numDeletedDocs(SegmentInfoPerCommit info) {
ensureOpen(false);
int delCount = info.getDelCount();
-
+
final ReadersAndLiveDocs rld = readerPool.get(info, false);
if (rld != null) {
delCount += rld.getPendingDeleteCount();
}
return delCount;
}
-
+
/**
* Used internally to throw an {@link AlreadyClosedException} if this
* IndexWriter has been closed or is in the process of closing.
@@ -628,34 +615,32 @@
* @throws AlreadyClosedException
* if this IndexWriter is closed or in the process of closing
*/
- protected final void ensureOpen(boolean failIfClosing)
- throws AlreadyClosedException {
+ protected final void ensureOpen(boolean failIfClosing) throws AlreadyClosedException {
if (closed || (failIfClosing && closing)) {
throw new AlreadyClosedException("this IndexWriter is closed");
}
}
-
+
/**
- * Used internally to throw an {@link AlreadyClosedException} if this
- * IndexWriter has been closed ({@code closed=true}) or is in the process of
+ * Used internally to throw an {@link
+ * AlreadyClosedException} if this IndexWriter has been
+ * closed ({@code closed=true}) or is in the process of
* closing ({@code closing=true}).
*
* Calls {@link #ensureOpen(boolean) ensureOpen(true)}.
- *
- * @throws AlreadyClosedException
- * if this IndexWriter is closed
+ * @throws AlreadyClosedException if this IndexWriter is closed
*/
protected final void ensureOpen() throws AlreadyClosedException {
ensureOpen(true);
}
-
+
final Codec codec; // for writing new segments
-
+
/**
* Constructs a new IndexWriter per the settings given in conf.
- * Note that the passed in {@link IndexWriterConfig} is privately cloned; if
- * you need to make subsequent "live" changes to the configuration use
- * {@link #getConfig}.
+ * Note that the passed in {@link IndexWriterConfig} is
+ * privately cloned; if you need to make subsequent "live"
+ * changes to the configuration use {@link #getConfig}.
*
*
* @param d
@@ -679,15 +664,16 @@
mergePolicy.setIndexWriter(this);
mergeScheduler = config.getMergeScheduler();
codec = config.getCodec();
-
+
bufferedDeletesStream = new BufferedDeletesStream(infoStream);
poolReaders = config.getReaderPooling();
+ deletesPending = new AtomicBoolean(false);
writeLock = directory.makeLock(WRITE_LOCK_NAME);
-
+
if (!writeLock.obtain(config.getWriteLockTimeout())) // obtain write lock
- throw new LockObtainFailedException("Index locked for write: " + writeLock);
-
+ throw new LockObtainFailedException("Index locked for write: " + writeLock);
+
boolean success = false;
try {
OpenMode mode = config.getOpenMode();
@@ -700,17 +686,17 @@
// CREATE_OR_APPEND - create only if an index does not exist
create = !DirectoryReader.indexExists(directory);
}
-
+
// If index is too old, reading the segments will throw
// IndexFormatTooOldException.
segmentInfos = new SegmentInfos();
-
+
boolean initialIndexExists = true;
if (create) {
- // Try to read first. This is to allow create
+ // Try to read first. This is to allow create
// against an index that's currently open for
- // searching. In this case we write the next
+ // searching. In this case we write the next
// segments_N file with no segments:
try {
segmentInfos.read(directory);
@@ -719,49 +705,47 @@
// Likely this means it's a fresh directory
initialIndexExists = false;
}
-
+
// Record that we have a change (zero out all
// segments) pending:
changed();
} else {
segmentInfos.read(directory);
-
+
IndexCommit commit = config.getIndexCommit();
if (commit != null) {
// Swap out all segments, but, keep metadata in
// SegmentInfos, like version & generation, to
- // preserve write-once. This is important if
+ // preserve write-once. This is important if
// readers are open against the future commit
// points.
- if (commit.getDirectory() != directory) throw new IllegalArgumentException(
- "IndexCommit's directory doesn't match my directory");
+ if (commit.getDirectory() != directory)
+ throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory");
SegmentInfos oldInfos = new SegmentInfos();
oldInfos.read(directory, commit.getSegmentsFileName());
segmentInfos.replace(oldInfos);
changed();
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW",
- "init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
+ infoStream.message("IW", "init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
}
}
}
-
+
rollbackSegments = segmentInfos.createBackupSegmentInfos();
-
+
// start with previous field numbers, but new FieldInfos
globalFieldNumberMap = getFieldNumberMap();
- docWriter = new DocumentsWriter(codec, config, directory, this,
- globalFieldNumberMap, bufferedDeletesStream);
-
+ docWriter = new DocumentsWriter(codec, config, directory, this, globalFieldNumberMap, bufferedDeletesStream);
+
// Default deleter (for backwards compatibility) is
// KeepOnlyLastCommitDeleter:
- synchronized (this) {
+ synchronized(this) {
deleter = new IndexFileDeleter(directory,
config.getIndexDeletionPolicy(),
segmentInfos, infoStream, this,
initialIndexExists);
}
-
+
if (deleter.startingCommitDeleted) {
// Deletion policy deleted the "head" commit point.
// We have to mark ourself as changed so that if we
@@ -769,19 +753,18 @@
// segments_N file.
changed();
}
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "init: create=" + create);
messageState();
}
-
+
success = true;
-
+
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW",
- "init: hit exception on init; releasing write lock");
+ infoStream.message("IW", "init: hit exception on init; releasing write lock");
}
try {
writeLock.release();
@@ -792,31 +775,31 @@
}
}
}
-
+
private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
Directory cfsDir = null;
try {
if (info.getUseCompoundFile()) {
cfsDir = new CompoundFileDirectory(info.dir,
- IndexFileNames.segmentFileName(info.name, "",
- IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READONCE,
- false);
+ IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION),
+ IOContext.READONCE,
+ false);
} else {
cfsDir = info.dir;
}
- return info.getCodec().fieldInfosFormat().getFieldInfosReader()
- .read(cfsDir, info.name, IOContext.READONCE);
+ return info.getCodec().fieldInfosFormat().getFieldInfosReader().read(cfsDir,
+ info.name,
+ IOContext.READONCE);
} finally {
if (info.getUseCompoundFile() && cfsDir != null) {
cfsDir.close();
}
}
}
-
+
/**
- * Loads or returns the already loaded the global field number map for this
- * {@link SegmentInfos}. If this {@link SegmentInfos} has no global field
- * number map the returned instance is empty
+   * Loads or returns the already loaded global field number map for this {@link SegmentInfos}.
+ * If this {@link SegmentInfos} has no global field number map the returned instance is empty
*/
private FieldNumbers getFieldNumberMap() throws IOException {
final FieldNumbers map = new FieldNumbers();
@@ -826,55 +809,57 @@
map.addOrGet(fi.name, fi.number, fi.getDocValuesType());
}
}
-
+
return map;
}
/**
- * Returns a {@link LiveIndexWriterConfig}, which can be used to query the
- * IndexWriter current settings, as well as modify "live" ones.
+ * Returns a {@link LiveIndexWriterConfig}, which can be used to query the IndexWriter
+ * current settings, as well as modify "live" ones.
*/
public LiveIndexWriterConfig getConfig() {
ensureOpen(false);
return config;
}
-
+
private void messageState() {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "\ndir=" + directory + "\n" + "index="
- + segString() + "\n" + "version=" + Constants.LUCENE_VERSION + "\n"
- + config.toString());
+ infoStream.message("IW", "\ndir=" + directory + "\n" +
+ "index=" + segString() + "\n" +
+ "version=" + Constants.LUCENE_VERSION + "\n" +
+ config.toString());
}
}
-
+
/**
- * Commits all changes to an index, waits for pending merges to complete, and
- * closes all associated files.
+ * Commits all changes to an index, waits for pending merges
+ * to complete, and closes all associated files.
*
- * This is a "slow graceful shutdown" which may take a long time especially if
- * a big merge is pending: If you only want to close resources use
- * {@link #rollback()}. If you only want to commit pending changes and close
- * resources see {@link #close(boolean)}.
+ * This is a "slow graceful shutdown" which may take a long time
+ * especially if a big merge is pending: If you only want to close
+ * resources use {@link #rollback()}. If you only want to commit
+ * pending changes and close resources see {@link #close(boolean)}.
*
- * Note that this may be a costly operation, so, try to re-use a single writer
- * instead of closing and opening a new one. See {@link #commit()} for caveats
- * about write caching done by some IO devices.
- *
- *
- * If an Exception is hit during close, eg due to disk full or some other
- * reason, then both the on-disk index and the internal state of the
- * IndexWriter instance will be consistent. However, the close will not be
- * complete even though part of it (flushing buffered documents) may have
- * succeeded, so the write lock will still be held.
- *
- *
- *
- * If you can correct the underlying cause (eg free up some disk space) then
- * you can call close() again. Failing that, if you want to force the write
- * lock to be released (dangerous, because you may then lose buffered docs in
- * the IndexWriter instance) then you can do something like this:
- *
- *
+ * Note that this may be a costly
+ * operation, so, try to re-use a single writer instead of
+ * closing and opening a new one. See {@link #commit()} for
+ * caveats about write caching done by some IO devices.
+ *
+   * <p> If an Exception is hit during close, eg due to disk
+ * full or some other reason, then both the on-disk index
+ * and the internal state of the IndexWriter instance will
+ * be consistent. However, the close will not be complete
+ * even though part of it (flushing buffered documents)
+ * may have succeeded, so the write lock will still be
+ * held.
+ *
+   * <p> If you can correct the underlying cause (eg free up
+ * some disk space) then you can call close() again.
+ * Failing that, if you want to force the write lock to be
+ * released (dangerous, because you may then lose buffered
+ * docs in the IndexWriter instance) then you can do
+ * something like this:
- *
- * after which, you must be certain not to use the writer instance
- * anymore.
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer, again. See above for details.
- *
- *
- * @throws IOException
- * if there is a low-level IO error
+ *
+ * after which, you must be certain not to use the writer
+ * instance anymore.
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer, again. See above for details.
+ *
+ * @throws IOException if there is a low-level IO error
*/
@Override
public void close() throws IOException {
close(true);
}
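
The <pre> code sample that the close() javadoc points to ("you can do something like this:") appears to have been lost when this patch was converted to plain text. A hedged reconstruction of the usual pattern from the Lucene javadocs, assuming a Directory named directory and an IndexWriter named writer:

    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;

    static void closeReleasingLock(IndexWriter writer, Directory directory) throws IOException {
      try {
        writer.close();
      } finally {
        // Dangerous: only force the lock free if close() could not complete,
        // and never reuse the writer instance afterwards.
        if (IndexWriter.isLocked(directory)) {
          IndexWriter.unlock(directory);
        }
      }
    }
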
-
+
/**
- * Closes the index with or without waiting for currently running merges to
- * finish. This is only meaningful when using a MergeScheduler that runs
- * merges in background threads.
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer, again. See above for details.
- *
- *
- *
- * NOTE: it is dangerous to always call close(false), especially when
- * IndexWriter is not open for very long, because this can result in "merge
- * starvation" whereby long merges will never have a chance to finish. This
- * will cause too many segments in your index over time.
- *
- *
- * @param waitForMerges
- * if true, this call will block until all merges complete; else, it
- * will ask all running merges to abort, wait until those merges have
- * finished (which should be at most a few seconds), and then return.
+ * Closes the index with or without waiting for currently
+ * running merges to finish. This is only meaningful when
+ * using a MergeScheduler that runs merges in background
+ * threads.
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer, again. See above for details.
+ *
+   * <p><b>NOTE</b>: it is dangerous to always call
+ * close(false), especially when IndexWriter is not open
+ * for very long, because this can result in "merge
+ * starvation" whereby long merges will never have a
+ * chance to finish. This will cause too many segments in
+ * your index over time.
+ *
+ * @param waitForMerges if true, this call will block
+ * until all merges complete; else, it will ask all
+ * running merges to abort, wait until those merges have
+ * finished (which should be at most a few seconds), and
+ * then return.
*/
public void close(boolean waitForMerges) throws IOException {
-
+
// Ensure that only one thread actually gets to do the
// closing, and make sure no commit is also in progress:
- synchronized (commitLock) {
+ synchronized(commitLock) {
if (shouldClose()) {
// If any methods have hit OutOfMemoryError, then abort
// on close, in case the internal state of IndexWriter
@@ -940,12 +924,12 @@
}
}
}
-
+
// Returns true if this thread should attempt to close, or
// false if IndexWriter is now closed; else, waits until
// another thread finishes closing
synchronized private boolean shouldClose() {
- while (true) {
+ while(true) {
if (!closed) {
if (!closing) {
closing = true;
@@ -961,39 +945,34 @@
}
}
}
-
- private void closeInternal(boolean waitForMerges, boolean doFlush)
- throws IOException {
+
+ private void closeInternal(boolean waitForMerges, boolean doFlush) throws IOException {
boolean interrupted = false;
try {
-
+
if (pendingCommit != null) {
- throw new IllegalStateException(
- "cannot close: prepareCommit was already called with no corresponding call to commit");
+ throw new IllegalStateException("cannot close: prepareCommit was already called with no corresponding call to commit");
}
-
+
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "now flush at close waitForMerges="
- + waitForMerges);
+ infoStream.message("IW", "now flush at close waitForMerges=" + waitForMerges);
}
-
+
try {
// Only allow a new merge to be triggered if we are
// going to wait for merges:
if (doFlush) {
flush(waitForMerges, true);
+ docWriter.close();
} else {
- docWriter.abort(); // already closed
+ docWriter.abort(); // already closed -- never sync on IW
}
- docWriter.close();
-
} finally {
try {
- // clean up merge scheduler in all cases, although flushing may have
- // failed:
+ // clean up merge scheduler in all cases, although flushing may have failed:
interrupted = Thread.interrupted();
-
+
if (waitForMerges) {
try {
// Give merge scheduler last chance to run, in case
@@ -1003,13 +982,12 @@
// ignore any interruption, does not matter
interrupted = true;
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW",
- "interrupted while waiting for final merges");
+ infoStream.message("IW", "interrupted while waiting for final merges");
}
}
}
- synchronized (this) {
+ synchronized(this) {
for (;;) {
try {
finishMerges(waitForMerges && !interrupted);
@@ -1020,8 +998,7 @@
// so it will not wait
interrupted = true;
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW",
- "interrupted while waiting for merges to finish");
+ infoStream.message("IW", "interrupted while waiting for merges to finish");
}
}
}
@@ -1029,44 +1006,42 @@
}
} finally {
- // shutdown policy, scheduler and all threads (this call is not
- // interruptible):
+ // shutdown policy, scheduler and all threads (this call is not interruptible):
IOUtils.closeWhileHandlingException(mergePolicy, mergeScheduler);
}
}
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "now call final commit()");
}
-
+
if (doFlush) {
commitInternal();
}
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "at close: " + segString());
}
// used by assert below
final DocumentsWriter oldWriter = docWriter;
- synchronized (this) {
+ synchronized(this) {
readerPool.dropAll(true);
docWriter = null;
deleter.close();
}
-
+
if (writeLock != null) {
- writeLock.release(); // release write lock
+ writeLock.release(); // release write lock
writeLock = null;
}
- synchronized (this) {
+ synchronized(this) {
closed = true;
}
- assert oldWriter.perThreadPool.numDeactivatedThreadStates() == oldWriter.perThreadPool
- .getMaxThreadStates();
+ assert oldWriter.perThreadPool.numDeactivatedThreadStates() == oldWriter.perThreadPool.getMaxThreadStates();
} catch (OutOfMemoryError oom) {
handleOOM(oom, "closeInternal");
} finally {
- synchronized (this) {
+ synchronized(this) {
closing = false;
notifyAll();
if (!closed) {
@@ -1079,54 +1054,54 @@
if (interrupted) Thread.currentThread().interrupt();
}
}
-
+
/** Returns the Directory used by this index. */
public Directory getDirectory() {
return directory;
}
-
+
/** Returns the analyzer used by this index. */
public Analyzer getAnalyzer() {
ensureOpen();
return analyzer;
}
-
- /**
- * Returns total number of docs in this index, including docs not yet flushed
- * (still in the RAM buffer), not counting deletions.
- *
- * @see #numDocs
- */
+
+ /** Returns total number of docs in this index, including
+ * docs not yet flushed (still in the RAM buffer),
+ * not counting deletions.
+ * @see #numDocs */
public synchronized int maxDoc() {
ensureOpen();
int count;
- if (docWriter != null) count = docWriter.getNumDocs();
- else count = 0;
-
+ if (docWriter != null)
+ count = docWriter.getNumDocs();
+ else
+ count = 0;
+
count += segmentInfos.totalDocCount();
return count;
}
-
- /**
- * Returns total number of docs in this index, including docs not yet flushed
- * (still in the RAM buffer), and including deletions. NOTE: buffered
- * deletions are not counted. If you really need these to be counted you
- * should call {@link #commit()} first.
- *
- * @see #numDocs
- */
+
+ /** Returns total number of docs in this index, including
+ * docs not yet flushed (still in the RAM buffer), and
+ * including deletions. NOTE: buffered deletions
+ * are not counted. If you really need these to be
+ * counted you should call {@link #commit()} first.
+ * @see #numDocs */
public synchronized int numDocs() {
ensureOpen();
int count;
- if (docWriter != null) count = docWriter.getNumDocs();
- else count = 0;
-
+ if (docWriter != null)
+ count = docWriter.getNumDocs();
+ else
+ count = 0;
+
for (final SegmentInfoPerCommit info : segmentInfos) {
count += info.info.getDocCount() - numDeletedDocs(info);
}
return count;
}
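
A small sketch of the distinction the two javadocs above draw, assuming an already-open IndexWriter named writer and a Document named doc (names are illustrative):

    writer.addDocument(doc);                      // still buffered in RAM
    writer.deleteDocuments(new Term("id", "1"));  // buffered delete
    int max = writer.maxDoc();   // counts the buffered doc, ignores all deletions
    int num = writer.numDocs();  // subtracts deletes already applied to flushed segments,
                                 // but the buffered delete above is not counted until commit()
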
-
+
/**
* Returns true if this index has deletions (including buffered deletions).
*/
@@ -1148,159 +1123,143 @@
}
return false;
}
-
+
/**
* Adds a document to this index.
- *
- *
- * Note that if an Exception is hit (for example disk full) then the index
- * will be consistent, but this document may not have been added. Furthermore,
- * it's possible the index will have one segment in non-compound format even
- * when using compound files (when a merge has partially succeeded).
- *
- *
- *
- * This method periodically flushes pending documents to the Directory (see above), and also periodically triggers segment merges in
- * the index according to the {@link MergePolicy} in use.
- *
- *
- *
- * Merges temporarily consume space in the directory. The amount of space
- * required is up to 1X the size of all segments being merged, when no
- * readers/searchers are open against the index, and up to 2X the size of all
- * segments being merged when readers/searchers are open against the index
- * (see {@link #forceMerge(int)} for details). The sequence of primitive merge
- * operations performed is governed by the merge policy.
- *
- *
- * Note that each term in the document can be no longer than 16383 characters,
- * otherwise an IllegalArgumentException will be thrown.
- *
- *
- *
- * Note that it's possible to create an invalid Unicode string in java if a
- * UTF16 surrogate pair is malformed. In this case, the invalid characters are
- * silently replaced with the Unicode replacement character U+FFFD.
- *
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- *
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
+ *
+   * <p> Note that if an Exception is hit (for example disk full)
+ * then the index will be consistent, but this document
+ * may not have been added. Furthermore, it's possible
+ * the index will have one segment in non-compound format
+ * even when using compound files (when a merge has
+ * partially succeeded).
+ *
+   * <p> This method periodically flushes pending documents
+ * to the Directory (see above), and
+ * also periodically triggers segment merges in the index
+ * according to the {@link MergePolicy} in use.
+ *
+   * <p> Merges temporarily consume space in the
+ * directory. The amount of space required is up to 1X the
+ * size of all segments being merged, when no
+ * readers/searchers are open against the index, and up to
+ * 2X the size of all segments being merged when
+ * readers/searchers are open against the index (see
+ * {@link #forceMerge(int)} for details). The sequence of
+ * primitive merge operations performed is governed by the
+ * merge policy.
+ *
+   * <p> Note that each term in the document can be no longer
+ * than 16383 characters, otherwise an
+ * IllegalArgumentException will be thrown.
+ *
+   * <p> Note that it's possible to create an invalid Unicode
+ * string in java if a UTF16 surrogate pair is malformed.
+ * In this case, the invalid characters are silently
+ * replaced with the Unicode replacement character
+ * U+FFFD.
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
public void addDocument(IndexDocument doc) throws IOException {
addDocument(doc, analyzer);
}
-
+
/**
* Adds a document to this index, using the provided analyzer instead of the
* value of {@link #getAnalyzer()}.
- *
- *
- * See {@link #addDocument(IndexDocument)} for details on index and
- * IndexWriter state after an Exception, and flushing/merging temporary free
- * space requirements.
- *
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- *
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
+ *
+   * <p> See {@link #addDocument(IndexDocument)} for details on
+ * index and IndexWriter state after an Exception, and
+ * flushing/merging temporary free space requirements.
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
- public void addDocument(IndexDocument doc, Analyzer analyzer)
- throws IOException {
- replaceDocument(null, doc, analyzer);
+ public void addDocument(IndexDocument doc, Analyzer analyzer) throws IOException {
+ updateDocument(null, doc, analyzer);
}
-
+
/**
- * Atomically adds a block of documents with sequentially assigned document
- * IDs, such that an external reader will see all or none of the documents.
- *
- *
- * WARNING: the index does not currently record which documents were
- * added as a block. Today this is fine, because merging will preserve a
- * block. The order of documents within a segment will be preserved, even when
- * child documents within a block are deleted. Most search features (like
- * result grouping and block joining) require you to mark documents; when
- * these documents are deleted these search features will not work as
- * expected. Obviously adding documents to an existing block will require you
- * the reindex the entire block.
- *
- *
- * However it's possible that in the future Lucene may merge more aggressively
- * re-order documents (for example, perhaps to obtain better index
- * compression), in which case you may need to fully re-index your documents
- * at that time.
- *
- *
- * See {@link #addDocument(IndexDocument)} for details on index and
- * IndexWriter state after an Exception, and flushing/merging temporary free
- * space requirements.
- *
- *
- *
- * NOTE: tools that do offline splitting of an index (for example,
- * IndexSplitter in contrib) or re-sorting of documents (for example,
- * IndexSorter in contrib) are not aware of these atomically added documents
- * and will likely break them up. Use such tools at your own risk!
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- *
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
- *
+ * Atomically adds a block of documents with sequentially
+ * assigned document IDs, such that an external reader
+ * will see all or none of the documents.
+ *
+   * <p><b>WARNING</b>: the index does not currently record
+ * which documents were added as a block. Today this is
+ * fine, because merging will preserve a block. The order of
+ * documents within a segment will be preserved, even when child
+ * documents within a block are deleted. Most search features
+ * (like result grouping and block joining) require you to
+ * mark documents; when these documents are deleted these
+ * search features will not work as expected. Obviously adding
+   * documents to an existing block will require you to reindex
+ * the entire block.
+ *
+   * <p> However it's possible that in the future Lucene may
+ * merge more aggressively re-order documents (for example,
+ * perhaps to obtain better index compression), in which case
+ * you may need to fully re-index your documents at that time.
+ *
+   * <p> See {@link #addDocument(IndexDocument)} for details on
+ * index and IndexWriter state after an Exception, and
+ * flushing/merging temporary free space requirements.
+ *
+   * <p><b>NOTE</b>: tools that do offline splitting of an index
+ * (for example, IndexSplitter in contrib) or
+ * re-sorting of documents (for example, IndexSorter in
+ * contrib) are not aware of these atomically added documents
+ * and will likely break them up. Use such tools at your
+ * own risk!
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ *
* @lucene.experimental
*/
-  public void addDocuments(Iterable<? extends IndexDocument> docs)
-      throws IOException {
+  public void addDocuments(Iterable<? extends IndexDocument> docs) throws IOException {
addDocuments(docs, analyzer);
}
-
+
/**
- * Atomically adds a block of documents, analyzed using the provided analyzer,
- * with sequentially assigned document IDs, such that an external reader will
- * see all or none of the documents.
- *
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
- *
+ * Atomically adds a block of documents, analyzed using the
+ * provided analyzer, with sequentially assigned document
+ * IDs, such that an external reader will see all or none
+ * of the documents.
+ *
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ *
* @lucene.experimental
*/
-  public void addDocuments(Iterable<? extends IndexDocument> docs,
-      Analyzer analyzer) throws IOException {
+  public void addDocuments(Iterable<? extends IndexDocument> docs, Analyzer analyzer) throws IOException {
updateDocuments(null, docs, analyzer);
}
-
+
/**
- * Atomically deletes documents matching the provided delTerm and adds a block
- * of documents with sequentially assigned document IDs, such that an external
- * reader will see all or none of the documents.
- *
+ * Atomically deletes documents matching the provided
+ * delTerm and adds a block of documents with sequentially
+ * assigned document IDs, such that an external reader
+ * will see all or none of the documents.
+ *
* See {@link #addDocuments(Iterable)}.
- *
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
- *
+ *
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ *
* @lucene.experimental
*/
public void replaceDocuments(Term delTerm,
@@ -1318,20 +1277,19 @@
                               Iterable<? extends IndexDocument> docs) throws IOException {
replaceDocuments(delTerm, docs, analyzer);
}
-
+
/**
- * Atomically deletes documents matching the provided delTerm and adds a block
- * of documents, analyzed using the provided analyzer, with sequentially
- * assigned document IDs, such that an external reader will see all or none of
- * the documents.
- *
+ * Atomically deletes documents matching the provided
+ * delTerm and adds a block of documents, analyzed using
+ * the provided analyzer, with sequentially
+ * assigned document IDs, such that an external reader
+ * will see all or none of the documents.
+ *
* See {@link #addDocuments(Iterable)}.
- *
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
- *
+ *
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ *
* @lucene.experimental
*/
public void replaceDocuments(Term delTerm,
@@ -1344,6 +1302,9 @@
try {
anySegmentFlushed = docWriter.updateDocuments(docs, analyzer, delTerm);
success = true;
+ if (delTerm != null) {
+ deletesPending.set(true);
+ }
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
@@ -1435,6 +1396,12 @@
public void updateFields(FieldsUpdate.Operation operation, Term term,
IndexDocument fields, Analyzer analyzer) throws IOException {
ensureOpen();
+
+ if (deletesPending.get()) {
+ commit();
+ deletesPending.set(false);
+ }
+
try {
boolean success = false;
boolean anySegmentFlushed = false;
@@ -1442,7 +1409,6 @@
anySegmentFlushed = docWriter.updateFields(term, operation, fields,
analyzer, globalFieldNumberMap);
success = true;
- updatesPending = true;
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
@@ -1461,44 +1427,43 @@
/**
* Deletes the document(s) containing term.
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- *
- * @param term
- * the term to identify the documents to be deleted
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+ * @param term the term to identify the documents to be deleted
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
public void deleteDocuments(Term term) throws IOException {
ensureOpen();
try {
docWriter.deleteTerms(term);
+ if (term != null) {
+ deletesPending.set(true);
+ }
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Term)");
}
}
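
A small delete-by-term sketch, assuming "id" was indexed as a StringField so the term text matches exactly:

    writer.deleteDocuments(new Term("id", "42"));
    writer.commit();   // the delete becomes visible to readers opened after this point
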
-
- /**
- * Expert: attempts to delete by document ID, as long as the provided reader
- * is a near-real-time reader (from
- * {@link DirectoryReader#open(IndexWriter,boolean)}). If the provided reader
- * is an NRT reader obtained from this writer, and its segment has not been
- * merged away, then the delete succeeds and this method returns true; else,
- * it returns false the caller must then separately delete by Term or Query.
- *
- * NOTE: this method can only delete documents visible to the currently
- * open NRT reader. If you need to delete documents indexed after opening the
- * NRT reader you must use the other deleteDocument methods (e.g.,
- * {@link #deleteDocuments(Term)}).
- */
- public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID)
- throws IOException {
-
+
+ /** Expert: attempts to delete by document ID, as long as
+ * the provided reader is a near-real-time reader (from {@link
+ * DirectoryReader#open(IndexWriter,boolean)}). If the
+ * provided reader is an NRT reader obtained from this
+ * writer, and its segment has not been merged away, then
+ * the delete succeeds and this method returns true; else, it
+ * returns false the caller must then separately delete by
+ * Term or Query.
+ *
+ * NOTE: this method can only delete documents
+ * visible to the currently open NRT reader. If you need
+ * to delete documents indexed after opening the NRT
+ * reader you must use the other deleteDocument methods
+ * (e.g., {@link #deleteDocuments(Term)}). */
+ public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException {
+
final AtomicReader reader;
if (readerIn instanceof AtomicReader) {
// Reader is already atomic: use the incoming docID:
@@ -1512,27 +1477,25 @@
assert docID >= 0;
assert docID < reader.maxDoc();
}
-
+
if (!(reader instanceof SegmentReader)) {
- throw new IllegalArgumentException(
- "the reader must be a SegmentReader or composite reader containing only SegmentReaders");
+ throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders");
}
-
+
final SegmentInfoPerCommit info = ((SegmentReader) reader).getSegmentInfo();
-
+
// TODO: this is a slow linear search, but, number of
// segments should be contained unless something is
// seriously wrong w/ the index, so it should be a minor
// cost:
-
+
if (segmentInfos.indexOf(info) != -1) {
ReadersAndLiveDocs rld = readerPool.get(info, false);
if (rld != null) {
- synchronized (bufferedDeletesStream) {
+ synchronized(bufferedDeletesStream) {
rld.initWritableLiveDocs();
if (rld.delete(docID)) {
- final int fullDelCount = rld.info.getDelCount()
- + rld.getPendingDeleteCount();
+ final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
if (fullDelCount == rld.info.info.getDocCount()) {
// If a merge has already registered for this
// segment, we leave it in the readerPool; the
@@ -1544,92 +1507,92 @@
checkpoint();
}
}
-
+
// Must bump changeCount so if no other changes
// happened, we still commit this change:
changed();
}
- // System.out.println(" yes " + info.info.name + " " + docID);
+ //System.out.println(" yes " + info.info.name + " " + docID);
return true;
}
} else {
- // System.out.println(" no rld " + info.info.name + " " + docID);
+ //System.out.println(" no rld " + info.info.name + " " + docID);
}
} else {
- // System.out.println(" no seg " + info.info.name + " " + docID);
+ //System.out.println(" no seg " + info.info.name + " " + docID);
}
return false;
}
-
+
/**
- * Deletes the document(s) containing any of the terms. All given deletes are
- * applied and flushed atomically at the same time.
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- *
- * @param terms
- * array of terms to identify the documents to be deleted
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
+ * Deletes the document(s) containing any of the
+ * terms. All given deletes are applied and flushed atomically
+ * at the same time.
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+ * @param terms array of terms to identify the documents
+ * to be deleted
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
public void deleteDocuments(Term... terms) throws IOException {
ensureOpen();
try {
docWriter.deleteTerms(terms);
+ if (terms != null && terms.length > 0) {
+ deletesPending.set(true);
+ }
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Term..)");
}
}
-
+
/**
* Deletes the document(s) matching the provided query.
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- *
- * @param query
- * the query to identify the documents to be deleted
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+ * @param query the query to identify the documents to be deleted
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
public void deleteDocuments(Query query) throws IOException {
ensureOpen();
try {
docWriter.deleteQueries(query);
+ if (query != null) {
+ deletesPending.set(true);
+ }
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Query)");
}
}
-
+
/**
- * Deletes the document(s) matching any of the provided queries. All given
- * deletes are applied and flushed atomically at the same time.
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- *
- * @param queries
- * array of queries to identify the documents to be deleted
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
+ * Deletes the document(s) matching any of the provided queries.
+ * All given deletes are applied and flushed atomically at the same time.
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+ * @param queries array of queries to identify the documents
+ * to be deleted
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
public void deleteDocuments(Query... queries) throws IOException {
ensureOpen();
try {
docWriter.deleteQueries(queries);
+ if (queries != null && queries.length > 0) {
+ deletesPending.set(true);
+ }
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Query..)");
}
@@ -1673,28 +1636,24 @@
ensureOpen();
replaceDocument(term, doc, analyzer);
}
-
+
/**
- * Updates a document by first deleting the document(s) containing
- * term and then adding the new document. The delete and then add
- * are atomic as seen by a reader on the same index (flush may happen only
- * after the add).
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- *
- * @param term
- * the term to identify the document(s) to be deleted
- * @param doc
- * the document to be added
- * @param analyzer
- * the analyzer to use when analyzing the document
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
+ * Updates a document by first deleting the document(s)
+ * containing term and then adding the new
+ * document. The delete and then add are atomic as seen
+ * by a reader on the same index (flush may happen only after
+ * the add).
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+ * @param term the term to identify the document(s) to be
+ * deleted
+ * @param doc the document to be added
+ * @param analyzer the analyzer to use when analyzing the document
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
public void replaceDocument(Term term, IndexDocument doc, Analyzer analyzer)
throws IOException {
@@ -1705,6 +1664,9 @@
try {
anySegmentFlushed = docWriter.updateDocument(doc, analyzer, term);
success = true;
+ if (term != null) {
+ deletesPending.set(true);
+ }
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
@@ -1712,7 +1674,7 @@
}
}
}
-
+
if (anySegmentFlushed) {
maybeMerge(MergeTrigger.SEGMENT_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS);
}
@@ -1738,20 +1700,20 @@
}
// for test purpose
- final synchronized int getSegmentCount() {
+ final synchronized int getSegmentCount(){
return segmentInfos.size();
}
-
+
// for test purpose
- final synchronized int getNumBufferedDocuments() {
+ final synchronized int getNumBufferedDocuments(){
return docWriter.getNumDocs();
}
-
+
// for test purpose
  final synchronized Collection<String> getIndexFileNames() throws IOException {
return segmentInfos.files(directory, true);
}
-
+
// for test purpose
final synchronized int getDocCount(int i) {
if (i >= 0 && i < segmentInfos.size()) {
@@ -1760,407 +1722,392 @@
return -1;
}
}
-
+
// for test purpose
final int getFlushCount() {
return flushCount.get();
}
-
+
// for test purpose
final int getFlushDeletesCount() {
return flushDeletesCount.get();
}
-
+
final String newSegmentName() {
// Cannot synchronize on IndexWriter because that causes
// deadlock
- synchronized (segmentInfos) {
+ synchronized(segmentInfos) {
// Important to increment changeCount so that the
- // segmentInfos is written on close. Otherwise we
+ // segmentInfos is written on close. Otherwise we
// could close, re-open and re-return the same segment
// name that was previously returned which can cause
// problems at least with ConcurrentMergeScheduler.
changeCount++;
segmentInfos.changed();
- return "_"
- + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
+ return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
}
}
-
- /**
- * If non-null, information about merges will be printed to this.
+
+ /** If non-null, information about merges will be printed to this.
*/
final InfoStream infoStream;
-
+
/**
- * Forces merge policy to merge segments until there are <= maxNumSegments.
- * The actual merges to be executed are determined by the {@link MergePolicy}.
+ * Forces merge policy to merge segments until there are <=
+ * maxNumSegments. The actual merges to be
+ * executed are determined by the {@link MergePolicy}.
+ *
+   * <p>This is a horribly costly operation, especially when
+ * you pass a small {@code maxNumSegments}; usually you
+ * should only call this if the index is static (will no
+ * longer be changed).
+ *
+   * <p>Note that this requires up to 2X the index size free
+ * space in your Directory (3X if you're using compound
+ * file format). For example, if your index size is 10 MB
+ * then you need up to 20 MB free for this to complete (30
+ * MB if you're using compound file format). Also,
+ * it's best to call {@link #commit()} afterwards,
+ * to allow IndexWriter to free up disk space.
+ *
+   * <p>If some but not all readers re-open while merging
+ * is underway, this will cause > 2X temporary
+ * space to be consumed as those new readers will then
+ * hold open the temporary segments at that time. It is
+ * best not to re-open readers while merging is running.
+ *
+   * <p>The actual temporary usage could be much less than
+ * these figures (it depends on many factors).
+ *
+   * <p>In general, once this completes, the total size of the
+ * index will be less than the size of the starting index.
+ * It could be quite a bit smaller (if there were many
+ * pending deletes) or just slightly smaller.
+ *
+   * <p>If an Exception is hit, for example
+ * due to disk full, the index will not be corrupted and no
+ * documents will be lost. However, it may have
+ * been partially merged (some segments were merged but
+ * not all), and it's possible that one of the segments in
+ * the index will be in non-compound format even when
+ * using compound file format. This will occur when the
+ * Exception is hit during conversion of the segment into
+ * compound format.
+ *
+   * <p>This call will merge those segments present in
+ * the index when the call started. If other threads are
+ * still adding documents and flushing segments, those
+ * newly created segments will not be merged unless you
+ * call forceMerge again.
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+ * with false, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
+ *
+ * @param maxNumSegments maximum number of segments left
+ * in the index after merging finishes
*
- *
- * This is a horribly costly operation, especially when you pass a small
- * {@code maxNumSegments}; usually you should only call this if the index is
- * static (will no longer be changed).
- *
- *
- *
- * Note that this requires up to 2X the index size free space in your
- * Directory (3X if you're using compound file format). For example, if your
- * index size is 10 MB then you need up to 20 MB free for this to complete (30
- * MB if you're using compound file format). Also, it's best to call
- * {@link #commit()} afterwards, to allow IndexWriter to free up disk space.
- *
- *
- *
- * If some but not all readers re-open while merging is underway, this will
- * cause > 2X temporary space to be consumed as those new readers will then
- * hold open the temporary segments at that time. It is best not to re-open
- * readers while merging is running.
- *
- *
- *
- * The actual temporary usage could be much less than these figures (it
- * depends on many factors).
- *
- *
- *
- * In general, once this completes, the total size of the index will be less
- * than the size of the starting index. It could be quite a bit smaller (if
- * there were many pending deletes) or just slightly smaller.
- *
- *
- *
- * If an Exception is hit, for example due to disk full, the index will not be
- * corrupted and no documents will be lost. However, it may have been
- * partially merged (some segments were merged but not all), and it's possible
- * that one of the segments in the index will be in non-compound format even
- * when using compound file format. This will occur when the Exception is hit
- * during conversion of the segment into compound format.
- *
- *
- *
- * This call will merge those segments present in the index when the call
- * started. If other threads are still adding documents and flushing segments,
- * those newly created segments will not be merged unless you call forceMerge
- * again.
- *
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- *
- *
- * NOTE: if you call {@link #close(boolean)} with false, which
- * aborts all running merges, then any thread still running this method might
- * hit a {@link MergePolicy.MergeAbortedException}.
- *
- * @param maxNumSegments
- * maximum number of segments left in the index after merging
- * finishes
- *
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
* @see MergePolicy#findMerges
- *
- */
+ *
+ */
public void forceMerge(int maxNumSegments) throws IOException {
forceMerge(maxNumSegments, true);
}
-
- /**
- * Just like {@link #forceMerge(int)}, except you can specify whether the call
- * should block until all merging completes. This is only meaningful with a
- * {@link MergeScheduler} that is able to run merges in background threads.
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
+
+ /** Just like {@link #forceMerge(int)}, except you can
+ * specify whether the call should block until
+ * all merging completes. This is only meaningful with a
+ * {@link MergeScheduler} that is able to run merges in
+ * background threads.
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
*/
public void forceMerge(int maxNumSegments, boolean doWait) throws IOException {
ensureOpen();
-
- if (maxNumSegments < 1) throw new IllegalArgumentException(
- "maxNumSegments must be >= 1; got " + maxNumSegments);
-
+
+ if (maxNumSegments < 1)
+ throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "forceMerge: index now " + segString());
infoStream.message("IW", "now flush at forceMerge");
}
-
+
flush(true, true);
-
- synchronized (this) {
+
+ synchronized(this) {
resetMergeExceptions();
segmentsToMerge.clear();
- for (SegmentInfoPerCommit info : segmentInfos) {
+ for(SegmentInfoPerCommit info : segmentInfos) {
segmentsToMerge.put(info, Boolean.TRUE);
}
mergeMaxNumSegments = maxNumSegments;
-
+
// Now mark all pending & running merges for forced
// merge:
- for (final MergePolicy.OneMerge merge : pendingMerges) {
+ for(final MergePolicy.OneMerge merge : pendingMerges) {
merge.maxNumSegments = maxNumSegments;
segmentsToMerge.put(merge.info, Boolean.TRUE);
}
-
- for (final MergePolicy.OneMerge merge : runningMerges) {
+
+ for (final MergePolicy.OneMerge merge: runningMerges) {
merge.maxNumSegments = maxNumSegments;
segmentsToMerge.put(merge.info, Boolean.TRUE);
}
}
-
+
maybeMerge(MergeTrigger.EXPLICIT, maxNumSegments);
-
+
if (doWait) {
- synchronized (this) {
- while (true) {
-
+ synchronized(this) {
+ while(true) {
+
if (hitOOM) {
- throw new IllegalStateException(
- "this writer hit an OutOfMemoryError; cannot complete forceMerge");
+ throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge");
}
-
+
if (mergeExceptions.size() > 0) {
// Forward any exceptions in background merge
// threads to the current thread:
final int size = mergeExceptions.size();
- for (int i = 0; i < size; i++) {
+          for(int i=0;i<size;i++) {
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- *
- *
- * NOTE: if you call {@link #close(boolean)} with false, which
- * aborts all running merges, then any thread still running this method might
- * hit a {@link MergePolicy.MergeAbortedException}.
+
+ /** Just like {@link #forceMergeDeletes()}, except you can
+ * specify whether the call should block until the
+ * operation completes. This is only meaningful with a
+ * {@link MergeScheduler} that is able to run merges in
+ * background threads.
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+ * with false, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
*/
- public void forceMergeDeletes(boolean doWait) throws IOException {
+ public void forceMergeDeletes(boolean doWait)
+ throws IOException {
ensureOpen();
-
+
flush(true, true);
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "forceMergeDeletes: index now " + segString());
}
-
+
MergePolicy.MergeSpecification spec;
-
- synchronized (this) {
+
+ synchronized(this) {
spec = mergePolicy.findForcedDeletesMerges(segmentInfos);
if (spec != null) {
final int numMerges = spec.merges.size();
- for (int i = 0; i < numMerges; i++)
+        for(int i=0;i<numMerges;i++)
- * This is often a horribly costly operation; rarely is it warranted.
- *
- *
- *
- * To see how many deletions you have pending in your index, call
- * {@link IndexReader#numDeletedDocs}.
- *
- *
- *
- * NOTE: this method first flushes a new segment (if there are indexed
- * documents), and applies all buffered deletes.
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
+ * Forces merging of all segments that have deleted
+ * documents. The actual merges to be executed are
+ * determined by the {@link MergePolicy}. For example,
+ * the default {@link TieredMergePolicy} will only
+ * pick a segment if the percentage of
+ * deleted docs is over 10%.
+ *
+   * <p>This is often a horribly costly operation; rarely
+ * is it warranted.
+ *
+   * <p>To see how
+ * many deletions you have pending in your index, call
+ * {@link IndexReader#numDeletedDocs}.
+ *
+   * <p><b>NOTE</b>: this method first flushes a new
+ * segment (if there are indexed documents), and applies
+ * all buffered deletes.
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
*/
public void forceMergeDeletes() throws IOException {
forceMergeDeletes(true);
}
-
+
/**
- * Expert: asks the mergePolicy whether any merges are necessary now and if
- * so, runs the requested merges and then iterate (test again if merges are
- * needed) until no more merges are returned by the mergePolicy.
+ * Expert: asks the mergePolicy whether any merges are
+ * necessary now and if so, runs the requested merges and
+ * then iterate (test again if merges are needed) until no
+ * more merges are returned by the mergePolicy.
+ *
+ * Explicit calls to maybeMerge() are usually not
+ * necessary. The most common case is when merge policy
+ * parameters have changed.
*
- * Explicit calls to maybeMerge() are usually not necessary. The most common
- * case is when merge policy parameters have changed.
- *
* This method will call the {@link MergePolicy} with
* {@link MergeTrigger#EXPLICIT}.
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
+ *
+   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
*/
public final void maybeMerge() throws IOException {
maybeMerge(MergeTrigger.EXPLICIT, UNBOUNDED_MAX_MERGE_SEGMENTS);
}
-
- private final void maybeMerge(MergeTrigger trigger, int maxNumSegments)
- throws IOException {
+
+ private final void maybeMerge(MergeTrigger trigger, int maxNumSegments) throws IOException {
ensureOpen(false);
updatePendingMerges(trigger, maxNumSegments);
mergeScheduler.merge(this);
}
-
- private synchronized void updatePendingMerges(MergeTrigger trigger,
- int maxNumSegments) throws IOException {
+
+ private synchronized void updatePendingMerges(MergeTrigger trigger, int maxNumSegments)
+ throws IOException {
assert maxNumSegments == -1 || maxNumSegments > 0;
assert trigger != null;
if (stopMerges) {
return;
}
-
+
// Do not start new merges if we've hit OOME
if (hitOOM) {
return;
}
-
+
final MergePolicy.MergeSpecification spec;
if (maxNumSegments != UNBOUNDED_MAX_MERGE_SEGMENTS) {
- assert trigger == MergeTrigger.EXPLICIT
- || trigger == MergeTrigger.MERGE_FINISHED : "Expected EXPLICT or MERGE_FINISHED as trigger even with maxNumSegments set but was: "
- + trigger.name();
- spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments,
- Collections.unmodifiableMap(segmentsToMerge));
+ assert trigger == MergeTrigger.EXPLICIT || trigger == MergeTrigger.MERGE_FINISHED :
+ "Expected EXPLICT or MERGE_FINISHED as trigger even with maxNumSegments set but was: " + trigger.name();
+ spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge));
if (spec != null) {
final int numMerges = spec.merges.size();
- for (int i = 0; i < numMerges; i++) {
+        for(int i=0;i<numMerges;i++) {
- * Do not alter the returned collection!
- */
+
+ /** Expert: to be used by a {@link MergePolicy} to avoid
+ * selecting merges for segments already being merged.
+ * The returned collection is not cloned, and thus is
+ * only safe to access if you hold IndexWriter's lock
+ * (which you do when IndexWriter invokes the
+ * MergePolicy).
+ *
+   * <p>Do not alter the returned collection! */
  public synchronized Collection<SegmentInfoPerCommit> getMergingSegments() {
return mergingSegments;
}
-
+
/**
* Expert: the {@link MergeScheduler} calls this method to retrieve the next
* merge requested by the MergePolicy
@@ -2177,7 +2124,7 @@
return merge;
}
}
-
+
/**
* Expert: returns true if there are merges waiting to be scheduled.
*
@@ -2186,97 +2133,96 @@
public synchronized boolean hasPendingMerges() {
return pendingMerges.size() != 0;
}
-
+
/**
- * Close the IndexWriter without committing any changes that have
- * occurred since the last commit (or since it was opened, if commit hasn't
- * been called). This removes any temporary files that had been created, after
- * which the state of the index will be the same as it was when commit() was
- * last called or when this writer was first opened. This also clears a
- * previous call to {@link #prepareCommit}.
- *
- * @throws IOException
- * if there is a low-level IO error
+ * Close the IndexWriter without committing
+ * any changes that have occurred since the last commit
+ * (or since it was opened, if commit hasn't been called).
+ * This removes any temporary files that had been created,
+ * after which the state of the index will be the same as
+ * it was when commit() was last called or when this
+ * writer was first opened. This also clears a previous
+ * call to {@link #prepareCommit}.
+ * @throws IOException if there is a low-level IO error
*/
@Override
public void rollback() throws IOException {
ensureOpen();
-
+
// Ensure that only one thread actually gets to do the
// closing, and make sure no commit is also in progress:
- synchronized (commitLock) {
+ synchronized(commitLock) {
if (shouldClose()) {
rollbackInternal();
}
}
}
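
A rollback sketch, assuming an open writer with a buffered doc; after rollback() the writer is closed and must not be reused:

    writer.addDocument(doc);   // buffered, not yet committed
    // an application-level failure is detected before commit():
    writer.rollback();         // index reverts to the last commit; temporary files are removed
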
-
+
private void rollbackInternal() throws IOException {
-
+
boolean success = false;
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "rollback");
}
try {
- synchronized (this) {
+ synchronized(this) {
finishMerges(false);
stopMerges = true;
}
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "rollback: done finish merges");
}
-
+
// Must pre-close these two, in case they increment
// changeCount so that we can then set it to false
// before calling closeInternal
mergePolicy.close();
mergeScheduler.close();
-
+
bufferedDeletesStream.clear();
- docWriter.close(); // mark it as closed first to prevent subsequent
- // indexing actions/flushes
- docWriter.abort();
- synchronized (this) {
-
+ docWriter.close(); // mark it as closed first to prevent subsequent indexing actions/flushes
+ docWriter.abort(); // don't sync on IW here
+ synchronized(this) {
+
if (pendingCommit != null) {
pendingCommit.rollbackCommit(directory);
deleter.decRef(pendingCommit);
pendingCommit = null;
notifyAll();
}
-
+
// Don't bother saving any changes in our segmentInfos
readerPool.dropAll(false);
-
+
// Keep the same segmentInfos instance but replace all
- // of its SegmentInfo instances. This is so the next
+ // of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write
// once").
segmentInfos.rollbackSegmentInfos(rollbackSegments);
- if (infoStream.isEnabled("IW")) {
- infoStream
- .message("IW", "rollback: infos=" + segString(segmentInfos));
+ if (infoStream.isEnabled("IW") ) {
+ infoStream.message("IW", "rollback: infos=" + segString(segmentInfos));
}
+
assert testPoint("rollback before checkpoint");
-
+
// Ask deleter to locate unreferenced files & remove
// them:
deleter.checkpoint(segmentInfos, false);
deleter.refresh();
-
+
lastCommitChangeCount = changeCount;
}
-
+
success = true;
} catch (OutOfMemoryError oom) {
handleOOM(oom, "rollbackInternal");
} finally {
- synchronized (this) {
+ synchronized(this) {
if (!success) {
closing = false;
notifyAll();
@@ -2286,114 +2232,131 @@
}
}
}
-
+
closeInternal(false, false);
}
-
+
/**
* Delete all documents in the index.
- *
- *
- * This method will drop all buffered documents and will remove all segments
- * from the index. This change will not be visible until a {@link #commit()}
- * has been called. This method can be rolled back using {@link #rollback()}.
- *
- *
- *
- * NOTE: this method is much faster than using deleteDocuments( new
- * MatchAllDocsQuery() ).
- *
- *
- *
- * NOTE: this method will forcefully abort all merges in progress. If other
- * threads are running {@link #forceMerge}, {@link #addIndexes(IndexReader[])}
- * or {@link #forceMergeDeletes} methods, they may receive
- * {@link MergePolicy.MergeAbortedException}s.
+ *
+   * <p>This method will drop all buffered documents and will
+ * remove all segments from the index. This change will not be
+ * visible until a {@link #commit()} has been called. This method
+ * can be rolled back using {@link #rollback()}.
+ *
+   * <p><b>NOTE</b>: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).
+   * Yet, this method also has different semantics compared to {@link #deleteDocuments(Query)}
+   * / {@link #deleteDocuments(Query...)}: internal data-structures are cleared, all segment
+   * information is forcefully dropped, and anti-viral semantics like omitted norms or cleared
+   * doc value types are reset. Essentially a call to {@link #deleteAll()} is equivalent to
+   * creating a new {@link IndexWriter} with {@link OpenMode#CREATE}, whereas a delete query
+   * only marks documents as deleted.
+ *
+   * <p><b>NOTE</b>: this method will forcefully abort all merges
+ * in progress. If other threads are running {@link
+ * #forceMerge}, {@link #addIndexes(IndexReader[])} or
+ * {@link #forceMergeDeletes} methods, they may receive
+ * {@link MergePolicy.MergeAbortedException}s.
*/
- public synchronized void deleteAll() throws IOException {
+ public void deleteAll() throws IOException {
ensureOpen();
+ // Remove any buffered docs
boolean success = false;
- try {
-
- // Abort any running merges
- finishMerges(false);
-
- // Remove any buffered docs
- docWriter.abort();
-
- // Remove all segments
- segmentInfos.clear();
-
- // Ask deleter to locate unreferenced files & remove them:
- deleter.checkpoint(segmentInfos, false);
- deleter.refresh();
-
- globalFieldNumberMap.clear();
-
- // Don't bother saving any changes in our segmentInfos
- readerPool.dropAll(false);
-
- // Mark that the index has changed
- ++changeCount;
- segmentInfos.changed();
- success = true;
- } catch (OutOfMemoryError oom) {
- handleOOM(oom, "deleteAll");
- } finally {
- if (!success) {
- if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "hit exception during deleteAll");
+ /* hold the full flush lock to prevent concurrency commits / NRT reopens to
+ * get in our way and do unnecessary work. -- if we don't lock this here we might
+ * get in trouble if */
+ synchronized (fullFlushLock) {
+ /*
+ * We first abort and trash everything we have in-memory
+ * and keep the thread-states locked, the lockAndAbortAll operation
+ * also guarantees "point in time semantics" ie. the checkpoint that we need in terms
+ * of logical happens-before relationship in the DW. So we do
+ * abort all in memory structures
+ * We also drop global field numbering before during abort to make
+ * sure it's just like a fresh index.
+ */
+ try {
+ docWriter.lockAndAbortAll();
+ synchronized (this) {
+ try {
+ // Abort any running merges
+ finishMerges(false);
+ // Remove all segments
+ segmentInfos.clear();
+ // Ask deleter to locate unreferenced files & remove them:
+ deleter.checkpoint(segmentInfos, false);
+ /* don't refresh the deleter here since there might
+ * be concurrent indexing requests coming in opening
+ * files on the directory after we called DW#abort()
+ * if we do so these indexing requests might hit FNF exceptions.
+ * We will remove the files incrementally as we go...
+ */
+ // Don't bother saving any changes in our segmentInfos
+ readerPool.dropAll(false);
+ // Mark that the index has changed
+ ++changeCount;
+ segmentInfos.changed();
+ globalFieldNumberMap.clear();
+ success = true;
+ } catch (OutOfMemoryError oom) {
+ handleOOM(oom, "deleteAll");
+ } finally {
+ if (!success) {
+ if (infoStream.isEnabled("IW")) {
+ infoStream.message("IW", "hit exception during deleteAll");
+ }
+ }
+ }
}
+ } finally {
+ docWriter.unlockAllAfterAbortAll();
}
}
}
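
A sketch contrasting deleteAll with a match-all delete query, per the new javadoc above (MatchAllDocsQuery is from org.apache.lucene.search):

    writer.deleteAll();        // drops buffered docs, all segments and field metadata, like OpenMode.CREATE
    writer.commit();           // the now-empty index becomes visible
    // versus: only marks existing documents as deleted, segment and field info is kept
    writer.deleteDocuments(new MatchAllDocsQuery());
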
-
+
private synchronized void finishMerges(boolean waitForMerges) {
if (!waitForMerges) {
-
+
stopMerges = true;
-
+
// Abort all pending & running merges:
for (final MergePolicy.OneMerge merge : pendingMerges) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "now abort pending merge "
- + segString(merge.segments));
+ infoStream.message("IW", "now abort pending merge " + segString(merge.segments));
}
merge.abort();
mergeFinish(merge);
}
pendingMerges.clear();
-
+
for (final MergePolicy.OneMerge merge : runningMerges) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "now abort running merge "
- + segString(merge.segments));
+ infoStream.message("IW", "now abort running merge " + segString(merge.segments));
}
merge.abort();
}
-
+
// These merges periodically check whether they have
- // been aborted, and stop if so. We wait here to make
- // sure they all stop. It should not take very long
+ // been aborted, and stop if so. We wait here to make
+ // sure they all stop. It should not take very long
// because the merge threads periodically check if
// they are aborted.
- while (runningMerges.size() > 0) {
+ while(runningMerges.size() > 0) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "now wait for " + runningMerges.size()
- + " running merge/s to abort");
+ infoStream.message("IW", "now wait for " + runningMerges.size() + " running merge/s to abort");
}
doWait();
}
-
+
stopMerges = false;
notifyAll();
-
+
assert 0 == mergingSegments.size();
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "all running merges have aborted");
}
-
+
} else {
// waitForMerges() will ensure any running addIndexes finishes.
// It's fine if a new one attempts to start because from our
@@ -2403,35 +2366,34 @@
waitForMerges();
}
}
-
+
/**
* Wait for any currently outstanding merges to finish.
- *
- *
- * It is guaranteed that any merges started prior to calling this method will
- * have completed once this method completes.
- *
+ *
+   * <p>It is guaranteed that any merges started prior to calling this method
+ * will have completed once this method completes.
*/
public synchronized void waitForMerges() {
ensureOpen(false);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "waitForMerges");
}
- while (pendingMerges.size() > 0 || runningMerges.size() > 0) {
+ while(pendingMerges.size() > 0 || runningMerges.size() > 0) {
doWait();
}
-
+
// sanity check
assert 0 == mergingSegments.size();
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "waitForMerges done");
}
}
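A small sketch of the guarantee documented above, assuming an already-open IndexWriter; forceMerge with doWait=false is just one way to get merges running in the background:

    import org.apache.lucene.index.IndexWriter;

    class WaitForMergesSketch {
      static void mergeThenWait(IndexWriter writer) throws Exception {
        writer.forceMerge(1, false);  // doWait=false: request the merge and return immediately
        // ... other application work can proceed while the merge scheduler runs ...
        writer.waitForMerges();       // every merge started above has completed once this returns
      }
    }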
-
+
/**
- * Called whenever the SegmentInfos has been updated and the index files
- * referenced exist (correctly) in the index directory.
+ * Called whenever the SegmentInfos has been updated and
+ * the index files referenced exist (correctly) in the
+ * index directory.
*/
synchronized void checkpoint() throws IOException {
changed();
@@ -2468,8 +2430,7 @@
* segments SegmentInfo to the index writer.
*/
synchronized void publishFlushedSegment(SegmentInfoPerCommit newSegment,
- FrozenBufferedDeletes packet, FrozenBufferedDeletes globalPacket)
- throws IOException {
+ FrozenBufferedDeletes packet, FrozenBufferedDeletes globalPacket) throws IOException {
// Lock order IW -> BDS
synchronized (bufferedDeletesStream) {
if (infoStream.isEnabled("IW")) {
@@ -2479,7 +2440,7 @@
if (globalPacket != null
&& (globalPacket.anyDeletes() || globalPacket.anyUpdates())) {
bufferedDeletesStream.push(globalPacket);
- }
+ }
      // Publishing the segment must be synched on IW -> BDS to make sure
// that no merge prunes away the seg. private delete packet
final long nextGen;
@@ -2491,8 +2452,7 @@
nextGen = bufferedDeletesStream.getNextGen();
}
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "publish sets newSegment delGen=" + nextGen
- + " seg=" + segString(newSegment));
+ infoStream.message("IW", "publish sets newSegment delGen=" + nextGen + " seg=" + segString(newSegment));
}
newSegment.setBufferedDeletesGen(nextGen);
segmentInfos.add(newSegment);
@@ -2507,82 +2467,81 @@
}
return mergePolicy.useCompoundFile(segmentInfos, segmentInfo);
}
-
+
private synchronized void resetMergeExceptions() {
mergeExceptions = new ArrayList();
mergeGen++;
}
-
+
private void noDupDirs(Directory... dirs) {
HashSet dups = new HashSet();
- for (int i = 0; i < dirs.length; i++) {
- if (dups.contains(dirs[i])) throw new IllegalArgumentException(
- "Directory " + dirs[i] + " appears more than once");
- if (dirs[i] == directory) throw new IllegalArgumentException(
- "Cannot add directory to itself");
+    for(int i=0;i<dirs.length;i++) {
+      if (dups.contains(dirs[i]))
+        throw new IllegalArgumentException("Directory " + dirs[i] + " appears more than once");
+      if (dirs[i] == directory)
+        throw new IllegalArgumentException("Cannot add directory to itself");
+      dups.add(dirs[i]);
+    }
+  }
+
+  /**
+   * Adds all segments from an array of indexes into this index.
+   *
+   * <p>This may be used to parallelize batch indexing. A large document
+ * collection can be broken into sub-collections. Each sub-collection can be
+ * indexed in parallel, on a different thread, process or machine. The
+ * complete index can then be created by merging sub-collection indexes
+ * with this method.
+ *
*
- * This may be used to parallelize batch indexing. A large document collection
- * can be broken into sub-collections. Each sub-collection can be indexed in
- * parallel, on a different thread, process or machine. The complete index can
- * then be created by merging sub-collection indexes with this method.
- *
- *
- * NOTE: the index in each {@link Directory} must not be changed
- * (opened by a writer) while this method is running. This method does not
- * acquire a write lock in each input Directory, so it is up to the caller to
+ * NOTE: the index in each {@link Directory} must not be
+ * changed (opened by a writer) while this method is
+ * running. This method does not acquire a write lock in
+ * each input Directory, so it is up to the caller to
* enforce this.
- *
+ *
+   * <p>This method is transactional in how Exceptions are
+ * handled: it does not commit a new segments_N file until
+ * all indexes are added. This means if an Exception
+ * occurs (for example disk full), then either no indexes
+ * will have been added or they all will have been.
+ *
+   * <p>Note that this requires temporary free space in the
+ * {@link Directory} up to 2X the sum of all input indexes
+ * (including the starting index). If readers/searchers
+ * are open against the starting index, then temporary
+ * free space required will be higher by the size of the
+ * starting index (see {@link #forceMerge(int)} for details).
+ *
*
- * This method is transactional in how Exceptions are handled: it does not
- * commit a new segments_N file until all indexes are added. This means if an
- * Exception occurs (for example disk full), then either no indexes will have
- * been added or they all will have been.
- *
- *
- * Note that this requires temporary free space in the {@link Directory} up to
- * 2X the sum of all input indexes (including the starting index). If
- * readers/searchers are open against the starting index, then temporary free
- * space required will be higher by the size of the starting index (see
- * {@link #forceMerge(int)} for details).
- *
- *
* NOTE: this method only copies the segments of the incoming indexes
* and does not merge them. Therefore deleted documents are not removed and
* the new segments are not merged with the existing ones.
- *
+ *
+   * <p>This requires this index not be among those to be added.
+ *
*
- * This requires this index not be among those to be added.
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
+ * NOTE: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
public void addIndexes(Directory... dirs) throws IOException {
ensureOpen();
-
+
noDupDirs(dirs);
-
+
try {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "flush at addIndexes(Directory...)");
}
-
+
flush(false, true);
-
+
List infos = new ArrayList();
-
+
boolean success = false;
try {
for (Directory dir : dirs) {
@@ -2591,18 +2550,14 @@
}
SegmentInfos sis = new SegmentInfos(); // read infos from dir
sis.read(dir);
-
+
for (SegmentInfoPerCommit info : sis) {
- assert !infos.contains(info) : "dup info dir=" + info.info.dir
- + " name=" + info.info.name;
-
+ assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;
+
String newSegName = newSegmentName();
-
+
if (infoStream.isEnabled("IW")) {
- infoStream
- .message("IW", "addIndexes: process segment origName="
- + info.info.name + " newName=" + newSegName + " info="
- + info);
+ infoStream.message("IW", "addIndexes: process segment origName=" + info.info.name + " newName=" + newSegName + " info=" + info);
}
IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.sizeInBytes(), true, -1));
@@ -2616,16 +2571,17 @@
success = true;
} finally {
if (!success) {
- for (SegmentInfoPerCommit sipc : infos) {
- for (String file : sipc.files()) {
+ for(SegmentInfoPerCommit sipc : infos) {
+ for(String file : sipc.files()) {
try {
directory.deleteFile(file);
- } catch (Throwable t) {}
+ } catch (Throwable t) {
+ }
}
}
}
}
-
+
synchronized (this) {
success = false;
try {
@@ -2633,11 +2589,12 @@
success = true;
} finally {
if (!success) {
- for (SegmentInfoPerCommit sipc : infos) {
- for (String file : sipc.files()) {
+ for(SegmentInfoPerCommit sipc : infos) {
+ for(String file : sipc.files()) {
try {
directory.deleteFile(file);
- } catch (Throwable t) {}
+ } catch (Throwable t) {
+ }
}
}
}
@@ -2645,7 +2602,7 @@
segmentInfos.addAll(infos);
checkpoint();
}
-
+
} catch (OutOfMemoryError oom) {
handleOOM(oom, "addIndexes(Directory...)");
}
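A sketch of the parallel batch-indexing pattern the javadoc above describes; the paths are placeholders, and the two sub-indexes are assumed to have been built elsewhere with no writer left open on them:

    import java.io.File;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class AddIndexesSketch {
      public static void main(String[] args) throws Exception {
        Directory part1 = FSDirectory.open(new File("/tmp/index-part1"));
        Directory part2 = FSDirectory.open(new File("/tmp/index-part2"));

        Directory combined = FSDirectory.open(new File("/tmp/index-combined"));
        IndexWriter writer = new IndexWriter(combined,
            new IndexWriterConfig(Version.LUCENE_44, new StandardAnalyzer(Version.LUCENE_44)));

        // Copies the sub-indexes' segments as-is; no segments_N is committed
        // until every input has been added, so the operation is all-or-nothing.
        writer.addIndexes(part1, part2);

        writer.commit();
        writer.close();
      }
    }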
@@ -2689,13 +2646,13 @@
public void addIndexes(IndexReader... readers) throws IOException {
ensureOpen();
int numDocs = 0;
-
+
try {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "flush at addIndexes(IndexReader...)");
}
flush(false, true);
-
+
String mergedName = newSegmentName();
final List mergeReaders = new ArrayList();
for (IndexReader indexReader : readers) {
@@ -2705,7 +2662,7 @@
}
}
final IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1));
-
+
// TODO: somehow we should fix this merge so it's
// abortable so that IW.close(false) is able to stop it
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
@@ -2719,11 +2676,11 @@
MergeState mergeState;
boolean success = false;
try {
- mergeState = merger.merge(); // merge 'em
+ mergeState = merger.merge(); // merge 'em
success = true;
} finally {
- if (!success) {
- synchronized (this) {
+ if (!success) {
+ synchronized(this) {
deleter.refresh(info.name);
}
}
@@ -2734,20 +2691,19 @@
info.setFiles(new HashSet(trackingDir.getCreatedFiles()));
trackingDir.getCreatedFiles().clear();
-
+
setDiagnostics(info, SOURCE_ADDINDEXES_READERS);
-
+
boolean useCompoundFile;
- synchronized (this) { // Guard segmentInfos
+ synchronized(this) { // Guard segmentInfos
if (stopMerges) {
deleter.deleteNewFiles(infoPerCommit.files());
return;
}
ensureOpen();
- useCompoundFile = mergePolicy.useCompoundFile(segmentInfos,
- infoPerCommit);
+ useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, infoPerCommit);
}
-
+
// Now create the compound file if needed
if (useCompoundFile) {
Collection filesToDelete = infoPerCommit.files();
@@ -2757,14 +2713,14 @@
} finally {
// delete new non cfs files directly: they were never
// registered with IFD
- synchronized (this) {
+ synchronized(this) {
deleter.deleteNewFiles(filesToDelete);
}
}
info.setUseCompoundFile(true);
}
-
- // Have codec write SegmentInfo. Must do this after
+
+ // Have codec write SegmentInfo. Must do this after
// creating CFS so that 1) .si isn't slurped into CFS,
// and 2) .si reflects useCompoundFile=true change
// above:
@@ -2777,16 +2733,16 @@
success = true;
} finally {
if (!success) {
- synchronized (this) {
+ synchronized(this) {
deleter.refresh(info.name);
}
}
}
-
+
info.addFiles(trackingDir.getCreatedFiles());
-
+
// Register the new segment
- synchronized (this) {
+ synchronized(this) {
if (stopMerges) {
deleter.deleteNewFiles(info.files());
return;
@@ -2799,10 +2755,10 @@
handleOOM(oom, "addIndexes(IndexReader...)");
}
}
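For contrast with the Directory variant, a sketch of pulling an existing index in through a reader; unlike the copy-as-is path below, this variant runs an actual merge of the incoming readers into one new segment (the source directory is assumed to already hold a complete index):

    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;

    class AddIndexesReadersSketch {
      static void addFromReader(IndexWriter writer, Directory source) throws Exception {
        DirectoryReader reader = DirectoryReader.open(source);
        try {
          writer.addIndexes(reader);  // merges the reader's contents into a single new segment
        } finally {
          reader.close();
        }
      }
    }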
-
+
/** Copies the segment files as-is into the IndexWriter's directory. */
- private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info,
- String segName, IOContext context) throws IOException {
+ private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName, IOContext context)
+ throws IOException {
// note: we don't really need this fis (its copied), but we load it up
// so we don't pass a null value to the si writer
@@ -2810,40 +2766,35 @@
final Map attributes;
// copy the attributes map, we might modify it below.
- // also we need to ensure its read-write, since we will invoke the SIwriter
- // (which might want to set something).
+    // also we need to ensure it's read-write, since we will invoke the SIwriter (which might want to set something).
if (info.info.attributes() == null) {
attributes = new HashMap();
} else {
attributes = new HashMap(info.info.attributes());
}
-
- // System.out.println("copy seg=" + info.info.name + " version=" +
- // info.info.getVersion());
+
+ //System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion());
// Same SI as before but we change directory and name
- SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(),
- segName, info.info.getDocCount(), info.info.getUseCompoundFile(),
- info.info.getCodec(), info.info.getDiagnostics(), attributes);
- SegmentInfoPerCommit newInfoPerCommit = new SegmentInfoPerCommit(newInfo,
- info.getDelCount(), info.getDelGen(), info.getUpdateGen());
-
+ SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
+ info.info.getUseCompoundFile(),
+ info.info.getCodec(), info.info.getDiagnostics(), attributes);
+ SegmentInfoPerCommit newInfoPerCommit = new SegmentInfoPerCommit(newInfo, info.getDelCount(), info.getDelGen(), info.getUpdateGen());
+
Set segFiles = new HashSet();
-
- // Build up new segment's file names. Must do this
+
+ // Build up new segment's file names. Must do this
// before writing SegmentInfo:
for (String file : info.files()) {
final String newFileName = getNewFileName(file, segName);
segFiles.add(newFileName);
}
newInfo.setFiles(segFiles);
-
- // We must rewrite the SI file because it references segment name in its
- // list of files, etc
- TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(
- directory);
-
+
+ // We must rewrite the SI file because it references segment name in its list of files, etc
+ TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
+
boolean success = false;
-
+
try {
SegmentInfoWriter segmentInfoWriter = newInfo.getCodec()
@@ -2851,27 +2802,29 @@
segmentInfoWriter.write(trackingDir, newInfo, fis, context);
final Collection siFiles = trackingDir.getCreatedFiles();
-
+
// Copy the segment's files
- for (String file : info.files()) {
+ for (String file: info.files()) {
+
final String newFileName = getNewFileName(file, segName);
+
if (siFiles.contains(newFileName)) {
// We already rewrote this above
continue;
}
-
- assert !directory.fileExists(newFileName) : "file \"" + newFileName
- + "\" already exists; siFiles=" + siFiles;
-
+
+ assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists; siFiles=" + siFiles;
+
info.info.dir.copy(directory, file, newFileName, context);
}
success = true;
} finally {
if (!success) {
- for (String file : newInfo.files()) {
+ for(String file : newInfo.files()) {
try {
directory.deleteFile(file);
- } catch (Throwable t) {}
+ } catch (Throwable t) {
+ }
}
}
}
@@ -2898,99 +2851,95 @@
* is committed (new segments_N file written).
*/
protected void doAfterFlush() throws IOException {}
-
+
/**
* A hook for extending classes to execute operations before pending added and
* deleted documents are flushed to the Directory.
*/
protected void doBeforeFlush() throws IOException {}
-
- /**
- *
- * Expert: prepare for commit. This does the first phase of 2-phase commit.
- * This method does all steps necessary to commit changes since this writer
- * was opened: flushes pending added and deleted docs, syncs the index files,
- * writes most of next segments_N file. After calling this you must call
- * either {@link #commit()} to finish the commit, or {@link #rollback()} to
- * revert the commit and undo all changes done since the writer was opened.
- *
- *
- *
- * You can also just call {@link #commit()} directly without prepareCommit
- * first in which case that method will internally call prepareCommit.
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
+
+  /** <p>Expert: prepare for commit. This does the
+ * first phase of 2-phase commit. This method does all
+ * steps necessary to commit changes since this writer
+ * was opened: flushes pending added and deleted docs,
+ * syncs the index files, writes most of next segments_N
+ * file. After calling this you must call either {@link
+ * #commit()} to finish the commit, or {@link
+ * #rollback()} to revert the commit and undo all changes
+ * done since the writer was opened.
+ *
+   * <p>You can also just call {@link #commit()} directly
+ * without prepareCommit first in which case that method
+ * will internally call prepareCommit.
+ *
+   * <p>NOTE: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
*/
@Override
public final void prepareCommit() throws IOException {
ensureOpen();
prepareCommitInternal();
}
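A sketch of the two-phase pattern the javadoc describes, coordinating the index commit with some other transactional resource; the external-resource helpers stand in for that other system and are not Lucene APIs:

    import org.apache.lucene.index.IndexWriter;

    class TwoPhaseCommitSketch {
      static void commitBoth(IndexWriter writer) throws Exception {
        writer.prepareCommit();       // phase 1: flush, sync files, write most of segments_N
        try {
          prepareExternalResource();  // hypothetical: get the other system ready as well
          writer.commit();            // phase 2: finish the pending commit
          commitExternalResource();   // hypothetical
        } catch (Exception e) {
          writer.rollback();          // undo everything since the writer was opened (also closes it)
          throw e;
        }
      }

      static void prepareExternalResource() { /* placeholder */ }
      static void commitExternalResource() { /* placeholder */ }
    }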
-
+
private void prepareCommitInternal() throws IOException {
- synchronized (commitLock) {
+ synchronized(commitLock) {
ensureOpen(false);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "prepareCommit: flush");
infoStream.message("IW", " index before flush " + segString());
}
-
+
if (hitOOM) {
- throw new IllegalStateException(
- "this writer hit an OutOfMemoryError; cannot commit");
+ throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
}
-
+
if (pendingCommit != null) {
- throw new IllegalStateException(
- "prepareCommit was already called with no corresponding call to commit");
+ throw new IllegalStateException("prepareCommit was already called with no corresponding call to commit");
}
-
+
doBeforeFlush();
assert testPoint("startDoFlush");
SegmentInfos toCommit = null;
boolean anySegmentsFlushed = false;
-
+
// This is copied from doFlush, except it's modified to
// clone & incRef the flushed SegmentInfos inside the
// sync block:
-
+
try {
-
+
synchronized (fullFlushLock) {
boolean flushSuccess = false;
boolean success = false;
try {
anySegmentsFlushed = docWriter.flushAllThreads();
if (!anySegmentsFlushed) {
- // prevent double increment since docWriter#doFlush increments the
- // flushcount
+ // prevent double increment since docWriter#doFlush increments the flushcount
// if we flushed anything.
flushCount.incrementAndGet();
}
flushSuccess = true;
-
- synchronized (this) {
+
+ synchronized(this) {
maybeApplyDeletes(true);
-
+
readerPool.commit(segmentInfos);
-
+
// Must clone the segmentInfos while we still
// hold fullFlushLock and while sync'd so that
// no partial changes (eg a delete w/o
// corresponding add from an updateDocument) can
// sneak into the commit point:
toCommit = segmentInfos.clone();
-
+
pendingCommitChangeCount = changeCount;
-
+
// This protects the segmentInfos we are now going
- // to commit. This is important in case, eg, while
+ // to commit. This is important in case, eg, while
// we are trying to sync all referenced files, a
// merge completes which would otherwise have
- // removed the files we are now syncing.
+ // removed the files we are now syncing.
filesToCommit = toCommit.files(directory, false);
deleter.incRef(filesToCommit);
}
@@ -3009,7 +2958,7 @@
} catch (OutOfMemoryError oom) {
handleOOM(oom, "prepareCommit");
}
-
+
boolean success = false;
try {
if (anySegmentsFlushed) {
@@ -3024,17 +2973,17 @@
}
}
}
-
+
startCommit(toCommit);
}
}
/**
* Sets the commit user data map. That method is considered a transaction by
- * {@link IndexWriter} and will be {@link #commit() committed} even if no
- * other changes were made to the writer instance. Note that you must call
- * this method before {@link #prepareCommit()}, or otherwise it won't be
- * included in the follow-on {@link #commit()}.
+ * {@link IndexWriter} and will be {@link #commit() committed} even if no other
+ * changes were made to the writer instance. Note that you must call this method
+ * before {@link #prepareCommit()}, or otherwise it won't be included in the
+ * follow-on {@link #commit()}.
*
* NOTE: the map is cloned internally, therefore altering the map's
* contents after calling this method has no effect.
@@ -3055,34 +3004,34 @@
// Used only by commit and prepareCommit, below; lock
// order is commitLock -> IW
private final Object commitLock = new Object();
-
+
/**
- *
- * Commits all pending changes (added & deleted documents, segment merges,
- * added indexes, etc.) to the index, and syncs all referenced index files,
- * such that a reader will see the changes and the index updates will survive
- * an OS or machine crash or power loss. Note that this does not wait for any
- * running background merges to finish. This may be a costly operation, so you
- * should test the cost in your application and do it only when really
- * necessary.
- *
- *
- *
- * Note that this operation calls Directory.sync on the index files. That call
- * should not return until the file contents & metadata are on stable storage.
- * For FSDirectory, this calls the OS's fsync. But, beware: some hardware
- * devices may in fact cache writes even during fsync, and return before the
- * bits are actually on stable storage, to give the appearance of faster
- * performance. If you have such a device, and it does not have a battery
- * backup (for example) then on power loss it may still lose data. Lucene
- * cannot guarantee consistency on such devices.
- *
- *
- *
- * NOTE: if this method hits an OutOfMemoryError you should immediately
- * close the writer. See above for details.
- *
- *
+   * <p>Commits all pending changes (added & deleted
+ * documents, segment merges, added
+ * indexes, etc.) to the index, and syncs all referenced
+ * index files, such that a reader will see the changes
+ * and the index updates will survive an OS or machine
+ * crash or power loss. Note that this does not wait for
+ * any running background merges to finish. This may be a
+ * costly operation, so you should test the cost in your
+ * application and do it only when really necessary.
+ *
+   * <p>Note that this operation calls Directory.sync on
+ * the index files. That call should not return until the
+ * file contents & metadata are on stable storage. For
+ * FSDirectory, this calls the OS's fsync. But, beware:
+ * some hardware devices may in fact cache writes even
+ * during fsync, and return before the bits are actually
+ * on stable storage, to give the appearance of faster
+ * performance. If you have such a device, and it does
+ * not have a battery backup (for example) then on power
+ * loss it may still lose data. Lucene cannot guarantee
+ * consistency on such devices.
+ *
+   * <p>NOTE: if this method hits an OutOfMemoryError
+ * you should immediately close the writer. See above for details.
+ *
* @see #prepareCommit
*/
@Override
@@ -3090,20 +3039,20 @@
ensureOpen();
commitInternal();
}
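Together with setCommitData (documented a little earlier), a commit can carry application metadata such as a sequence number; a minimal sketch, assuming an open writer and an illustrative key name:

    import java.util.Collections;

    import org.apache.lucene.index.IndexWriter;

    class CommitDataSketch {
      static void commitWithMarker(IndexWriter writer, long sequenceId) throws Exception {
        // The commit user data must be set before prepareCommit()/commit()
        // to be included in the resulting commit point.
        writer.setCommitData(Collections.singletonMap("mySeqId", Long.toString(sequenceId)));
        writer.commit();  // syncs all referenced files; the change survives a crash once this returns
      }
    }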
-
+
private final void commitInternal() throws IOException {
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: start");
}
-
- synchronized (commitLock) {
+
+ synchronized(commitLock) {
ensureOpen(false);
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: enter lock");
}
-
+
if (pendingCommit == null) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: now prepare");
@@ -3114,13 +3063,13 @@
infoStream.message("IW", "commit: already prepared");
}
}
-
+
finishCommit();
}
}
-
+
private synchronized final void finishCommit() throws IOException {
-
+
if (pendingCommit != null) {
try {
if (infoStream.isEnabled("IW")) {
@@ -3128,8 +3077,7 @@
}
pendingCommit.finishCommit(directory);
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "commit: wrote segments file \""
- + pendingCommit.getSegmentsFileName() + "\"");
+ infoStream.message("IW", "commit: wrote segments file \"" + pendingCommit.getSegmentsFileName() + "\"");
}
lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit);
@@ -3140,72 +3088,70 @@
deleter.decRef(filesToCommit);
filesToCommit = null;
pendingCommit = null;
- updatesPending = false;
notifyAll();
}
-
+
} else {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: pendingCommit == null; skip");
}
}
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: done");
}
}
-
+
// Ensures only one flush() is actually flushing segments
// at a time:
private final Object fullFlushLock = new Object();
+ // for assert
+ boolean holdsFullFlushLock() {
+ return Thread.holdsLock(fullFlushLock);
+ }
+
/**
- * Flush all in-memory buffered updates (adds and deletes) to the Directory.
- *
- * @param triggerMerge
- * if true, we may merge segments (if deletes or docs were flushed)
- * if necessary
- * @param applyAllDeletes
- * whether pending deletes should also
+ * Flush all in-memory buffered updates (adds and deletes)
+ * to the Directory.
+ * @param triggerMerge if true, we may merge segments (if
+ * deletes or docs were flushed) if necessary
+   * @param applyAllDeletes whether pending deletes should also be applied
*/
- protected final void flush(boolean triggerMerge, boolean applyAllDeletes)
- throws IOException {
-
+ protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws IOException {
+
// NOTE: this method cannot be sync'd because
// maybeMerge() in turn calls mergeScheduler.merge which
// in turn can take a long time to run and we don't want
- // to hold the lock for that. In the case of
+ // to hold the lock for that. In the case of
// ConcurrentMergeScheduler this can lead to deadlock
// when it stalls due to too many running merges.
-
- // We can be called during close, when closing==true, so we must pass false
- // to ensureOpen:
+
+ // We can be called during close, when closing==true, so we must pass false to ensureOpen:
ensureOpen(false);
if (doFlush(applyAllDeletes) && triggerMerge) {
maybeMerge(MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS);
}
}
-
+
private boolean doFlush(boolean applyAllDeletes) throws IOException {
if (hitOOM) {
- throw new IllegalStateException(
- "this writer hit an OutOfMemoryError; cannot flush");
+ throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
}
-
+
doBeforeFlush();
assert testPoint("startDoFlush");
boolean success = false;
try {
-
+
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", " start flush: applyAllDeletes="
- + applyAllDeletes);
+ infoStream.message("IW", " start flush: applyAllDeletes=" + applyAllDeletes);
infoStream.message("IW", " index before flush " + segString());
}
final boolean anySegmentFlushed;
synchronized (fullFlushLock) {
- boolean flushSuccess = false;
+ boolean flushSuccess = false;
try {
anySegmentFlushed = docWriter.flushAllThreads();
flushSuccess = true;
@@ -3213,7 +3159,7 @@
docWriter.finishFullFlush(flushSuccess);
}
}
- synchronized (this) {
+ synchronized(this) {
maybeApplyDeletes(applyAllDeletes);
doAfterFlush();
if (!anySegmentFlushed) {
@@ -3236,32 +3182,27 @@
}
}
- final synchronized void maybeApplyDeletes(boolean applyAllDeletes)
- throws IOException {
+ final synchronized void maybeApplyDeletes(boolean applyAllDeletes) throws IOException {
if (applyAllDeletes) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "apply all deletes during flush");
}
applyAllDeletes();
} else if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "don't apply deletes now delTermCount="
- + bufferedDeletesStream.numTerms() + " bytesUsed="
- + bufferedDeletesStream.bytesUsed());
+ infoStream.message("IW", "don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
}
}
final synchronized void applyAllDeletes() throws IOException {
flushDeletesCount.incrementAndGet();
final BufferedDeletesStream.ApplyDeletesResult result;
- result = bufferedDeletesStream.applyDeletes(readerPool,
- segmentInfos.asList());
+ result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos.asList());
if (result.anyDeletes) {
checkpoint();
}
if (!keepFullyDeletedSegments && result.allDeleted != null) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "drop 100% deleted segments: "
- + segString(result.allDeleted));
+ infoStream.message("IW", "drop 100% deleted segments: " + segString(result.allDeleted));
}
for (SegmentInfoPerCommit info : result.allDeleted) {
// If a merge has already registered for this
@@ -3277,15 +3218,13 @@
}
bufferedDeletesStream.prune(segmentInfos);
}
-
- /**
- * Expert: Return the total size of all index files currently cached in
- * memory. Useful for size management with flushRamDocs()
+
+ /** Expert: Return the total size of all index files currently cached in memory.
+ * Useful for size management with flushRamDocs()
*/
public final long ramSizeInBytes() {
ensureOpen();
- return docWriter.flushControl.netBytes()
- + bufferedDeletesStream.bytesUsed();
+ return docWriter.flushControl.netBytes() + bufferedDeletesStream.bytesUsed();
}
// for testing only
@@ -3294,21 +3233,18 @@
assert test = true;
return test ? docWriter : null;
}
-
- /**
- * Expert: Return the number of documents currently buffered in RAM.
- */
+
+ /** Expert: Return the number of documents currently
+ * buffered in RAM. */
public final synchronized int numRamDocs() {
ensureOpen();
return docWriter.getNumDocs();
}
-
+
private synchronized void ensureValidMerge(MergePolicy.OneMerge merge) {
- for (SegmentInfoPerCommit info : merge.segments) {
+ for(SegmentInfoPerCommit info : merge.segments) {
if (!segmentInfos.contains(info)) {
- throw new MergePolicy.MergeException("MergePolicy selected a segment ("
- + info.info.name + ") that is not in the current index "
- + segString(), directory);
+ throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.info.name + ") that is not in the current index " + segString(), directory);
}
}
}
@@ -3325,19 +3261,18 @@
synchronized private ReadersAndLiveDocs commitMergedDeletes(MergePolicy.OneMerge merge, MergeState mergeState) throws IOException {
assert testPoint("startCommitMergeDeletes");
-
+
final List sourceSegments = merge.segments;
-
+
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "commitMergeDeletes "
- + segString(merge.segments));
+ infoStream.message("IW", "commitMergeDeletes " + segString(merge.segments));
}
-
+
// Carefully merge deletes that occurred after we
// started merging:
int docUpto = 0;
long minGen = Long.MAX_VALUE;
-
+
// Lazy init (only when we find a delete to carry over):
ReadersAndLiveDocs mergedDeletes = null;
MergePolicy.DocMap docMap = null;
@@ -3350,35 +3285,35 @@
final Bits currentLiveDocs;
final ReadersAndLiveDocs rld = readerPool.get(info, false);
// We hold a ref so it should still be in the pool:
- assert rld != null : "seg=" + info.info.name;
+ assert rld != null: "seg=" + info.info.name;
currentLiveDocs = rld.getLiveDocs();
-
+
if (prevLiveDocs != null) {
-
+
// If we had deletions on starting the merge we must
// still have deletions now:
assert currentLiveDocs != null;
assert prevLiveDocs.length() == docCount;
assert currentLiveDocs.length() == docCount;
-
+
// There were deletes on this segment when the merge
- // started. The merge has collapsed away those
+ // started. The merge has collapsed away those
// deletes, but, if new deletes were flushed since
// the merge started, we must now carefully keep any
// newly flushed deletes but mapping them to the new
// docIDs.
-
+
// Since we copy-on-write, if any new deletes were
// applied after merging has started, we can just
// check if the before/after liveDocs have changed.
// If so, we must carefully merge the liveDocs one
// doc at a time:
if (currentLiveDocs != prevLiveDocs) {
-
+
// This means this segment received new deletes
// since we started the merge, so we
// must merge them:
- for (int j = 0; j < docCount; j++) {
+        for(int j=0;j<docCount;j++) {
+    assert merge.segments.size() > 0 || dropSegment;
-
- assert merge.info.info.getDocCount() != 0 || keepFullyDeletedSegments
- || dropSegment;
-
+
+ assert merge.info.info.getDocCount() != 0 || keepFullyDeletedSegments || dropSegment;
+
segmentInfos.applyMergeChanges(merge, dropSegment);
-
+
if (mergedDeletes != null) {
if (dropSegment) {
mergedDeletes.dropChanges();
}
readerPool.release(mergedDeletes);
}
-
+
if (dropSegment) {
assert !segmentInfos.contains(merge.info);
readerPool.drop(merge.info);
deleter.deleteNewFiles(merge.info.files());
}
-
+
boolean success = false;
try {
// Must close before checkpoint, otherwise IFD won't be
@@ -3535,38 +3463,35 @@
}
}
}
-
+
deleter.deletePendingFiles();
- deleter.deletePendingFiles();
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "after commitMerge: " + segString());
}
-
+
if (merge.maxNumSegments != -1 && !dropSegment) {
// cascade the forceMerge:
if (!segmentsToMerge.containsKey(merge.info)) {
segmentsToMerge.put(merge.info, Boolean.FALSE);
}
}
-
+
return true;
}
-
- final private void handleMergeException(Throwable t,
- MergePolicy.OneMerge merge) throws IOException {
-
+
+ final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {
+
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "handleMergeException: merge="
- + segString(merge.segments) + " exc=" + t);
+ infoStream.message("IW", "handleMergeException: merge=" + segString(merge.segments) + " exc=" + t);
}
-
+
// Set the exception on the merge, so if
// forceMerge is waiting on us it sees the root
// cause exception:
merge.setException(t);
addMergeException(merge);
-
+
if (t instanceof MergePolicy.MergeAbortedException) {
// We can ignore this exception (it happens when
// close(false) or rollback is called), unless the
@@ -3574,40 +3499,43 @@
// in which case we must throw it so, for example, the
// rollbackTransaction code in addIndexes* is
// executed.
- if (merge.isExternal) throw (MergePolicy.MergeAbortedException) t;
- } else if (t instanceof IOException) throw (IOException) t;
- else if (t instanceof RuntimeException) throw (RuntimeException) t;
- else if (t instanceof Error) throw (Error) t;
+ if (merge.isExternal)
+ throw (MergePolicy.MergeAbortedException) t;
+ } else if (t instanceof IOException)
+ throw (IOException) t;
+ else if (t instanceof RuntimeException)
+ throw (RuntimeException) t;
+ else if (t instanceof Error)
+ throw (Error) t;
else
- // Should not get here
- throw new RuntimeException(t);
+ // Should not get here
+ throw new RuntimeException(t);
}
-
+
/**
- * Merges the indicated segments, replacing them in the stack with a single
- * segment.
+ * Merges the indicated segments, replacing them in the stack with a
+ * single segment.
*
* @lucene.experimental
*/
public void merge(MergePolicy.OneMerge merge) throws IOException {
-
+
boolean success = false;
-
+
final long t0 = System.currentTimeMillis();
-
+
try {
try {
try {
mergeInit(merge);
- // if (merge.info != null) {
- // System.out.println("MERGE: " + merge.info.info.name);
- // }
-
+ //if (merge.info != null) {
+ //System.out.println("MERGE: " + merge.info.info.name);
+ //}
+
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "now merge\n merge="
- + segString(merge.segments) + "\n index=" + segString());
+ infoStream.message("IW", "now merge\n merge=" + segString(merge.segments) + "\n index=" + segString());
}
-
+
mergeMiddle(merge);
mergeSuccess(merge);
success = true;
@@ -3615,9 +3543,9 @@
handleMergeException(t, merge);
}
} finally {
- synchronized (this) {
+ synchronized(this) {
mergeFinish(merge);
-
+
if (!success) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception during merge");
@@ -3626,14 +3554,12 @@
deleter.refresh(merge.info.info.name);
}
}
-
+
// This merge (and, generally, any change to the
// segments) may now enable new merges, so we call
// merge policy & update pending merges.
- if (success && !merge.isAborted()
- && (merge.maxNumSegments != -1 || (!closed && !closing))) {
- updatePendingMerges(MergeTrigger.MERGE_FINISHED,
- merge.maxNumSegments);
+ if (success && !merge.isAborted() && (merge.maxNumSegments != -1 || (!closed && !closing))) {
+ updatePendingMerges(MergeTrigger.MERGE_FINISHED, merge.maxNumSegments);
}
}
}
@@ -3642,52 +3568,44 @@
}
if (merge.info != null && !merge.isAborted()) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "merge time "
- + (System.currentTimeMillis() - t0) + " msec for "
- + merge.info.info.getDocCount() + " docs");
+ infoStream.message("IW", "merge time " + (System.currentTimeMillis()-t0) + " msec for " + merge.info.info.getDocCount() + " docs");
}
}
}
-
+
/** Hook that's called when the specified merge is complete. */
- void mergeSuccess(MergePolicy.OneMerge merge) {}
-
- /**
- * Checks whether this merge involves any segments already participating in a
- * merge. If not, this merge is "registered", meaning we record that its
- * segments are now participating in a merge, and true is returned. Else (the
- * merge conflicts) false is returned.
- */
- final synchronized boolean registerMerge(MergePolicy.OneMerge merge)
- throws IOException {
-
+ void mergeSuccess(MergePolicy.OneMerge merge) {
+ }
+
+ /** Checks whether this merge involves any segments
+ * already participating in a merge. If not, this merge
+ * is "registered", meaning we record that its segments
+ * are now participating in a merge, and true is
+ * returned. Else (the merge conflicts) false is
+ * returned. */
+ final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws IOException {
+
if (merge.registerDone) {
return true;
}
assert merge.segments.size() > 0;
-
+
if (stopMerges) {
merge.abort();
- throw new MergePolicy.MergeAbortedException("merge is aborted: "
- + segString(merge.segments));
+ throw new MergePolicy.MergeAbortedException("merge is aborted: " + segString(merge.segments));
}
-
+
boolean isExternal = false;
- for (SegmentInfoPerCommit info : merge.segments) {
+ for(SegmentInfoPerCommit info : merge.segments) {
if (mergingSegments.contains(info)) {
if (infoStream.isEnabled("IW")) {
- infoStream
- .message("IW", "reject merge " + segString(merge.segments)
- + ": segment " + segString(info)
- + " is already marked for merge");
+ infoStream.message("IW", "reject merge " + segString(merge.segments) + ": segment " + segString(info) + " is already marked for merge");
}
return false;
}
if (!segmentInfos.contains(info)) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "reject merge " + segString(merge.segments)
- + ": segment " + segString(info)
- + " does not exist in live infos");
+ infoStream.message("IW", "reject merge " + segString(merge.segments) + ": segment " + segString(info) + " does not exist in live infos");
}
return false;
}
@@ -3698,20 +3616,18 @@
merge.maxNumSegments = mergeMaxNumSegments;
}
}
-
+
ensureValidMerge(merge);
-
+
pendingMerges.add(merge);
-
+
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "add merge to pendingMerges: "
- + segString(merge.segments) + " [total " + pendingMerges.size()
- + " pending]");
+ infoStream.message("IW", "add merge to pendingMerges: " + segString(merge.segments) + " [total " + pendingMerges.size() + " pending]");
}
-
+
merge.mergeGen = mergeGen;
merge.isExternal = isExternal;
-
+
// OK it does not conflict; now record that this merge
// is running (while synchronized) to avoid race
// condition where two conflicting merges from different
@@ -3719,23 +3635,22 @@
if (infoStream.isEnabled("IW")) {
StringBuilder builder = new StringBuilder("registerMerge merging= [");
for (SegmentInfoPerCommit info : mergingSegments) {
- builder.append(info.info.name).append(", ");
+ builder.append(info.info.name).append(", ");
}
builder.append("]");
- // don't call mergingSegments.toString() could lead to
- // ConcurrentModException
+ // don't call mergingSegments.toString() could lead to ConcurrentModException
// since merge updates the segments FieldInfos
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", builder.toString());
+ infoStream.message("IW", builder.toString());
}
}
- for (SegmentInfoPerCommit info : merge.segments) {
+ for(SegmentInfoPerCommit info : merge.segments) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "registerMerge info=" + segString(info));
}
mergingSegments.add(info);
}
-
+
assert merge.estimatedMergeBytes == 0;
assert merge.totalMergeBytes == 0;
for(SegmentInfoPerCommit info : merge.segments) {
@@ -3750,16 +3665,13 @@
// Merge is now registered
merge.registerDone = true;
-
+
return true;
}
-
- /**
- * Does initial setup for a merge, which is fast but holds the synchronized
- * lock on IndexWriter instance.
- */
- final synchronized void mergeInit(MergePolicy.OneMerge merge)
- throws IOException {
+
+ /** Does initial setup for a merge, which is fast but holds
+ * the synchronized lock on IndexWriter instance. */
+ final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
boolean success = false;
try {
_mergeInit(merge);
@@ -3773,48 +3685,44 @@
}
}
}
-
- synchronized private void _mergeInit(MergePolicy.OneMerge merge)
- throws IOException {
-
+
+ synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOException {
+
assert testPoint("startMergeInit");
-
+
assert merge.registerDone;
assert merge.maxNumSegments == -1 || merge.maxNumSegments > 0;
-
+
if (hitOOM) {
- throw new IllegalStateException(
- "this writer hit an OutOfMemoryError; cannot merge");
+ throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
}
-
+
if (merge.info != null) {
// mergeInit already done
return;
}
-
+
if (merge.isAborted()) {
return;
}
-
+
// TODO: in the non-pool'd case this is somewhat
// wasteful, because we open these readers, close them,
- // and then open them again for merging. Maybe we
+ // and then open them again for merging. Maybe we
// could pre-pool them somehow in that case...
-
+
// Lock order: IW -> BD
- final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
- .applyDeletes(readerPool, merge.segments);
-
+ final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
+
if (result.anyDeletes) {
checkpoint();
}
-
+
if (!keepFullyDeletedSegments && result.allDeleted != null) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "drop 100% deleted segments: "
- + result.allDeleted);
+ infoStream.message("IW", "drop 100% deleted segments: " + result.allDeleted);
}
- for (SegmentInfoPerCommit info : result.allDeleted) {
+ for(SegmentInfoPerCommit info : result.allDeleted) {
segmentInfos.remove(info);
if (merge.segments.contains(info)) {
mergingSegments.remove(info);
@@ -3824,7 +3732,7 @@
}
checkpoint();
}
-
+
// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names.
@@ -3838,19 +3746,17 @@
// Lock order: IW -> BD
bufferedDeletesStream.prune(segmentInfos);
-
+
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "merge seg=" + merge.info.info.name + " "
- + segString(merge.segments));
+ infoStream.message("IW", "merge seg=" + merge.info.info.name + " " + segString(merge.segments));
}
}
-
+
static void setDiagnostics(SegmentInfo info, String source) {
setDiagnostics(info, source, null);
}
-
- private static void setDiagnostics(SegmentInfo info, String source,
- Map details) {
+
+ private static void setDiagnostics(SegmentInfo info, String source, Map details) {
Map diagnostics = new HashMap();
diagnostics.put("source", source);
diagnostics.put("lucene.version", Constants.LUCENE_VERSION);
@@ -3865,43 +3771,39 @@
}
info.setDiagnostics(diagnostics);
}
-
- /**
- * Does fininishing for a merge, which is fast but holds the synchronized lock
- * on IndexWriter instance.
- */
+
+  /** Does finishing for a merge, which is fast but holds
+ * the synchronized lock on IndexWriter instance. */
final synchronized void mergeFinish(MergePolicy.OneMerge merge) {
-
+
// forceMerge, addIndexes or finishMerges may be waiting
// on merges to finish.
notifyAll();
-
+
// It's possible we are called twice, eg if there was an
// exception inside mergeInit
if (merge.registerDone) {
final List sourceSegments = merge.segments;
- for (SegmentInfoPerCommit info : sourceSegments) {
+ for(SegmentInfoPerCommit info : sourceSegments) {
mergingSegments.remove(info);
}
merge.registerDone = false;
}
-
+
runningMerges.remove(merge);
}
-
- private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge,
- boolean suppressExceptions) throws IOException {
+
+ private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
final int numSegments = merge.readers.size();
Throwable th = null;
-
+
boolean drop = !suppressExceptions;
for (int i = 0; i < numSegments; i++) {
final SegmentReader sr = merge.readers.get(i);
if (sr != null) {
try {
- final ReadersAndLiveDocs rld = readerPool.get(sr.getSegmentInfo(),
- false);
+ final ReadersAndLiveDocs rld = readerPool.get(sr.getSegmentInfo(), false);
// We still hold a ref so it should not have been removed:
assert rld != null;
if (drop) {
@@ -3929,17 +3831,16 @@
throw new RuntimeException(th);
}
}
-
- /**
- * Does the actual (time-consuming) work of the merge, but without holding
- * synchronized lock on IndexWriter instance
- */
+
+ /** Does the actual (time-consuming) work of the merge,
+ * but without holding synchronized lock on IndexWriter
+ * instance */
private int mergeMiddle(MergePolicy.OneMerge merge) throws IOException {
-
+
merge.checkAborted(directory);
-
+
final String mergedName = merge.info.info.name;
-
+
List sourceSegments = merge.segments;
IOContext context = new IOContext(merge.getMergeInfo());
@@ -3950,52 +3851,48 @@
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "merging " + segString(merge.segments));
}
-
+
merge.readers = new ArrayList();
-
+
// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;
try {
int segUpto = 0;
- while (segUpto < sourceSegments.size()) {
-
+ while(segUpto < sourceSegments.size()) {
+
final SegmentInfoPerCommit info = sourceSegments.get(segUpto);
-
+
// Hold onto the "live" reader; we will use this to
// commit merged deletes
final ReadersAndLiveDocs rld = readerPool.get(info, true);
SegmentReader reader = rld.getMergeReader(context);
assert reader != null;
-
+
// Carefully pull the most recent live docs:
final Bits liveDocs;
final int delCount;
-
- synchronized (this) {
+
+ synchronized(this) {
// Must sync to ensure BufferedDeletesStream
// cannot change liveDocs/pendingDeleteCount while
// we pull a copy:
liveDocs = rld.getReadOnlyLiveDocs();
delCount = rld.getPendingDeleteCount() + info.getDelCount();
-
+
assert rld.verifyDocCounts();
-
+
if (infoStream.isEnabled("IW")) {
if (rld.getPendingDeleteCount() != 0) {
- infoStream.message("IW",
- "seg=" + segString(info) + " delCount=" + info.getDelCount()
- + " pendingDelCount=" + rld.getPendingDeleteCount());
+ infoStream.message("IW", "seg=" + segString(info) + " delCount=" + info.getDelCount() + " pendingDelCount=" + rld.getPendingDeleteCount());
} else if (info.getDelCount() != 0) {
- infoStream.message("IW", "seg=" + segString(info) + " delCount="
- + info.getDelCount());
+ infoStream.message("IW", "seg=" + segString(info) + " delCount=" + info.getDelCount());
} else {
- infoStream
- .message("IW", "seg=" + segString(info) + " no deletes");
+ infoStream.message("IW", "seg=" + segString(info) + " no deletes");
}
}
}
-
+
// Deletes might have happened after we pulled the merge reader and
// before we got a read-only copy of the segment's actual live docs
// (taking pending deletes into account). In that case we need to
@@ -4015,18 +3912,15 @@
newReader.decRef();
}
}
-
+
reader = newReader;
}
-
+
merge.readers.add(reader);
- assert delCount <= info.info.getDocCount() : "delCount=" + delCount
- + " info.docCount=" + info.info.getDocCount()
- + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount()
- + " info.getDelCount()=" + info.getDelCount();
+ assert delCount <= info.info.getDocCount(): "delCount=" + delCount + " info.docCount=" + info.info.getDocCount() + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount();
segUpto++;
}
-
+
// we pass merge.getMergeReaders() instead of merge.readers to allow the
// OneMerge to return a view over the actual segments to merge
final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(),
@@ -4034,7 +3928,7 @@
checkAbort, globalFieldNumberMap, context);
merge.checkAborted(directory);
-
+
// This is where all the work happens:
MergeState mergeState;
boolean success3 = false;
@@ -4043,54 +3937,45 @@
success3 = true;
} finally {
if (!success3) {
- synchronized (this) {
+ synchronized(this) {
deleter.refresh(merge.info.info.name);
}
}
}
assert mergeState.segmentInfo == merge.info.info;
- merge.info.info
- .setFiles(new HashSet(dirWrapper.getCreatedFiles()));
-
+ merge.info.info.setFiles(new HashSet(dirWrapper.getCreatedFiles()));
+
// Record which codec was used to write the segment
-
+
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "merge codec="
- + codec
- + " docCount="
- + merge.info.info.getDocCount()
- + "; merged segment has "
- + (mergeState.fieldInfos.hasVectors() ? "vectors" : "no vectors")
- + "; "
- + (mergeState.fieldInfos.hasNorms() ? "norms" : "no norms")
- + "; "
- + (mergeState.fieldInfos.hasDocValues() ? "docValues"
- : "no docValues") + "; "
- + (mergeState.fieldInfos.hasProx() ? "prox" : "no prox") + "; "
- + (mergeState.fieldInfos.hasProx() ? "freqs" : "no freqs"));
+ infoStream.message("IW", "merge codec=" + codec + " docCount=" + merge.info.info.getDocCount() + "; merged segment has " +
+ (mergeState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " +
+ (mergeState.fieldInfos.hasNorms() ? "norms" : "no norms") + "; " +
+ (mergeState.fieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " +
+ (mergeState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " +
+ (mergeState.fieldInfos.hasProx() ? "freqs" : "no freqs"));
}
-
+
// Very important to do this before opening the reader
// because codec must know if prox was written for
// this segment:
- // System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" +
- // merge.info.name);
+ //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
boolean useCompoundFile;
synchronized (this) { // Guard segmentInfos
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
}
-
+
if (useCompoundFile) {
success = false;
-
+
Collection filesToRemove = merge.info.files();
-
+
try {
filesToRemove = createCompoundFile(infoStream, directory, checkAbort,
merge.info.info, context, -1);
success = true;
} catch (IOException ioe) {
- synchronized (this) {
+ synchronized(this) {
if (merge.isAborted()) {
// This can happen if rollback or close(false)
// is called -- fall through to logic below to
@@ -4104,43 +3989,38 @@
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW",
- "hit exception creating compound file during merge");
+ infoStream.message("IW", "hit exception creating compound file during merge");
}
-
- synchronized (this) {
- deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "",
- IndexFileNames.COMPOUND_FILE_EXTENSION));
- deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "",
- IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
+
+ synchronized(this) {
+ deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
+ deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
deleter.deleteNewFiles(merge.info.files());
}
}
}
-
+
// So that, if we hit exc in deleteNewFiles (next)
// or in commitMerge (later), we close the
// per-segment readers in the finally clause below:
success = false;
-
- synchronized (this) {
-
+
+ synchronized(this) {
+
// delete new non cfs files directly: they were never
// registered with IFD
deleter.deleteNewFiles(filesToRemove);
-
+
if (merge.isAborted()) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "abort merge after building CFS");
}
- deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "",
- IndexFileNames.COMPOUND_FILE_EXTENSION));
- deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "",
- IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
+ deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
+ deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
return 0;
}
}
-
+
merge.info.info.setUseCompoundFile(true);
} else {
// So that, if we hit exc in commitMerge (later),
@@ -4148,8 +4028,8 @@
// clause below:
success = false;
}
-
- // Have codec write SegmentInfo. Must do this after
+
+ // Have codec write SegmentInfo. Must do this after
// creating CFS so that 1) .si isn't slurped into CFS,
// and 2) .si reflects useCompoundFile=true change
// above:
@@ -4162,48 +4042,43 @@
success2 = true;
} finally {
if (!success2) {
- synchronized (this) {
+ synchronized(this) {
deleter.deleteNewFiles(merge.info.files());
}
}
}
-
+
// TODO: ideally we would freeze merge.info here!!
// because any changes after writing the .si will be
- // lost...
-
+ // lost...
+
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", String.format(Locale.ROOT,
- "merged segment size=%.3f MB vs estimate=%.3f MB",
- merge.info.sizeInBytes() / 1024. / 1024.,
- merge.estimatedMergeBytes / 1024 / 1024.));
+ infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
}
-
- final IndexReaderWarmer mergedSegmentWarmer = config
- .getMergedSegmentWarmer();
- if (poolReaders && mergedSegmentWarmer != null
- && merge.info.info.getDocCount() != 0) {
+
+ final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
+ if (poolReaders && mergedSegmentWarmer != null && merge.info.info.getDocCount() != 0) {
final ReadersAndLiveDocs rld = readerPool.get(merge.info, true);
final SegmentReader sr = rld.getReader(IOContext.READ);
try {
mergedSegmentWarmer.warm(sr);
} finally {
- synchronized (this) {
+ synchronized(this) {
rld.release(sr);
readerPool.release(rld);
}
}
}
-
+
// Force READ context because we merge deletes onto
// this reader:
if (!commitMerge(merge, mergeState)) {
// commitMerge will return false if this merge was aborted
return 0;
}
-
+
success = true;
-
+
} finally {
// Readers are already closed in commitMerge if we didn't hit
// an exc:
@@ -4211,50 +4086,47 @@
closeMergeReaders(merge, true);
}
}
-
+
return merge.info.info.getDocCount();
}
-
+
synchronized void addMergeException(MergePolicy.OneMerge merge) {
assert merge.getException() != null;
if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen) {
mergeExceptions.add(merge);
}
}
-
+
// For test purposes.
final int getBufferedDeleteTermsSize() {
return docWriter.getBufferedDeleteTermsSize();
}
-
+
// For test purposes.
final int getNumBufferedDeleteTerms() {
return docWriter.getNumBufferedDeleteTerms();
}
-
+
// utility routines for tests
synchronized SegmentInfoPerCommit newestSegment() {
- return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size() - 1)
- : null;
+ return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
}
-
- /**
- * Returns a string description of all segments, for debugging.
- *
- * @lucene.internal
- */
+
+ /** Returns a string description of all segments, for
+ * debugging.
+ *
+ * @lucene.internal */
public synchronized String segString() {
return segString(segmentInfos);
}
-
- /**
- * Returns a string description of the specified segments, for debugging.
- *
- * @lucene.internal
- */
+
+ /** Returns a string description of the specified
+ * segments, for debugging.
+ *
+ * @lucene.internal */
public synchronized String segString(Iterable infos) {
final StringBuilder buffer = new StringBuilder();
- for (final SegmentInfoPerCommit info : infos) {
+ for(final SegmentInfoPerCommit info : infos) {
if (buffer.length() > 0) {
buffer.append(' ');
}
@@ -4262,17 +4134,15 @@
}
return buffer.toString();
}
-
- /**
- * Returns a string description of the specified segment, for debugging.
- *
- * @lucene.internal
- */
+
+ /** Returns a string description of the specified
+ * segment, for debugging.
+ *
+ * @lucene.internal */
public synchronized String segString(SegmentInfoPerCommit info) {
- return info.toString(info.info.dir,
- numDeletedDocs(info) - info.getDelCount());
+ return info.toString(info.info.dir, numDeletedDocs(info) - info.getDelCount());
}
-
+
private synchronized void doWait() {
// NOTE: the callers of this method should in theory
// be able to do simply wait(), but, as a defense
@@ -4286,128 +4156,120 @@
throw new ThreadInterruptedException(ie);
}
}
-
+
private boolean keepFullyDeletedSegments;
-
- /**
- * Only for testing.
- *
- * @lucene.internal
- */
+
+ /** Only for testing.
+ *
+ * @lucene.internal */
void keepFullyDeletedSegments() {
keepFullyDeletedSegments = true;
}
-
+
boolean getKeepFullyDeletedSegments() {
return keepFullyDeletedSegments;
}
-
+
// called only from assert
private boolean filesExist(SegmentInfos toSync) throws IOException {
Collection files = toSync.files(directory, false);
- for (final String fileName : files) {
- assert directory.fileExists(fileName) : "file " + fileName
- + " does not exist";
+ for(final String fileName: files) {
+ assert directory.fileExists(fileName): "file " + fileName + " does not exist";
// If this trips it means we are missing a call to
// .checkpoint somewhere, because by the time we
// are called, deleter should know about every
// file referenced by the current head
// segmentInfos:
- assert deleter.exists(fileName) : "IndexFileDeleter doesn't know about file "
- + fileName;
+ assert deleter.exists(fileName): "IndexFileDeleter doesn't know about file " + fileName;
}
return true;
}
-
+
// For infoStream output
synchronized SegmentInfos toLiveInfos(SegmentInfos sis) {
final SegmentInfos newSIS = new SegmentInfos();
- final Map liveSIS = new HashMap();
- for (SegmentInfoPerCommit info : segmentInfos) {
+ final Map liveSIS = new HashMap();
+ for(SegmentInfoPerCommit info : segmentInfos) {
liveSIS.put(info, info);
}
- for (SegmentInfoPerCommit info : sis) {
+ for(SegmentInfoPerCommit info : sis) {
SegmentInfoPerCommit liveInfo = liveSIS.get(info);
if (liveInfo != null) {
info = liveInfo;
}
newSIS.add(info);
}
-
+
return newSIS;
}
-
- /**
- * Walk through all files referenced by the current segmentInfos and ask the
- * Directory to sync each file, if it wasn't already. If that succeeds, then
- * we prepare a new segments_N file but do not fully commit it.
- */
+
+ /** Walk through all files referenced by the current
+ * segmentInfos and ask the Directory to sync each file,
+ * if it wasn't already. If that succeeds, then we
+ * prepare a new segments_N file but do not fully commit
+ * it. */
private void startCommit(final SegmentInfos toSync) throws IOException {
-
+
assert testPoint("startStartCommit");
assert pendingCommit == null;
-
+
if (hitOOM) {
- throw new IllegalStateException(
- "this writer hit an OutOfMemoryError; cannot commit");
+ throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
}
-
+
try {
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "startCommit(): start");
}
-
- synchronized (this) {
-
- assert lastCommitChangeCount <= changeCount : "lastCommitChangeCount="
- + lastCommitChangeCount + " changeCount=" + changeCount;
-
+
+ synchronized(this) {
+
+ assert lastCommitChangeCount <= changeCount: "lastCommitChangeCount=" + lastCommitChangeCount + " changeCount=" + changeCount;
+
if (pendingCommitChangeCount == lastCommitChangeCount) {
if (infoStream.isEnabled("IW")) {
- infoStream
- .message("IW", " skip startCommit(): no changes pending");
+ infoStream.message("IW", " skip startCommit(): no changes pending");
}
deleter.decRef(filesToCommit);
filesToCommit = null;
return;
}
-
+
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "startCommit index="
- + segString(toLiveInfos(toSync)) + " changeCount=" + changeCount);
+ infoStream.message("IW", "startCommit index=" + segString(toLiveInfos(toSync)) + " changeCount=" + changeCount);
}
-
+
assert filesExist(toSync);
}
-
+
assert testPoint("midStartCommit");
-
+
boolean pendingCommitSet = false;
-
+
try {
-
+
assert testPoint("midStartCommit2");
-
- synchronized (this) {
-
+
+ synchronized(this) {
+
assert pendingCommit == null;
-
+
assert segmentInfos.getGeneration() == toSync.getGeneration();
-
+
// Exception here means nothing is prepared
// (this method unwinds everything it did on
// an exception)
toSync.prepareCommit(directory);
- // System.out.println("DONE prepareCommit");
-
+ //System.out.println("DONE prepareCommit");
+
pendingCommitSet = true;
pendingCommit = toSync;
}
-
+
// This call can take a long time -- 10s of seconds
- // or more. We do it without syncing on this:
+ // or more. We do it without syncing on this:
boolean success = false;
final Collection filesToSync;
try {
@@ -4421,27 +4283,26 @@
toSync.rollbackCommit(directory);
}
}
-
+
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "done all syncs: " + filesToSync);
}
-
+
assert testPoint("midStartCommitSuccess");
-
+
} finally {
- synchronized (this) {
+ synchronized(this) {
// Have our master segmentInfos record the
- // generations we just prepared. We do this
+ // generations we just prepared. We do this
// on error or success so we don't
// double-write a segments_N file.
segmentInfos.updateGeneration(toSync);
-
+
if (!pendingCommitSet) {
if (infoStream.isEnabled("IW")) {
- infoStream
- .message("IW", "hit exception committing segments file");
+ infoStream.message("IW", "hit exception committing segments file");
}
-
+
// Hit exception
deleter.decRef(filesToCommit);
filesToCommit = null;
@@ -4453,60 +4314,54 @@
}
assert testPoint("finishStartCommit");
}
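  // Editor's sketch (illustrative, not part of this patch): startCommit() is the internal half
  // of IndexWriter's two-phase commit. Assuming an already-configured IndexWriter named
  // "writer", callers normally drive it via the public prepareCommit()/commit() API, rolling
  // back if publishing fails.
  static void commitInTwoPhases(IndexWriter writer) throws IOException {
    boolean published = false;
    try {
      writer.prepareCommit();   // fsyncs referenced files and writes a pending segments_N
      writer.commit();          // publishes the pending commit
      published = true;
    } finally {
      if (!published) {
        writer.rollback();      // discards the pending commit (and closes the writer)
      }
    }
  }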
-
+
/**
- * Returns true iff the index in the named directory is currently
- * locked.
- *
- * @param directory
- * the directory to check for a lock
- * @throws IOException
- * if there is a low-level IO error
+ * Returns true iff the index in the named directory is
+ * currently locked.
+ * @param directory the directory to check for a lock
+ * @throws IOException if there is a low-level IO error
*/
public static boolean isLocked(Directory directory) throws IOException {
return directory.makeLock(WRITE_LOCK_NAME).isLocked();
}
-
+
/**
* Forcibly unlocks the index in the named directory.
*
- * Caution: this should only be used by failure recovery code, when it is
- * known that no other process nor thread is in fact currently accessing this
- * index.
+ * Caution: this should only be used by failure recovery code,
+ * when it is known that no other process nor thread is in fact
+ * currently accessing this index.
*/
public static void unlock(Directory directory) throws IOException {
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
}
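  // Editor's sketch (illustrative, not part of this patch): recovery-only use of
  // isLocked()/unlock(). Assumes the caller has verified out-of-band that no other process
  // or thread is currently writing to this index.
  static void clearStaleWriteLock(Directory dir) throws IOException {
    if (IndexWriter.isLocked(dir)) {
      IndexWriter.unlock(dir);   // forcibly release the abandoned write lock
    }
  }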
-
- /**
- * If {@link DirectoryReader#open(IndexWriter,boolean)} has been called (ie,
- * this writer is in near real-time mode), then after a merge completes, this
- * class can be invoked to warm the reader on the newly merged segment, before
- * the merge commits. This is not required for near real-time search, but will
- * reduce search latency on opening a new near real-time reader after a merge
- * completes.
- *
+
+ /** If {@link DirectoryReader#open(IndexWriter,boolean)} has
+ * been called (ie, this writer is in near real-time
+ * mode), then after a merge completes, this class can be
+ * invoked to warm the reader on the newly merged
+ * segment, before the merge commits. This is not
+ * required for near real-time search, but will reduce
+ * search latency on opening a new near real-time reader
+ * after a merge completes.
+ *
* @lucene.experimental
- *
- *
- * NOTE: warm is called before any deletes have
- * been carried over to the merged segment.
- */
+ *
+ *
+   * <p><b>NOTE</b>: warm is called before any deletes have
+ * been carried over to the merged segment. */
public static abstract class IndexReaderWarmer {
-
- /**
- * Sole constructor. (For invocation by subclass constructors, typically
- * implicit.)
- */
- protected IndexReaderWarmer() {}
-
- /**
- * Invoked on the {@link AtomicReader} for the newly merged segment, before
- * that segment is made visible to near-real-time readers.
- */
+
+ /** Sole constructor. (For invocation by subclass
+ * constructors, typically implicit.) */
+ protected IndexReaderWarmer() {
+ }
+
+ /** Invoked on the {@link AtomicReader} for the newly
+ * merged segment, before that segment is made visible
+ * to near-real-time readers. */
public abstract void warm(AtomicReader reader) throws IOException;
}
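  // Editor's sketch (illustrative, not part of this patch): a minimal warmer that touches the
  // terms of an assumed "id" field so a newly merged segment is warm before NRT readers see
  // it; it would be installed via IndexWriterConfig#setMergedSegmentWarmer.
  static final class SimpleWarmer extends IndexReaderWarmer {
    @Override
    public void warm(AtomicReader reader) throws IOException {
      Terms terms = reader.terms("id");        // "id" is an example field name
      if (terms != null) {
        TermsEnum termsEnum = terms.iterator(null);
        while (termsEnum.next() != null) {
          // iterating pre-loads the terms dictionary for this segment
        }
      }
    }
  }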
-
+
private void handleOOM(OutOfMemoryError oom, String location) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit OutOfMemoryError inside " + location);
@@ -4514,72 +4369,66 @@
hitOOM = true;
throw oom;
}
-
- // Used only by assert for testing. Current points:
- // startDoFlush
- // startCommitMerge
- // startStartCommit
- // midStartCommit
- // midStartCommit2
- // midStartCommitSuccess
- // finishStartCommit
- // startCommitMergeDeletes
- // startMergeInit
- // DocumentsWriter.ThreadState.init start
+
+ // Used only by assert for testing. Current points:
+ // startDoFlush
+ // startCommitMerge
+ // startStartCommit
+ // midStartCommit
+ // midStartCommit2
+ // midStartCommitSuccess
+ // finishStartCommit
+ // startCommitMergeDeletes
+ // startMergeInit
+ // DocumentsWriter.ThreadState.init start
boolean testPoint(String name) {
return true;
}
-
+
synchronized boolean nrtIsCurrent(SegmentInfos infos) {
- // System.out.println("IW.nrtIsCurrent " + (infos.version ==
- // segmentInfos.version && !docWriter.anyChanges() &&
- // !bufferedDeletesStream.any()));
+ //System.out.println("IW.nrtIsCurrent " + (infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any()));
ensureOpen();
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW",
- "nrtIsCurrent: infoVersion matches: "
- + (infos.version == segmentInfos.version) + " DW changes: "
- + docWriter.anyChanges() + " BD changes: "
- + bufferedDeletesStream.any());
+ infoStream.message("IW", "nrtIsCurrent: infoVersion matches: " + (infos.version == segmentInfos.version) + "; DW changes: " + docWriter.anyChanges() + "; BD changes: "+ bufferedDeletesStream.any());
}
- return infos.version == segmentInfos.version && !docWriter.anyChanges()
- && !bufferedDeletesStream.any();
+ return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
}
-
+
synchronized boolean isClosed() {
return closed;
}
-
- /**
- * Expert: remove any index files that are no longer used.
- *
- *
- * IndexWriter normally deletes unused files itself, during indexing. However,
- * on Windows, which disallows deletion of open files, if there is a reader
- * open on the index then those files cannot be deleted. This is fine, because
- * IndexWriter will periodically retry the deletion.
- *
- *
- *
- * However, IndexWriter doesn't try that often: only on open, close, flushing
- * a new segment, and finishing a merge. If you don't do any of these actions
- * with your IndexWriter, you'll see the unused files linger. If that's a
- * problem, call this method to delete them (once you've closed the open
- * readers that were preventing their deletion).
- *
- *
- * In addition, you can call this method to delete unreferenced index commits.
- * This might be useful if you are using an {@link IndexDeletionPolicy} which
- * holds onto index commits until some criteria are met, but those commits are
- * no longer needed. Otherwise, those commits will be deleted the next time
- * commit() is called.
+
+ /** Expert: remove any index files that are no longer
+ * used.
+ *
+ *
+   * <p> IndexWriter normally deletes unused files itself,
+ * during indexing. However, on Windows, which disallows
+ * deletion of open files, if there is a reader open on
+ * the index then those files cannot be deleted. This is
+ * fine, because IndexWriter will periodically retry
+ * the deletion.
+ *
+ *
+   * <p> However, IndexWriter doesn't try that often: only
+ * on open, close, flushing a new segment, and finishing
+ * a merge. If you don't do any of these actions with your
+ * IndexWriter, you'll see the unused files linger. If
+ * that's a problem, call this method to delete them
+ * (once you've closed the open readers that were
+ * preventing their deletion).
+ *
+ *
+   * <p> In addition, you can call this method to delete
+ * unreferenced index commits. This might be useful if you
+ * are using an {@link IndexDeletionPolicy} which holds
+ * onto index commits until some criteria are met, but those
+ * commits are no longer needed. Otherwise, those commits will
+ * be deleted the next time commit() is called.
*/
public synchronized void deleteUnusedFiles() throws IOException {
ensureOpen(false);
deleter.deletePendingFiles();
deleter.revisitPolicy();
}
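  // Editor's sketch (illustrative, not part of this patch): reclaiming disk space once the
  // readers that were pinning old files (common on Windows) have been closed.
  static void reclaimUnusedFiles(DirectoryReader reader, IndexWriter writer) throws IOException {
    reader.close();               // release the files the reader was holding open
    writer.deleteUnusedFiles();   // retry pending deletions now, not at the next flush/merge
  }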
-
+
// Called by DirectoryReader.doClose
synchronized void deletePendingFiles() throws IOException {
deleter.deletePendingFiles();
@@ -4657,7 +4506,7 @@
directory.copy(cfsDir, file, file, context);
checkAbort.work(directory.fileLength(file));
}
- } catch (IOException ex) {
+ } catch(IOException ex) {
prior = ex;
} finally {
boolean success = false;
@@ -4668,14 +4517,15 @@
if (!success) {
try {
directory.deleteFile(fileName);
- } catch (Throwable t) {}
+ } catch (Throwable t) {
+ }
try {
directory.deleteFile(cfeFileName);
} catch (Throwable t) {}
}
}
}
-
+
// Replace all previous files with the CFS/CFE files:
    Set<String> siFiles = new HashSet<String>();
siFiles.addAll(info.files());
@@ -4683,29 +4533,23 @@
siFiles.add(fileName);
siFiles.add(cfeFileName);
info.setFiles(siFiles);
-
+
return files;
}
/**
* Tries to delete the given files if unreferenced
- *
- * @param files
- * the files to delete
- * @throws IOException
- * if an {@link IOException} occurs
+ * @param files the files to delete
+ * @throws IOException if an {@link IOException} occurs
* @see IndexFileDeleter#deleteNewFiles(Collection)
*/
-  synchronized final void deleteNewFiles(Collection<String> files)
-      throws IOException {
+  synchronized final void deleteNewFiles(Collection<String> files) throws IOException {
deleter.deleteNewFiles(files);
}
/**
- * Cleans up residuals from a segment that could not be entirely flushed due
- * to an error
- *
- * @see IndexFileDeleter#refresh(String)
+ * Cleans up residuals from a segment that could not be entirely flushed due to an error
+ * @see IndexFileDeleter#refresh(String)
*/
synchronized final void flushFailed(SegmentInfo info) throws IOException {
deleter.refresh(info.name);
Index: lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (working copy)
@@ -33,23 +33,22 @@
// javadocs
/**
- * IndexReader implementation over a single segment.
+ * IndexReader implementation over a single segment.
*
- * Instances pointing to the same segment (but with different deletes, etc) may
- * share the same core data.
- *
+ * Instances pointing to the same segment (but with different deletes, etc)
+ * may share the same core data.
* @lucene.experimental
*/
public final class SegmentReader extends AtomicReader {
-
+
private final SegmentInfoPerCommit si;
private final Bits liveDocs;
-
+
// Normally set to si.docCount - si.delDocCount, unless we
// were created as an NRT reader from IW, in which case IW
// tells us the docCount:
private final int numDocs;
-
+
final SegmentCoreReaders core;
final SegmentCoreReaders[] updates;
@@ -62,15 +61,11 @@
/**
* Constructs a new SegmentReader with a new core.
- *
- * @throws CorruptIndexException
- * if the index is corrupt
- * @throws IOException
- * if there is a low-level IO error
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
// TODO: why is this public?
- public SegmentReader(SegmentInfoPerCommit si, int termInfosIndexDivisor,
- IOContext context) throws IOException {
+ public SegmentReader(SegmentInfoPerCommit si, int termInfosIndexDivisor, IOContext context) throws IOException {
this.si = si;
this.context = context;
core = new SegmentCoreReaders(this, si.info, -1, context, termInfosIndexDivisor);
@@ -79,8 +74,7 @@
try {
if (si.hasDeletions()) {
// NOTE: the bitvector is stored using the regular directory, not cfs
- liveDocs = si.info.getCodec().liveDocsFormat()
- .readLiveDocs(directory(), si, new IOContext(IOContext.READ, true));
+ liveDocs = si.info.getCodec().liveDocsFormat().readLiveDocs(directory(), si, new IOContext(IOContext.READ, true));
} else {
assert si.getDelCount() == 0;
liveDocs = null;
@@ -89,7 +83,7 @@
success = true;
} finally {
// With lock-less commits, it's entirely possible (and
- // fine) to hit a FileNotFound exception above. In
+ // fine) to hit a FileNotFound exception above. In
// this case, we want to explicitly close any subset
// of things that were opened so that we don't have to
// wait for a GC to do so.
@@ -131,7 +125,7 @@
assert liveDocs != null;
this.liveDocs = liveDocs;
-
+
this.numDocs = numDocs;
}
@@ -154,10 +148,10 @@
ensureOpen();
return liveDocs;
}
-
+
@Override
protected void doClose() throws IOException {
- // System.out.println("SR.close seg=" + si);
+ //System.out.println("SR.close seg=" + si);
core.decRef();
if (updates != null) {
for (int i = 0; i < updates.length; i++) {
@@ -165,7 +159,7 @@
}
}
}
-
+
@Override
public FieldInfos getFieldInfos() {
ensureOpen();
@@ -245,12 +239,11 @@
}
@Override
- public void document(int docID, StoredFieldVisitor visitor)
- throws IOException {
+ public void document(int docID, StoredFieldVisitor visitor) throws IOException {
checkBounds(docID);
getFieldsReader().visitDocument(docID, visitor, null);
}
-
+
@Override
public Fields fields() throws IOException {
ensureOpen();
@@ -275,13 +268,13 @@
}
return fields;
}
-
+
@Override
public int numDocs() {
// Don't call ensureOpen() here (it could affect performance)
return numDocs;
}
-
+
@Override
public int maxDoc() {
// Don't call ensureOpen() here (it could affect performance)
@@ -365,13 +358,12 @@
return new StackedFields(fields, replacementsMap, docID);
}
-
+
@Override
public String toString() {
// SegmentInfo.toString takes dir and number of
// *pending* deletions; so we reverse compute that here:
- return si.toString(si.info.dir,
- si.info.getDocCount() - numDocs - si.getDelCount());
+ return si.toString(si.info.dir, si.info.getDocCount() - numDocs - si.getDelCount());
}
/**
@@ -387,7 +379,7 @@
public SegmentInfoPerCommit getSegmentInfo() {
return si;
}
-
+
/** Returns the directory this index resides in. */
public Directory directory() {
// Don't ensureOpen here -- in certain cases, when a
@@ -395,30 +387,29 @@
// this method on the closed original reader
return si.info.dir;
}
-
+
// This is necessary so that cloned SegmentReaders (which
// share the underlying postings data) will map to the
- // same entry in the FieldCache. See LUCENE-1579.
+ // same entry in the FieldCache. See LUCENE-1579.
@Override
public Object getCoreCacheKey() {
return core;
}
-
+
@Override
public Object getCombinedCoreAndDeletesKey() {
return this;
}
-
- /**
- * Returns term infos index divisor originally passed to
- * {@link #SegmentReader(SegmentInfoPerCommit, int, IOContext)}.
- */
+
+ /** Returns term infos index divisor originally passed to
+ * {@link #SegmentReader(SegmentInfoPerCommit, int, IOContext)}. */
public int getTermInfosIndexDivisor() {
return core.termsIndexDivisor;
}
@Override
public NumericDocValues getNumericDocValues(String field) throws IOException {
+ ensureOpen();
return core.getNumericDocValues(field);
}
@@ -427,7 +418,7 @@
ensureOpen();
return core.getBinaryDocValues(field);
}
-
+
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
ensureOpen();
@@ -456,21 +447,21 @@
}
/**
- * Called when the shared core for this SegmentReader is closed.
+ * Called when the shared core for this SegmentReader
+ * is closed.
*
- * This listener is called only once all SegmentReaders sharing the same core
- * are closed. At this point it is safe for apps to evict this reader from any
- * caches keyed on {@link #getCoreCacheKey}. This is the same interface that
- * {@link FieldCache} uses, internally, to evict entries.
- *
+ * This listener is called only once all SegmentReaders
+ * sharing the same core are closed. At this point it
+ * is safe for apps to evict this reader from any caches
+ * keyed on {@link #getCoreCacheKey}. This is the same
+ * interface that {@link FieldCache} uses, internally,
+ * to evict entries.
*
* @lucene.experimental
*/
public static interface CoreClosedListener {
- /**
- * Invoked when the shared core of the provided {@link SegmentReader} has
- * closed.
- */
+ /** Invoked when the shared core of the provided {@link
+ * SegmentReader} has closed. */
public void onClose(SegmentReader owner);
}
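  // Editor's sketch (illustrative, not part of this patch): a listener that evicts a per-core
  // cache entry keyed on getCoreCacheKey() once no SegmentReader shares the core any more.
  // How the listener is registered on the reader is assumed here, not shown by this patch.
  static CoreClosedListener evictingListener(final java.util.Map<Object,?> cache) {
    return new CoreClosedListener() {
      @Override
      public void onClose(SegmentReader owner) {
        cache.remove(owner.getCoreCacheKey());   // safe: the shared core is now closed
      }
    };
  }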
Index: lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java (working copy)
@@ -119,4 +119,17 @@
segUpdates = state.segUpdates;
delCountOnFlush = state.delCountOnFlush;
}
+
+ public boolean hasDeletesWithoutUpdates() {
+ if (segDeletes == null) {
+ return false;
+ }
+ if (segUpdates == null) {
+ return true;
+ }
+ if (segUpdates.any()) {
+ return false;
+ }
+ return true;
+ }
}
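  // Editor's note (illustrative, not part of this patch): decision table for the
  // hasDeletesWithoutUpdates() helper added above --
  //   segDeletes == null                         -> false (nothing buffered for this segment)
  //   segDeletes != null && segUpdates == null   -> true  (pure deletes, no field updates)
  //   segDeletes != null && segUpdates.any()     -> false (field updates are also pending)
  //   segDeletes != null && !segUpdates.any()    -> true  (updates object exists but is empty)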
Index: lucene/core/src/java/org/apache/lucene/index/SortedFieldsUpdates.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SortedFieldsUpdates.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/SortedFieldsUpdates.java (working copy)
@@ -1,25 +0,0 @@
-package org.apache.lucene.index;
-
-import java.util.SortedSet;
-import java.util.TreeMap;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class SortedFieldsUpdates extends TreeMap> {
-
-}
Index: lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java (working copy)
@@ -22,9 +22,9 @@
import org.apache.lucene.store.Directory;
abstract class StoredFieldsConsumer {
- abstract void addField(int docID, StorableField field, FieldInfo fieldInfo) throws IOException;
- abstract void flush(SegmentWriteState state) throws IOException;
- abstract void abort() throws IOException;
- abstract void startDocument() throws IOException;
- abstract void finishDocument(Directory directory, SegmentInfo info) throws IOException;
+ abstract void addField(int docID, StorableField field, FieldInfo fieldInfo) throws IOException;
+ abstract void flush(SegmentWriteState state) throws IOException;
+ abstract void abort() throws IOException;
+ abstract void startDocument() throws IOException;
+ abstract void finishDocument(Directory directory, SegmentInfo info) throws IOException;
}
Index: lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java (working copy)
@@ -19,6 +19,8 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.InfoStream;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -45,12 +47,13 @@
static final FieldInfos EMPTY_FIELD_INFOS = new FieldInfos(new FieldInfo[0]);
  /** Updates mapped by doc ID, for each doc a sorted list of updates. */
- private TreeMap>> docIdToUpdatesMap;
- private HashMap> updatesToDocIdMap;
- private LinkedHashMap allApplied;
+ private final TreeMap>> docIdToUpdatesMap;
+ private final HashMap> updatesToDocIdMap;
+ private final LinkedHashMap allApplied;
+ private final boolean exactSegment;
+ private final InfoStream infoStream;
private long generation;
- private boolean exactSegment;
private Map fieldGenerationReplacments;
@@ -62,15 +65,18 @@
private Analyzer analyzer;
UpdatedSegmentData(SegmentReader reader,
- SortedSet packetUpdates, boolean exactSegment)
- throws IOException {
+ SortedSet packetUpdates, boolean exactSegment,
+ InfoStream infoStream) throws IOException {
docIdToUpdatesMap = new TreeMap<>();
updatesToDocIdMap = new HashMap<>();
+ allApplied = new LinkedHashMap<>();
this.exactSegment = exactSegment;
+ this.infoStream = infoStream;
- allApplied = new LinkedHashMap<>();
-
for (FieldsUpdate update : packetUpdates) {
+ if (infoStream.isEnabled("USD")) {
+ infoStream.message("USD", "update: " + update);
+ }
// add updates according to the base reader
DocsEnum docsEnum = reader.termDocsEnum(update.term);
if (docsEnum != null) {
@@ -101,34 +107,51 @@
allApplied.put(update, new UpdateAtomicReader(update.directory,
update.segmentInfo, IOContext.DEFAULT));
}
-
+ if (infoStream.isEnabled("USD")) {
+ infoStream.message("USD", "done init");
+ }
}
private void addUpdate(int docId, FieldsUpdate fieldsUpdate) {
if (exactSegment && docId > fieldsUpdate.docIdUpto) {
return;
}
- TreeMap> prevUpdates = docIdToUpdatesMap.get(docId);
- if (prevUpdates == null) {
- prevUpdates = new TreeMap<>();
- docIdToUpdatesMap.put(docId, prevUpdates);
- } else if (fieldsUpdate.operation == Operation.REPLACE_FIELDS) {
- // set ignored fields in previous updates
- for (Entry> addIgnore : prevUpdates.entrySet()) {
- if (addIgnore.getValue() == null) {
- prevUpdates.put(addIgnore.getKey(), new HashSet<>(fieldsUpdate.replacedFields));
- } else {
- addIgnore.getValue().addAll(fieldsUpdate.replacedFields);
+ synchronized (docIdToUpdatesMap) {
+ TreeMap> prevUpdates = docIdToUpdatesMap
+ .get(docId);
+ if (prevUpdates == null) {
+ prevUpdates = new TreeMap<>();
+ docIdToUpdatesMap.put(docId, prevUpdates);
+ if (infoStream.isEnabled("USD")) {
+ infoStream.message("USD", "adding to doc " + docId);
}
+ } else if (fieldsUpdate.operation == Operation.REPLACE_FIELDS) {
+ // set ignored fields in previous updates
+ for (Entry> prev : prevUpdates.entrySet()) {
+ if (prev.getValue() == null) {
+ prevUpdates.put(prev.getKey(), new HashSet<>(
+ fieldsUpdate.replacedFields));
+ if (infoStream.isEnabled("USD")) {
+ infoStream.message("USD", "new ignored fields "
+ + fieldsUpdate.replacedFields);
+ }
+ } else {
+ prev.getValue().addAll(fieldsUpdate.replacedFields);
+ if (infoStream.isEnabled("USD")) {
+ infoStream.message("USD", "adding ignored fields "
+ + fieldsUpdate.replacedFields);
+ }
+ }
+ }
}
+ prevUpdates.put(fieldsUpdate, null);
+ List prevDocIds = updatesToDocIdMap.get(fieldsUpdate);
+ if (prevDocIds == null) {
+ prevDocIds = new ArrayList();
+ updatesToDocIdMap.put(fieldsUpdate, prevDocIds);
+ }
+ prevDocIds.add(docId);
}
- prevUpdates.put(fieldsUpdate, null);
- List prevDocIds = updatesToDocIdMap.get(fieldsUpdate);
- if (prevDocIds == null) {
- prevDocIds = new ArrayList();
- updatesToDocIdMap.put(fieldsUpdate, prevDocIds);
- }
- prevDocIds.add(docId);
}
boolean hasUpdates() {
@@ -158,7 +181,8 @@
*/
private void nextDocUpdate() {
if (updatesIterator.hasNext()) {
- Entry>> docUpdates = updatesIterator.next();
+ Entry>> docUpdates = updatesIterator
+ .next();
nextDocID = docUpdates.getKey();
nextUpdate = docUpdates.getValue();
} else {
@@ -177,42 +201,50 @@
AtomicReader nextReader() throws IOException {
AtomicReader toReturn = null;
- if (currDocID < nextDocID) {
- // empty documents reader required
- toReturn = new UpdateAtomicReader(nextDocID - currDocID);
- currDocID = nextDocID;
- } else if (currDocID < numDocs) {
- // get the an actual updates reader...
- FieldsUpdate update = nextUpdate.firstEntry().getKey();
- Set ignore = nextUpdate.remove(update);
- toReturn = allApplied.get(update);
-
- // ... and if done for this document remove from updates map
- if (nextUpdate.isEmpty()) {
- updatesIterator.remove();
- }
-
- // add generation replacements if exist
- if (update.replacedFields != null) {
- if (fieldGenerationReplacments == null) {
- fieldGenerationReplacments = new HashMap();
+ boolean success = false;
+ try {
+ if (currDocID < nextDocID) {
+ // empty documents reader required
+ toReturn = new UpdateAtomicReader(nextDocID - currDocID);
+ currDocID = nextDocID;
+ } else if (currDocID < numDocs) {
+        // get an actual updates reader...
+ FieldsUpdate update = nextUpdate.firstEntry().getKey();
+ nextUpdate.remove(update);
+ toReturn = allApplied.get(update);
+
+ // ... and if done for this document remove from updates map
+ if (nextUpdate.isEmpty()) {
+ updatesIterator.remove();
}
- for (String fieldName : update.replacedFields) {
- FieldGenerationReplacements fieldReplacement = fieldGenerationReplacments
- .get(fieldName);
- if (fieldReplacement == null) {
- fieldReplacement = new FieldGenerationReplacements();
- fieldGenerationReplacments.put(fieldName, fieldReplacement);
+
+ // add generation replacements if exist
+ if (update.replacedFields != null) {
+ if (fieldGenerationReplacments == null) {
+ fieldGenerationReplacments = new HashMap();
}
- fieldReplacement.set(currDocID, generation);
+ for (String fieldName : update.replacedFields) {
+ FieldGenerationReplacements fieldReplacement = fieldGenerationReplacments
+ .get(fieldName);
+ if (fieldReplacement == null) {
+ fieldReplacement = new FieldGenerationReplacements();
+ fieldGenerationReplacments.put(fieldName, fieldReplacement);
+ }
+ fieldReplacement.set(currDocID, generation);
+ }
}
+ // move to next doc id
+ nextDocUpdate();
+ currDocID++;
}
- // move to next doc id
- nextDocUpdate();
- currDocID++;
+ success = true;
+ return toReturn;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(toReturn);
+ }
}
- return toReturn;
}
boolean isEmpty() {
@@ -238,7 +270,7 @@
*/
UpdateAtomicReader(Directory fieldsDir, SegmentInfo segmentInfo,
IOContext context) throws IOException {
- core = new SegmentCoreReaders(null, segmentInfo, -1, context, -1);
+ core = new SegmentCoreReaders(null, segmentInfo, -1, context, 1);
numDocs = 1;
}
@@ -254,13 +286,13 @@
if (core == null) {
return false;
}
- DocsEnum termDocsEnum = termDocsEnum(term);
- if (termDocsEnum == null) {
+ Terms terms = terms(term.field);
+ if (terms == null) {
return false;
}
- return termDocsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
+ return terms.iterator(null).seekExact(term.bytes(), false);
}
-
+
@Override
public Fields fields() throws IOException {
if (core == null) {
Index: lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java (working copy)
@@ -25,7 +25,7 @@
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.NearSpansOrdered;
import org.apache.lucene.search.spans.NearSpansUnordered;
import org.apache.lucene.search.spans.SpanNearQuery;
@@ -53,7 +53,7 @@
*
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
*
- * @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef)
+ * @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
*/
public class PayloadNearQuery extends SpanNearQuery {
protected String fieldName;
@@ -151,7 +151,7 @@
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
- similarity, similarity.sloppySimScorer(stats, context));
+ similarity, similarity.simScorer(stats, context));
}
@Override
@@ -161,7 +161,7 @@
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
- SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
+ SimScorer docScorer = similarity.simScorer(stats, context);
Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
@@ -190,7 +190,7 @@
private int payloadsSeen;
protected PayloadNearSpanScorer(Spans spans, Weight weight,
- Similarity similarity, Similarity.SloppySimScorer docScorer) throws IOException {
+ Similarity similarity, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
this.spans = spans;
}
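  // Editor's sketch (illustrative, not part of this patch): building a PayloadNearQuery whose
  // payload contribution is aggregated by a pluggable PayloadFunction. Field and terms
  // ("body", "quick", "fox") are example values; the usual Term/SpanQuery/SpanTermQuery/Query
  // imports are assumed.
  static Query payloadNearExample() {
    SpanQuery[] clauses = new SpanQuery[] {
        new SpanTermQuery(new Term("body", "quick")),
        new SpanTermQuery(new Term("body", "fox"))
    };
    // slop of 2, ordered match, take the maximum payload score per document
    return new PayloadNearQuery(clauses, 2, true, new MaxPayloadFunction());
  }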
Index: lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java (working copy)
@@ -27,7 +27,7 @@
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.search.spans.SpanTermQuery;
@@ -49,7 +49,7 @@
* which returns 1 by default.
*
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
- * @see org.apache.lucene.search.similarities.Similarity.SloppySimScorer#computePayloadFactor(int, int, int, BytesRef)
+ * @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
**/
public class PayloadTermQuery extends SpanTermQuery {
protected PayloadFunction function;
@@ -82,7 +82,7 @@
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
- this, similarity.sloppySimScorer(stats, context));
+ this, similarity.simScorer(stats, context));
}
protected class PayloadTermSpanScorer extends SpanScorer {
@@ -91,7 +91,7 @@
protected int payloadsSeen;
private final TermSpans termSpans;
- public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException {
+ public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
termSpans = spans;
}
@@ -182,7 +182,7 @@
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.sloppyFreq();
- SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
+ SimScorer docScorer = similarity.simScorer(stats, context);
Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
Index: lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java (working copy)
@@ -212,80 +212,18 @@
}
@Override
- public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
+ public final SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
BM25Stats bm25stats = (BM25Stats) stats;
- final NumericDocValues norms = context.reader().getNormValues(bm25stats.field);
- return norms == null
- ? new ExactBM25DocScorerNoNorms(bm25stats)
- : new ExactBM25DocScorer(bm25stats, norms);
+ return new BM25DocScorer(bm25stats, context.reader().getNormValues(bm25stats.field));
}
-
- @Override
- public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- BM25Stats bm25stats = (BM25Stats) stats;
- return new SloppyBM25DocScorer(bm25stats, context.reader().getNormValues(bm25stats.field));
- }
- private class ExactBM25DocScorer extends ExactSimScorer {
+ private class BM25DocScorer extends SimScorer {
private final BM25Stats stats;
- private final float weightValue;
- private final NumericDocValues norms;
- private final float[] cache;
-
- ExactBM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
- assert norms != null;
- this.stats = stats;
- this.weightValue = stats.weight * (k1 + 1); // boost * idf * (k1 + 1)
- this.cache = stats.cache;
- this.norms = norms;
- }
-
- @Override
- public float score(int doc, int freq) {
- return weightValue * freq / (freq + cache[(byte)norms.get(doc) & 0xFF]);
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- return explainScore(doc, freq, stats, norms);
- }
- }
-
- /** there are no norms, we act as if b=0 */
- private class ExactBM25DocScorerNoNorms extends ExactSimScorer {
- private final BM25Stats stats;
- private final float weightValue;
- private static final int SCORE_CACHE_SIZE = 32;
- private float[] scoreCache = new float[SCORE_CACHE_SIZE];
-
- ExactBM25DocScorerNoNorms(BM25Stats stats) {
- this.stats = stats;
- this.weightValue = stats.weight * (k1 + 1); // boost * idf * (k1 + 1)
- for (int i = 0; i < SCORE_CACHE_SIZE; i++)
- scoreCache[i] = weightValue * i / (i + k1);
- }
-
- @Override
- public float score(int doc, int freq) {
- // TODO: maybe score cache is more trouble than its worth?
- return freq < SCORE_CACHE_SIZE // check cache
- ? scoreCache[freq] // cache hit
- : weightValue * freq / (freq + k1); // cache miss
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- return explainScore(doc, freq, stats, null);
- }
- }
-
- private class SloppyBM25DocScorer extends SloppySimScorer {
- private final BM25Stats stats;
private final float weightValue; // boost * idf * (k1 + 1)
private final NumericDocValues norms;
private final float[] cache;
- SloppyBM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
+ BM25DocScorer(BM25Stats stats, NumericDocValues norms) throws IOException {
this.stats = stats;
this.weightValue = stats.weight * (k1 + 1);
this.cache = stats.cache;
Index: lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java (working copy)
@@ -57,60 +57,25 @@
}
@Override
- public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- ExactSimScorer subScorers[] = new ExactSimScorer[sims.length];
+ public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
+ SimScorer subScorers[] = new SimScorer[sims.length];
for (int i = 0; i < subScorers.length; i++) {
- subScorers[i] = sims[i].exactSimScorer(((MultiStats)stats).subStats[i], context);
+ subScorers[i] = sims[i].simScorer(((MultiStats)stats).subStats[i], context);
}
- return new MultiExactDocScorer(subScorers);
+ return new MultiSimScorer(subScorers);
}
-
- @Override
- public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- SloppySimScorer subScorers[] = new SloppySimScorer[sims.length];
- for (int i = 0; i < subScorers.length; i++) {
- subScorers[i] = sims[i].sloppySimScorer(((MultiStats)stats).subStats[i], context);
- }
- return new MultiSloppyDocScorer(subScorers);
- }
- static class MultiExactDocScorer extends ExactSimScorer {
- private final ExactSimScorer subScorers[];
+ static class MultiSimScorer extends SimScorer {
+ private final SimScorer subScorers[];
- MultiExactDocScorer(ExactSimScorer subScorers[]) {
+ MultiSimScorer(SimScorer subScorers[]) {
this.subScorers = subScorers;
}
@Override
- public float score(int doc, int freq) {
- float sum = 0.0f;
- for (ExactSimScorer subScorer : subScorers) {
- sum += subScorer.score(doc, freq);
- }
- return sum;
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- Explanation expl = new Explanation(score(doc, (int)freq.getValue()), "sum of:");
- for (ExactSimScorer subScorer : subScorers) {
- expl.addDetail(subScorer.explain(doc, freq));
- }
- return expl;
- }
- }
-
- static class MultiSloppyDocScorer extends SloppySimScorer {
- private final SloppySimScorer subScorers[];
-
- MultiSloppyDocScorer(SloppySimScorer subScorers[]) {
- this.subScorers = subScorers;
- }
-
- @Override
public float score(int doc, float freq) {
float sum = 0.0f;
- for (SloppySimScorer subScorer : subScorers) {
+ for (SimScorer subScorer : subScorers) {
sum += subScorer.score(doc, freq);
}
return sum;
@@ -119,7 +84,7 @@
@Override
public Explanation explain(int doc, Explanation freq) {
Explanation expl = new Explanation(score(doc, freq.getValue()), "sum of:");
- for (SloppySimScorer subScorer : subScorers) {
+ for (SimScorer subScorer : subScorers) {
expl.addDetail(subScorer.explain(doc, freq));
}
return expl;
Index: lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java (working copy)
@@ -54,16 +54,10 @@
}
@Override
- public final ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
+ public final SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
PerFieldSimWeight perFieldWeight = (PerFieldSimWeight) weight;
- return perFieldWeight.delegate.exactSimScorer(perFieldWeight.delegateWeight, context);
+ return perFieldWeight.delegate.simScorer(perFieldWeight.delegateWeight, context);
}
-
- @Override
- public final SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
- PerFieldSimWeight perFieldWeight = (PerFieldSimWeight) weight;
- return perFieldWeight.delegate.sloppySimScorer(perFieldWeight.delegateWeight, context);
- }
/**
* Returns a {@link Similarity} for scoring a field.
Index: lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java (working copy)
@@ -88,10 +88,8 @@
* is called for each query leaf node, {@link Similarity#queryNorm(float)} is called for the top-level
* query, and finally {@link Similarity.SimWeight#normalize(float, float)} passes down the normalization value
* and any top-level boosts (e.g. from enclosing {@link BooleanQuery}s).
- *
- *   <li>For each segment in the index, the Query creates a {@link #exactSimScorer(SimWeight, AtomicReaderContext)}
- * (for queries with exact frequencies such as TermQuerys and exact PhraseQueries) or a
- * {@link #sloppySimScorer(SimWeight, AtomicReaderContext)} (for queries with sloppy frequencies such as
- * SpanQuerys and sloppy PhraseQueries). The score() method is called for each matching document.
+ *
+ *   <li>For each segment in the index, the Query creates a {@link #simScorer(SimWeight, AtomicReaderContext)}
+ * The score() method is called for each matching document.
*
*
*
@@ -166,76 +164,31 @@
* @return SimWeight object with the information this Similarity needs to score a query.
*/
public abstract SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats);
-
+
/**
- * Creates a new {@link Similarity.ExactSimScorer} to score matching documents from a segment of the inverted index.
+ * Creates a new {@link Similarity.SimScorer} to score matching documents from a segment of the inverted index.
* @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
* @param context segment of the inverted index to be scored.
- * @return ExactSimScorer for scoring documents across context
- * @throws IOException if there is a low-level I/O error
- */
- public abstract ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
-
- /**
- * Creates a new {@link Similarity.SloppySimScorer} to score matching documents from a segment of the inverted index.
- * @param weight collection information from {@link #computeWeight(float, CollectionStatistics, TermStatistics...)}
- * @param context segment of the inverted index to be scored.
* @return SloppySimScorer for scoring documents across context
* @throws IOException if there is a low-level I/O error
*/
- public abstract SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
+ public abstract SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException;
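  // Editor's sketch (illustrative, not part of this patch): how query Weights consume the
  // unified scorer after this change -- one SimScorer per segment, then score() per matching
  // document; freq may be a "sloppy" (fractional) frequency.
  static float scoreOneDoc(Similarity similarity, SimWeight weight,
      AtomicReaderContext context, int docID, float freq) throws IOException {
    SimScorer docScorer = similarity.simScorer(weight, context);
    return docScorer.score(docID, freq);
  }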
/**
- * API for scoring exact queries such as {@link TermQuery} and
- * exact {@link PhraseQuery}.
+ * API for scoring "sloppy" queries such as {@link TermQuery},
+ * {@link SpanQuery}, and {@link PhraseQuery}.
*
- * Frequencies are integers (the term or phrase frequency within the document)
- */
- public static abstract class ExactSimScorer {
-
- /**
- * Sole constructor. (For invocation by subclass
- * constructors, typically implicit.)
- */
- public ExactSimScorer() {}
-
- /**
- * Score a single document
- * @param doc document id
- * @param freq term frequency
- * @return document's score
- */
- public abstract float score(int doc, int freq);
-
- /**
- * Explain the score for a single document
- * @param doc document id
- * @param freq Explanation of how the term frequency was computed
- * @return document's score
- */
- public Explanation explain(int doc, Explanation freq) {
- Explanation result = new Explanation(score(doc, (int)freq.getValue()),
- "score(doc=" + doc + ",freq=" + freq.getValue() +"), with freq of:");
- result.addDetail(freq);
- return result;
- }
- }
-
- /**
- * API for scoring "sloppy" queries such as {@link SpanQuery} and
- * sloppy {@link PhraseQuery}.
- *
* Frequencies are floating-point values: an approximate
* within-document frequency adjusted for "sloppiness" by
- * {@link SloppySimScorer#computeSlopFactor(int)}.
+ * {@link SimScorer#computeSlopFactor(int)}.
*/
- public static abstract class SloppySimScorer {
+ public static abstract class SimScorer {
/**
* Sole constructor. (For invocation by subclass
* constructors, typically implicit.)
*/
- public SloppySimScorer() {}
+ public SimScorer() {}
/**
* Score a single document
Index: lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java (working copy)
@@ -190,41 +190,23 @@
}
@Override
- public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
+ public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
if (stats instanceof MultiSimilarity.MultiStats) {
// a multi term query (e.g. phrase). return the summation,
// scoring almost as if it were boolean query
SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
- ExactSimScorer subScorers[] = new ExactSimScorer[subStats.length];
+ SimScorer subScorers[] = new SimScorer[subStats.length];
for (int i = 0; i < subScorers.length; i++) {
BasicStats basicstats = (BasicStats) subStats[i];
- subScorers[i] = new BasicExactDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
+ subScorers[i] = new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
}
- return new MultiSimilarity.MultiExactDocScorer(subScorers);
+ return new MultiSimilarity.MultiSimScorer(subScorers);
} else {
BasicStats basicstats = (BasicStats) stats;
- return new BasicExactDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
+ return new BasicSimScorer(basicstats, context.reader().getNormValues(basicstats.field));
}
}
- @Override
- public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- if (stats instanceof MultiSimilarity.MultiStats) {
- // a multi term query (e.g. phrase). return the summation,
- // scoring almost as if it were boolean query
- SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
- SloppySimScorer subScorers[] = new SloppySimScorer[subStats.length];
- for (int i = 0; i < subScorers.length; i++) {
- BasicStats basicstats = (BasicStats) subStats[i];
- subScorers[i] = new BasicSloppyDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
- }
- return new MultiSimilarity.MultiSloppyDocScorer(subScorers);
- } else {
- BasicStats basicstats = (BasicStats) stats;
- return new BasicSloppyDocScorer(basicstats, context.reader().getNormValues(basicstats.field));
- }
- }
-
/**
* Subclasses must override this method to return the name of the Similarity
* and preferably the values of parameters (if any) as well.
@@ -277,46 +259,17 @@
// --------------------------------- Classes ---------------------------------
- /** Delegates the {@link #score(int, int)} and
- * {@link #explain(int, Explanation)} methods to
- * {@link SimilarityBase#score(BasicStats, float, float)} and
- * {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
- * respectively.
- */
- private class BasicExactDocScorer extends ExactSimScorer {
- private final BasicStats stats;
- private final NumericDocValues norms;
-
- BasicExactDocScorer(BasicStats stats, NumericDocValues norms) throws IOException {
- this.stats = stats;
- this.norms = norms;
- }
-
- @Override
- public float score(int doc, int freq) {
- // We have to supply something in case norms are omitted
- return SimilarityBase.this.score(stats, freq,
- norms == null ? 1F : decodeNormValue((byte)norms.get(doc)));
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- return SimilarityBase.this.explain(stats, doc, freq,
- norms == null ? 1F : decodeNormValue((byte)norms.get(doc)));
- }
- }
-
/** Delegates the {@link #score(int, float)} and
* {@link #explain(int, Explanation)} methods to
* {@link SimilarityBase#score(BasicStats, float, float)} and
* {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
* respectively.
*/
- private class BasicSloppyDocScorer extends SloppySimScorer {
+ private class BasicSimScorer extends SimScorer {
private final BasicStats stats;
private final NumericDocValues norms;
- BasicSloppyDocScorer(BasicStats stats, NumericDocValues norms) throws IOException {
+ BasicSimScorer(BasicStats stats, NumericDocValues norms) throws IOException {
this.stats = stats;
this.norms = norms;
}
Index: lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java (working copy)
@@ -572,28 +572,9 @@
* when freq is large, and smaller values when freq
* is small.
*
- *
- * <p>The default implementation calls {@link #tf(float)}.
- *
* @param freq the frequency of a term within a document
* @return a score factor based on a term's within-document frequency
*/
- public float tf(int freq) {
- return tf((float)freq);
- }
-
- /** Computes a score factor based on a term or phrase's frequency in a
- * document. This value is multiplied by the {@link #idf(long, long)}
- * factor for each term in the query and these products are then summed to
- * form the initial score for a document.
- *
- *
- * <p>Terms and phrases repeated in a document indicate the topic of the
- * document, so implementations of this method usually return larger values
- * when freq is large, and smaller values when freq
- * is small.
- *
- * @param freq the frequency of a term within a document
- * @return a score factor based on a term's within-document frequency
- */
public abstract float tf(float freq);
/**
@@ -655,7 +636,7 @@
/** Computes a score factor based on a term's document frequency (the number
* of documents which contain the term). This value is multiplied by the
- * {@link #tf(int)} factor for each term in the query and these products are
+ * {@link #tf(float)} factor for each term in the query and these products are
* then summed to form the initial score for a document.
*
*
 * <p>Terms that occur in fewer documents are better indicators of topic, so
@@ -755,51 +736,19 @@
}
@Override
- public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
+ public final SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
IDFStats idfstats = (IDFStats) stats;
- return new ExactTFIDFDocScorer(idfstats, context.reader().getNormValues(idfstats.field));
+ return new TFIDFSimScorer(idfstats, context.reader().getNormValues(idfstats.field));
}
-
- @Override
- public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- IDFStats idfstats = (IDFStats) stats;
- return new SloppyTFIDFDocScorer(idfstats, context.reader().getNormValues(idfstats.field));
- }
- // TODO: we can specialize these for omitNorms up front, but we should test that it doesn't confuse stupid hotspot.
-
- private final class ExactTFIDFDocScorer extends ExactSimScorer {
+ private final class TFIDFSimScorer extends SimScorer {
private final IDFStats stats;
private final float weightValue;
private final NumericDocValues norms;
- ExactTFIDFDocScorer(IDFStats stats, NumericDocValues norms) throws IOException {
+ TFIDFSimScorer(IDFStats stats, NumericDocValues norms) throws IOException {
this.stats = stats;
this.weightValue = stats.value;
- this.norms = norms;
- }
-
- @Override
- public float score(int doc, int freq) {
- final float raw = tf(freq)*weightValue; // compute tf(f)*weight
-
- return norms == null ? raw : raw * decodeNormValue((byte)norms.get(doc)); // normalize for field
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- return explainScore(doc, freq, stats, norms);
- }
- }
-
- private final class SloppyTFIDFDocScorer extends SloppySimScorer {
- private final IDFStats stats;
- private final float weightValue;
- private final NumericDocValues norms;
-
- SloppyTFIDFDocScorer(IDFStats stats, NumericDocValues norms) throws IOException {
- this.stats = stats;
- this.weightValue = stats.value;
this.norms = norms;
}
Index: lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java (working copy)
@@ -34,9 +34,9 @@
protected int doc;
protected float freq;
protected int numMatches;
- protected final Similarity.SloppySimScorer docScorer;
+ protected final Similarity.SimScorer docScorer;
- protected SpanScorer(Spans spans, Weight weight, Similarity.SloppySimScorer docScorer)
+ protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer)
throws IOException {
super(weight);
this.docScorer = docScorer;
Index: lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java (working copy)
@@ -23,7 +23,7 @@
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;
import java.io.IOException;
@@ -86,7 +86,7 @@
if (stats == null) {
return null;
} else {
- return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context));
+ return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.simScorer(stats, context));
}
}
@@ -97,7 +97,7 @@
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.sloppyFreq();
- SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
+ SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
Index: lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java (working copy)
@@ -56,10 +56,10 @@
private int docID = -1;
private int freq;
- private final Similarity.ExactSimScorer docScorer;
+ private final Similarity.SimScorer docScorer;
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
- Similarity.ExactSimScorer docScorer) throws IOException {
+ Similarity.SimScorer docScorer) throws IOException {
super(weight);
this.docScorer = docScorer;
Index: lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (working copy)
@@ -45,6 +45,7 @@
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.GrowableWriter;
+import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
/**
@@ -1069,11 +1070,11 @@
public static class SortedDocValuesImpl extends SortedDocValues {
private final PagedBytes.Reader bytes;
- private final PackedInts.Reader termOrdToBytesOffset;
+ private final MonotonicAppendingLongBuffer termOrdToBytesOffset;
private final PackedInts.Reader docToTermOrd;
private final int numOrd;
- public SortedDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
+ public SortedDocValuesImpl(PagedBytes.Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
this.bytes = bytes;
this.docToTermOrd = docToTermOrd;
this.termOrdToBytesOffset = termOrdToBytesOffset;
@@ -1144,7 +1145,6 @@
final PagedBytes bytes = new PagedBytes(15);
- int startBytesBPV;
int startTermsBPV;
int startNumUniqueTerms;
@@ -1169,22 +1169,19 @@
numUniqueTerms = termCountHardLimit;
}
- startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
startNumUniqueTerms = (int) numUniqueTerms;
} else {
- startBytesBPV = 1;
startTermsBPV = 1;
startNumUniqueTerms = 1;
}
} else {
- startBytesBPV = 1;
startTermsBPV = 1;
startNumUniqueTerms = 1;
}
- GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, acceptableOverheadRatio);
+ MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);
int termOrd = 0;
@@ -1204,13 +1201,7 @@
break;
}
- if (termOrd == termOrdToBytesOffset.size()) {
- // NOTE: this code only runs if the incoming
- // reader impl doesn't implement
- // size (which should be uncommon)
- termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
- }
- termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
+ termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
while (true) {
final int docID = docs.nextDoc();
@@ -1222,14 +1213,10 @@
}
termOrd++;
}
-
- if (termOrdToBytesOffset.size() > termOrd) {
- termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd);
- }
}
// maybe an int-only impl?
- return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
+ return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset, docToTermOrd.getMutable(), termOrd);
}
}
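The switch from a GrowableWriter to a MonotonicAppendingLongBuffer works because the byte offsets produced while copying terms only ever increase, so they can be appended without sizing or resizing the structure up front. A small sketch of that usage pattern, based only on the buffer API shown in this patch (the stand-in offsets are made up):

    import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

    // Illustrative only: the term-ord -> bytes-offset map is append-only and
    // monotonically increasing, which is what this buffer stores compactly.
    final class TermOffsetsSketch {
      public static void main(String[] args) {
        MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
        long[] offsets = {0, 12, 40, 41, 97}; // stand-in for bytes.copyUsingLengthPrefix(term)
        for (long offset : offsets) {
          termOrdToBytesOffset.add(offset);   // no up-front sizing, no resize() calls
        }
        System.out.println(termOrdToBytesOffset.get(3)); // 41
      }
    }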
Index: lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java (working copy)
@@ -31,7 +31,7 @@
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
@@ -245,14 +245,14 @@
}
if (slop == 0) {
- ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
+ ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
if (s.noDocs) {
return null;
} else {
return s;
}
} else {
- return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
+ return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
}
}
@@ -263,7 +263,7 @@
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).sloppyFreq();
- SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
+ SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
Index: lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java (working copy)
@@ -33,7 +33,7 @@
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
@@ -282,7 +282,7 @@
}
if (slop == 0) { // optimize exact case
- ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
+ ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
if (s.noDocs) {
return null;
} else {
@@ -290,7 +290,7 @@
}
} else {
return
- new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
+ new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
}
}
@@ -306,7 +306,7 @@
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).sloppyFreq();
- SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
+ SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
Index: lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java (working copy)
@@ -34,7 +34,7 @@
private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().
- private final Similarity.SloppySimScorer docScorer;
+ private final Similarity.SimScorer docScorer;
private final int slop;
private final int numPostings;
@@ -52,7 +52,7 @@
private final long cost;
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
- int slop, Similarity.SloppySimScorer docScorer) {
+ int slop, Similarity.SimScorer docScorer) {
super(weight);
this.docScorer = docScorer;
this.slop = slop;
Index: lucene/core/src/java/org/apache/lucene/search/TermQuery.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/TermQuery.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/TermQuery.java (working copy)
@@ -29,7 +29,7 @@
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
@@ -84,7 +84,7 @@
}
DocsEnum docs = termsEnum.docs(acceptDocs, null);
assert docs != null;
- return new TermScorer(this, docs, similarity.exactSimScorer(stats, context));
+ return new TermScorer(this, docs, similarity.simScorer(stats, context));
}
/**
@@ -116,7 +116,7 @@
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
- ExactSimScorer docScorer = similarity.exactSimScorer(stats, context);
+ SimScorer docScorer = similarity.simScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));
Index: lucene/core/src/java/org/apache/lucene/search/TermScorer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/TermScorer.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/TermScorer.java (working copy)
@@ -26,7 +26,7 @@
*/
final class TermScorer extends Scorer {
private final DocsEnum docsEnum;
- private final Similarity.ExactSimScorer docScorer;
+ private final Similarity.SimScorer docScorer;
/**
* Construct a TermScorer.
@@ -36,10 +36,10 @@
* @param td
* An iterator over the documents matching the Term.
* @param docScorer
- * The Similarity.ExactSimScorer implementation
+ * The Similarity.SimScorer implementation
* to be used for score computations.
*/
- TermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) {
+ TermScorer(Weight weight, DocsEnum td, Similarity.SimScorer docScorer) {
super(weight);
this.docScorer = docScorer;
this.docsEnum = td;
Index: lucene/core/src/java/org/apache/lucene/search/package.html
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/package.html (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/search/package.html (working copy)
@@ -441,9 +441,8 @@
explain(AtomicReaderContext context, int doc)} — Provide a means for explaining why a given document was
scored the way it was.
Typically a weight such as TermWeight
- that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} will make use of the Similarity's implementations:
- {@link org.apache.lucene.search.similarities.Similarity.ExactSimScorer#explain(int, Explanation) ExactSimScorer#explain(int doc, Explanation freq)},
- and {@link org.apache.lucene.search.similarities.Similarity.SloppySimScorer#explain(int, Explanation) SloppySimScorer#explain(int doc, Explanation freq)}
+ that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} will make use of the Similarity's implementation:
+ {@link org.apache.lucene.search.similarities.Similarity.SimScorer#explain(int, Explanation) SimScorer#explain(int doc, Explanation freq)}.
@@ -468,7 +467,7 @@
{@link org.apache.lucene.search.Scorer#score score()} — Return the score of the
current document. This value can be determined in any appropriate way for an application. For instance, the
{@link org.apache.lucene.search.TermScorer TermScorer} simply defers to the configured Similarity:
- {@link org.apache.lucene.search.similarities.Similarity.ExactSimScorer#score(int, int) ExactSimScorer.score(int doc, int freq)}.
+ {@link org.apache.lucene.search.similarities.Similarity.SimScorer#score(int, float) SimScorer.score(int doc, float freq)}.
{@link org.apache.lucene.search.Scorer#freq freq()} — Returns the number of matches
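Since the package documentation now points at a single SimScorer hook, a bare-bones similarity scorer only has to implement one class. The following sketch assumes the 4.4 SimScorer method set referenced above (score, computeSlopFactor, computePayloadFactor); it is an illustration, not code from this patch:

    import org.apache.lucene.search.similarities.Similarity.SimScorer;
    import org.apache.lucene.util.BytesRef;

    // Bare-bones sketch: score is the raw frequency, norms and payloads ignored.
    final class RawFreqSimScorer extends SimScorer {
      @Override
      public float score(int doc, float freq) {
        return freq;                  // term and phrase scoring both land here
      }
      @Override
      public float computeSlopFactor(int distance) {
        return 1.0f / (distance + 1); // simple slop damping for sloppy phrases
      }
      @Override
      public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
        return 1.0f;                  // payloads not used in this sketch
      }
    }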
Index: lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/fst/Builder.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/fst/Builder.java (working copy)
@@ -117,9 +117,9 @@
*
* @param doShareSuffix
* If true, the shared suffixes will be compacted into unique paths.
- * This requires an additional hash map for lookups in memory. Setting this parameter to
- * false creates a single path for all input sequences. This will result in a larger
- * graph, but may require less memory and will speed up construction.
+ * This requires an additional RAM-intensive hash map for lookups in memory. Setting this parameter to
+ * false creates a single suffix path for all input sequences. This will result in a larger
+ * FST, but requires substantially less memory and CPU during building.
*
* @param doShareNonSingletonNodes
* Only used if doShareSuffix is true. Set this to
Index: lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java (working copy)
@@ -19,21 +19,21 @@
import java.io.IOException;
-import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PagedGrowableWriter;
// Used to dedup states (lookup already-frozen states)
final class NodeHash {
- private GrowableWriter table;
- private int count;
- private int mask;
+ private PagedGrowableWriter table;
+ private long count;
+ private long mask;
private final FST<T> fst;
private final FST.Arc<T> scratchArc = new FST.Arc<T>();
private final FST.BytesReader in;
public NodeHash(FST<T> fst, FST.BytesReader in) {
- table = new GrowableWriter(8, 16, PackedInts.COMPACT);
+ table = new PagedGrowableWriter(16, 1<<30, 8, PackedInts.COMPACT);
mask = 15;
this.fst = fst;
this.in = in;
@@ -69,10 +69,10 @@
// hash code for an unfrozen node. This must be identical
// to the un-frozen case (below)!!
- private int hash(Builder.UnCompiledNode<T> node) {
+ private long hash(Builder.UnCompiledNode<T> node) {
final int PRIME = 31;
//System.out.println("hash unfrozen");
- int h = 0;
+ long h = 0;
// TODO: maybe if number of arcs is high we can safely subsample?
for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
final Builder.Arc<T> arc = node.arcs[arcIdx];
@@ -87,14 +87,14 @@
}
}
//System.out.println(" ret " + (h&Integer.MAX_VALUE));
- return h & Integer.MAX_VALUE;
+ return h & Long.MAX_VALUE;
}
// hash code for a frozen node
- private int hash(long node) throws IOException {
+ private long hash(long node) throws IOException {
final int PRIME = 31;
//System.out.println("hash frozen node=" + node);
- int h = 0;
+ long h = 0;
fst.readFirstRealTargetArc(node, scratchArc, in);
while(true) {
//System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal() + " pos=" + in.getPosition());
@@ -111,13 +111,13 @@
fst.readNextRealArc(scratchArc, in);
}
//System.out.println(" ret " + (h&Integer.MAX_VALUE));
- return h & Integer.MAX_VALUE;
+ return h & Long.MAX_VALUE;
}
public long add(Builder.UnCompiledNode<T> nodeIn) throws IOException {
- // System.out.println("hash: add count=" + count + " vs " + table.size());
- final int h = hash(nodeIn);
- int pos = h & mask;
+ //System.out.println("hash: add count=" + count + " vs " + table.size() + " mask=" + mask);
+ final long h = hash(nodeIn);
+ long pos = h & mask;
int c = 0;
while(true) {
final long v = table.get(pos);
@@ -128,7 +128,8 @@
assert hash(node) == h : "frozenHash=" + hash(node) + " vs h=" + h;
count++;
table.set(pos, node);
- if (table.size() < 2*count) {
+ // Rehash at 2/3 occupancy:
+ if (count > 2*table.size()/3) {
rehash();
}
return node;
@@ -144,7 +145,7 @@
// called only by rehash
private void addNew(long address) throws IOException {
- int pos = hash(address) & mask;
+ long pos = hash(address) & mask;
int c = 0;
while(true) {
if (table.get(pos) == 0) {
@@ -158,23 +159,15 @@
}
private void rehash() throws IOException {
- final GrowableWriter oldTable = table;
+ final PagedGrowableWriter oldTable = table;
- if (oldTable.size() >= Integer.MAX_VALUE/2) {
- throw new IllegalStateException("FST too large (> 2.1 GB)");
- }
-
- table = new GrowableWriter(oldTable.getBitsPerValue(), 2*oldTable.size(), PackedInts.COMPACT);
+ table = new PagedGrowableWriter(2*oldTable.size(), 1<<30, PackedInts.bitsRequired(count), PackedInts.COMPACT);
mask = table.size()-1;
- for(int idx=0;idx<oldTable.size();idx++) {
Index: lucene/core/src/java/org/apache/lucene/util/fst/PositiveIntOutputs.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/fst/PositiveIntOutputs.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/fst/PositiveIntOutputs.java (working copy)
assert output1 > 0;
assert output2 > 0;
return Math.min(output1, output2);
- } else if (output1.equals(output2)) {
- return output1;
- } else {
- return NO_OUTPUT;
}
}
@@ -134,6 +117,6 @@
@Override
public String toString() {
- return "PositiveIntOutputs(doShare=" + doShare + ")";
+ return "PositiveIntOutputs";
}
}
Index: lucene/core/src/java/org/apache/lucene/util/fst/Util.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/fst/Util.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/fst/Util.java (working copy)
@@ -93,9 +93,7 @@
*
*
* <p>NOTE: this only works with {@code FST<Long>}, only
* works when the outputs are ascending in order with
- * the inputs and only works when you shared
- * the outputs (pass doShare=true to {@link
- * PositiveIntOutputs#getSingleton}).
+ * the inputs.
* For example, simple ordinals (0, 1,
* 2, ...), or file offets (when appending to a file)
* fit this. */
@@ -517,11 +515,7 @@
}
/** Starting from node, find the top N min cost
- * completions to a final node.
- *
- *
- * <p>NOTE: you must share the outputs when you build the
- * FST (pass doShare=true to {@link
- * PositiveIntOutputs#getSingleton}). */
+ * completions to a final node. */
public static <T> MinResult<T>[] shortestPaths(FST<T> fst, FST.Arc<T> fromNode, T startOutput, Comparator<T> comparator, int topN,
boolean allowEmptyString) throws IOException {
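With the doShare flag gone, any FST built on PositiveIntOutputs.getSingleton() with ascending outputs can be handed to shortestPaths directly. A hedged usage sketch against the signature shown above (names other than the Lucene ones are illustrative):

    import java.io.IOException;
    import java.util.Comparator;
    import org.apache.lucene.util.fst.FST;
    import org.apache.lucene.util.fst.PositiveIntOutputs;
    import org.apache.lucene.util.fst.Util;

    // Illustrative only: find the 3 lowest-weight completions from the root arc.
    final class ShortestPathsSketch {
      static Util.MinResult<Long>[] topThree(FST<Long> fst) throws IOException {
        Comparator<Long> byWeight = new Comparator<Long>() {
          @Override
          public int compare(Long left, Long right) {
            return left.compareTo(right);
          }
        };
        return Util.shortestPaths(fst,
            fst.getFirstArc(new FST.Arc<Long>()),            // start from the root
            PositiveIntOutputs.getSingleton().getNoOutput(),  // zero starting output
            byWeight, 3, true);                               // top 3, allow empty string
      }
    }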
Index: lucene/core/src/java/org/apache/lucene/util/fst/package.html
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/fst/package.html (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/fst/package.html (working copy)
@@ -43,7 +43,7 @@
String inputValues[] = {"cat", "dog", "dogs"};
long outputValues[] = {5, 7, 12};
- PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
BytesRef scratchBytes = new BytesRef();
IntsRef scratchInts = new IntsRef();
@@ -60,8 +60,7 @@
Retrieval by value:
- // Only works because outputs are also in sorted order, and
- // we passed 'true' for sharing to PositiveIntOutputs.getSingleton
+ // Only works because outputs are also in sorted order
IntsRef key = Util.getByOutput(fst, 12);
System.out.println(Util.toBytesRef(key, scratchBytes).utf8ToString()); // dogs
@@ -77,7 +76,6 @@
N-shortest paths by weight:
- // Only works because we passed 'true' for sharing to PositiveIntOutputs.getSingleton
Comparator<Long> comparator = new Comparator<Long>() {
public int compare(Long left, Long right) {
return left.compareTo(right);
Index: lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/packed/AbstractAppendingLongBuffer.java (working copy)
@@ -17,6 +17,8 @@
* limitations under the License.
*/
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+
import java.util.Arrays;
import org.apache.lucene.util.ArrayUtil;
@@ -25,33 +27,37 @@
/** Common functionality shared by {@link AppendingLongBuffer} and {@link MonotonicAppendingLongBuffer}. */
abstract class AbstractAppendingLongBuffer {
- static final int BLOCK_BITS = 10;
- static final int MAX_PENDING_COUNT = 1 << BLOCK_BITS;
- static final int BLOCK_MASK = MAX_PENDING_COUNT - 1;
+ static final int MIN_PAGE_SIZE = 64;
+ // More than 1M doesn't really makes sense with these appending buffers
+ // since their goal is to try to have small numbers of bits per value
+ static final int MAX_PAGE_SIZE = 1 << 20;
+ final int pageShift, pageMask;
long[] minValues;
PackedInts.Reader[] deltas;
private long deltasBytes;
int valuesOff;
- long[] pending;
+ final long[] pending;
int pendingOff;
- AbstractAppendingLongBuffer(int initialBlockCount) {
- minValues = new long[16];
- deltas = new PackedInts.Reader[16];
- pending = new long[MAX_PENDING_COUNT];
+ AbstractAppendingLongBuffer(int initialBlockCount, int pageSize) {
+ minValues = new long[initialBlockCount];
+ deltas = new PackedInts.Reader[initialBlockCount];
+ pending = new long[pageSize];
+ pageShift = checkBlockSize(pageSize, MIN_PAGE_SIZE, MAX_PAGE_SIZE);
+ pageMask = pageSize - 1;
valuesOff = 0;
pendingOff = 0;
}
/** Get the number of values that have been added to the buffer. */
public final long size() {
- return valuesOff * (long) MAX_PENDING_COUNT + pendingOff;
+ return valuesOff * (long) pending.length + pendingOff;
}
/** Append a value to this buffer. */
public final void add(long l) {
- if (pendingOff == MAX_PENDING_COUNT) {
+ if (pendingOff == pending.length) {
// check size
if (deltas.length == valuesOff) {
final int newLength = ArrayUtil.oversize(valuesOff + 1, 8);
@@ -80,8 +86,8 @@
if (index < 0 || index >= size()) {
throw new IndexOutOfBoundsException("" + index);
}
- int block = (int) (index >> BLOCK_BITS);
- int element = (int) (index & BLOCK_MASK);
+ final int block = (int) (index >> pageShift);
+ final int element = (int) (index & pageMask);
return get(block, element);
}
@@ -99,7 +105,7 @@
if (valuesOff == 0) {
currentValues = pending;
} else {
- currentValues = new long[MAX_PENDING_COUNT];
+ currentValues = new long[pending.length];
fillValues();
}
}
@@ -115,7 +121,7 @@
public final long next() {
assert hasNext();
long result = currentValues[pOff++];
- if (pOff == MAX_PENDING_COUNT) {
+ if (pOff == pending.length) {
vOff += 1;
pOff = 0;
if (vOff <= valuesOff) {
@@ -139,6 +145,7 @@
public long ramBytesUsed() {
// TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed())
+ + 2 * RamUsageEstimator.NUM_BYTES_INT // pageShift, pageMask
+ RamUsageEstimator.NUM_BYTES_LONG // valuesBytes
+ RamUsageEstimator.sizeOf(pending)
+ RamUsageEstimator.sizeOf(minValues)
Index: lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/packed/AbstractBlockPackedWriter.java (working copy)
@@ -17,6 +17,8 @@
* limitations under the License.
*/
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+
import java.io.IOException;
import java.util.Arrays;
@@ -24,22 +26,11 @@
abstract class AbstractBlockPackedWriter {
+ static final int MIN_BLOCK_SIZE = 64;
static final int MAX_BLOCK_SIZE = 1 << (30 - 3);
static final int MIN_VALUE_EQUALS_0 = 1 << 0;
static final int BPV_SHIFT = 1;
- static void checkBlockSize(int blockSize) {
- if (blockSize <= 0 || blockSize > MAX_BLOCK_SIZE) {
- throw new IllegalArgumentException("blockSize must be > 0 and < " + MAX_BLOCK_SIZE + ", got " + blockSize);
- }
- if (blockSize < 64) {
- throw new IllegalArgumentException("blockSize must be >= 64, got " + blockSize);
- }
- if ((blockSize & (blockSize - 1)) != 0) {
- throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
- }
- }
-
static long zigZagEncode(long n) {
return (n >> 63) ^ (n << 1);
}
@@ -66,7 +57,7 @@
* @param blockSize the number of values of a single block, must be a multiple of 64
*/
public AbstractBlockPackedWriter(DataOutput out, int blockSize) {
- checkBlockSize(blockSize);
+ checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
reset(out);
values = new long[blockSize];
}
Index: lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/packed/AppendingLongBuffer.java (working copy)
@@ -27,9 +27,16 @@
*/
public final class AppendingLongBuffer extends AbstractAppendingLongBuffer {
- /** Sole constructor. */
+ /** @param initialPageCount the initial number of pages
+ * @param pageSize the size of a single page */
+ public AppendingLongBuffer(int initialPageCount, int pageSize) {
+ super(initialPageCount, pageSize);
+ }
+
+ /** Create an {@link AppendingLongBuffer} with initialPageCount=16 and
+ * pageSize=1024. */
public AppendingLongBuffer() {
- super(16);
+ this(16, 1024);
}
@Override
@@ -43,8 +50,9 @@
}
}
+ @Override
void packPendingValues() {
- assert pendingOff == MAX_PENDING_COUNT;
+ assert pendingOff == pending.length;
// compute max delta
long minValue = pending[0];
@@ -71,6 +79,7 @@
}
/** Return an iterator over the values of this buffer. */
+ @Override
public Iterator iterator() {
return new Iterator();
}
@@ -78,20 +87,21 @@
/** A long iterator. */
public final class Iterator extends AbstractAppendingLongBuffer.Iterator {
- private Iterator() {
+ Iterator() {
super();
}
+ @Override
void fillValues() {
if (vOff == valuesOff) {
currentValues = pending;
} else if (deltas[vOff] == null) {
Arrays.fill(currentValues, minValues[vOff]);
} else {
- for (int k = 0; k < MAX_PENDING_COUNT; ) {
- k += deltas[vOff].get(k, currentValues, k, MAX_PENDING_COUNT - k);
+ for (int k = 0; k < pending.length; ) {
+ k += deltas[vOff].get(k, currentValues, k, pending.length - k);
}
- for (int k = 0; k < MAX_PENDING_COUNT; ++k) {
+ for (int k = 0; k < pending.length; ++k) {
currentValues[k] += minValues[vOff];
}
}
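The new two-argument constructor exposes the page size that used to be the fixed MAX_PENDING_COUNT. A short usage sketch based on the constructors and iterator shown in this patch (values are arbitrary):

    import org.apache.lucene.util.packed.AppendingLongBuffer;

    // Illustrative only: a small page size trades speed for lower overhead on
    // short sequences; 64 is the minimum page size allowed by this patch.
    final class AppendingBufferSketch {
      public static void main(String[] args) {
        AppendingLongBuffer buffer = new AppendingLongBuffer(4, 64); // 4 initial pages, 64 values per page
        for (long i = 0; i < 1000; i++) {
          buffer.add(i * 3);                  // values are packed page by page
        }
        AppendingLongBuffer.Iterator it = buffer.iterator();
        long sum = 0;
        while (it.hasNext()) {
          sum += it.next();
        }
        System.out.println("sum=" + sum + ", size=" + buffer.size());
      }
    }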
Index: lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java (working copy)
@@ -17,11 +17,14 @@
* limitations under the License.
*/
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.BPV_SHIFT;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MAX_BLOCK_SIZE;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_BLOCK_SIZE;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_VALUE_EQUALS_0;
import static org.apache.lucene.util.packed.BlockPackedReaderIterator.readVLong;
import static org.apache.lucene.util.packed.BlockPackedReaderIterator.zigZagDecode;
-import static org.apache.lucene.util.packed.BlockPackedWriter.BPV_SHIFT;
-import static org.apache.lucene.util.packed.BlockPackedWriter.MIN_VALUE_EQUALS_0;
-import static org.apache.lucene.util.packed.BlockPackedWriter.checkBlockSize;
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+import static org.apache.lucene.util.packed.PackedInts.numBlocks;
import java.io.IOException;
@@ -40,14 +43,10 @@
/** Sole constructor. */
public BlockPackedReader(IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) throws IOException {
- checkBlockSize(blockSize);
this.valueCount = valueCount;
- blockShift = Integer.numberOfTrailingZeros(blockSize);
+ blockShift = checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
blockMask = blockSize - 1;
- final int numBlocks = (int) (valueCount / blockSize) + (valueCount % blockSize == 0 ? 0 : 1);
- if ((long) numBlocks * blockSize < valueCount) {
- throw new IllegalArgumentException("valueCount is too large for this block size");
- }
+ final int numBlocks = numBlocks(valueCount, blockSize);
long[] minValues = null;
subReaders = new PackedInts.Reader[numBlocks];
for (int i = 0; i < numBlocks; ++i) {
Index: lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReaderIterator.java (working copy)
@@ -17,9 +17,13 @@
* limitations under the License.
*/
-import static org.apache.lucene.util.packed.BlockPackedWriter.BPV_SHIFT;
-import static org.apache.lucene.util.packed.BlockPackedWriter.MIN_VALUE_EQUALS_0;
-import static org.apache.lucene.util.packed.BlockPackedWriter.checkBlockSize;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.BPV_SHIFT;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MAX_BLOCK_SIZE;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_BLOCK_SIZE;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_VALUE_EQUALS_0;
+import static org.apache.lucene.util.packed.BlockPackedReaderIterator.readVLong;
+import static org.apache.lucene.util.packed.BlockPackedReaderIterator.zigZagDecode;
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
import java.io.EOFException;
import java.io.IOException;
@@ -87,7 +91,7 @@
* been used to write the stream
*/
public BlockPackedReaderIterator(DataInput in, int packedIntsVersion, int blockSize, long valueCount) {
- checkBlockSize(blockSize);
+ checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
this.packedIntsVersion = packedIntsVersion;
this.blockSize = blockSize;
this.values = new long[blockSize];
Index: lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/packed/MonotonicAppendingLongBuffer.java (working copy)
@@ -37,14 +37,22 @@
return (n >> 63) ^ (n << 1);
}
- private float[] averages;
+ float[] averages;
- /** Sole constructor. */
+ /** @param initialPageCount the initial number of pages
+ * @param pageSize the size of a single page */
+ public MonotonicAppendingLongBuffer(int initialPageCount, int pageSize) {
+ super(initialPageCount, pageSize);
+ averages = new float[pending.length];
+ }
+
+ /** Create an {@link MonotonicAppendingLongBuffer} with initialPageCount=16
+ * and pageSize=1024. */
public MonotonicAppendingLongBuffer() {
- super(16);
- averages = new float[16];
+ this(16, 1024);
}
-
+
+ @Override
long get(int block, int element) {
if (block == valuesOff) {
return pending[element];
@@ -66,16 +74,16 @@
@Override
void packPendingValues() {
- assert pendingOff == MAX_PENDING_COUNT;
+ assert pendingOff == pending.length;
minValues[valuesOff] = pending[0];
- averages[valuesOff] = (float) (pending[BLOCK_MASK] - pending[0]) / BLOCK_MASK;
+ averages[valuesOff] = (float) (pending[pending.length - 1] - pending[0]) / (pending.length - 1);
- for (int i = 0; i < MAX_PENDING_COUNT; ++i) {
+ for (int i = 0; i < pending.length; ++i) {
pending[i] = zigZagEncode(pending[i] - minValues[valuesOff] - (long) (averages[valuesOff] * (long) i));
}
long maxDelta = 0;
- for (int i = 0; i < MAX_PENDING_COUNT; ++i) {
+ for (int i = 0; i < pending.length; ++i) {
if (pending[i] < 0) {
maxDelta = -1;
break;
@@ -94,6 +102,7 @@
}
/** Return an iterator over the values of this buffer. */
+ @Override
public Iterator iterator() {
return new Iterator();
}
@@ -105,18 +114,19 @@
super();
}
+ @Override
void fillValues() {
if (vOff == valuesOff) {
currentValues = pending;
} else if (deltas[vOff] == null) {
- for (int k = 0; k < MAX_PENDING_COUNT; ++k) {
+ for (int k = 0; k < pending.length; ++k) {
currentValues[k] = minValues[vOff] + (long) (averages[vOff] * (long) k);
}
} else {
- for (int k = 0; k < MAX_PENDING_COUNT; ) {
- k += deltas[vOff].get(k, currentValues, k, MAX_PENDING_COUNT - k);
+ for (int k = 0; k < pending.length; ) {
+ k += deltas[vOff].get(k, currentValues, k, pending.length - k);
}
- for (int k = 0; k < MAX_PENDING_COUNT; ++k) {
+ for (int k = 0; k < pending.length; ++k) {
currentValues[k] = minValues[vOff] + (long) (averages[vOff] * (long) k) + zigZagDecode(currentValues[k]);
}
}
Index: lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java (working copy)
@@ -17,8 +17,11 @@
* limitations under the License.
*/
-import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.checkBlockSize;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MAX_BLOCK_SIZE;
+import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_BLOCK_SIZE;
import static org.apache.lucene.util.packed.BlockPackedReaderIterator.zigZagDecode;
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+import static org.apache.lucene.util.packed.PackedInts.numBlocks;
import java.io.IOException;
@@ -39,14 +42,10 @@
/** Sole constructor. */
public MonotonicBlockPackedReader(IndexInput in, int packedIntsVersion, int blockSize, long valueCount, boolean direct) throws IOException {
- checkBlockSize(blockSize);
this.valueCount = valueCount;
- blockShift = Integer.numberOfTrailingZeros(blockSize);
+ blockShift = checkBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
blockMask = blockSize - 1;
- final int numBlocks = (int) (valueCount / blockSize) + (valueCount % blockSize == 0 ? 0 : 1);
- if ((long) numBlocks * blockSize < valueCount) {
- throw new IllegalArgumentException("valueCount is too large for this block size");
- }
+ final int numBlocks = numBlocks(valueCount, blockSize);
minValues = new long[numBlocks];
averages = new float[numBlocks];
subReaders = new PackedInts.Reader[numBlocks];
Index: lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java (working copy)
@@ -213,6 +213,11 @@
this.format = format;
this.bitsPerValue = bitsPerValue;
}
+
+ @Override
+ public String toString() {
+ return "FormatAndBits(format=" + format + " bitsPerValue=" + bitsPerValue + ")";
+ }
}
/**
@@ -1198,33 +1203,39 @@
for (int i = 0; i < len; ++i) {
dest.set(destPos++, src.get(srcPos++));
}
- } else {
+ } else if (len > 0) {
// use bulk operations
- long[] buf = new long[Math.min(capacity, len)];
- int remaining = 0;
- while (len > 0) {
- final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining));
- assert read > 0;
- srcPos += read;
- len -= read;
- remaining += read;
- final int written = dest.set(destPos, buf, 0, remaining);
- assert written > 0;
- destPos += written;
- if (written < remaining) {
- System.arraycopy(buf, written, buf, 0, remaining - written);
- }
- remaining -= written;
+ final long[] buf = new long[Math.min(capacity, len)];
+ copy(src, srcPos, dest, destPos, len, buf);
+ }
+ }
+
+ /** Same as {@link #copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer. */
+ static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) {
+ assert buf.length > 0;
+ int remaining = 0;
+ while (len > 0) {
+ final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining));
+ assert read > 0;
+ srcPos += read;
+ len -= read;
+ remaining += read;
+ final int written = dest.set(destPos, buf, 0, remaining);
+ assert written > 0;
+ destPos += written;
+ if (written < remaining) {
+ System.arraycopy(buf, written, buf, 0, remaining - written);
}
- while (remaining > 0) {
- final int written = dest.set(destPos, buf, 0, remaining);
- destPos += written;
- remaining -= written;
- System.arraycopy(buf, written, buf, 0, remaining);
- }
+ remaining -= written;
}
+ while (remaining > 0) {
+ final int written = dest.set(destPos, buf, 0, remaining);
+ destPos += written;
+ remaining -= written;
+ System.arraycopy(buf, written, buf, 0, remaining);
+ }
}
-
+
/**
* Expert: reads only the metadata from a stream. This is useful to later
* restore a stream or open a direct reader via
@@ -1261,4 +1272,26 @@
}
}
-}
\ No newline at end of file
+ /** Check that the block size is a power of 2, in the right bounds, and return
+ * its log in base 2. */
+ static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) {
+ if (blockSize < minBlockSize || blockSize > maxBlockSize) {
+ throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize);
+ }
+ if ((blockSize & (blockSize - 1)) != 0) {
+ throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
+ }
+ return Integer.numberOfTrailingZeros(blockSize);
+ }
+
+ /** Return the number of blocks required to store size values on
+ * blockSize. */
+ static int numBlocks(long size, int blockSize) {
+ final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 0 : 1);
+ if ((long) numBlocks * blockSize < size) {
+ throw new IllegalArgumentException("size is too large for this block size");
+ }
+ return numBlocks;
+ }
+
+}
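checkBlockSize now returns the base-2 log of the block size, which callers combine with a mask to split an absolute index into a block number and an offset, and numBlocks is the matching ceiling division. A small worked sketch of that arithmetic (the class name is mine, the numbers are arbitrary):

    // Illustrative arithmetic only: how a power-of-two blockSize turns an
    // absolute index into (block, offset) using the shift that checkBlockSize returns.
    final class BlockMathSketch {
      public static void main(String[] args) {
        int blockSize = 1 << 12;                                    // 4096, a power of two
        int blockShift = Integer.numberOfTrailingZeros(blockSize);  // what checkBlockSize returns
        int blockMask = blockSize - 1;

        long index = 10000000L;
        int block = (int) (index >>> blockShift);   // 10000000 / 4096 = 2441
        int offset = (int) (index & blockMask);     // 10000000 % 4096 = 1664
        System.out.println("block=" + block + " offset=" + offset);

        long valueCount = 10000001L;
        int numBlocks = (int) (valueCount / blockSize) + (valueCount % blockSize == 0 ? 0 : 1);
        System.out.println("numBlocks=" + numBlocks); // 2442
      }
    }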
Index: lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java (revision 0)
+++ lucene/core/src/java/org/apache/lucene/util/packed/PagedGrowableWriter.java (working copy)
@@ -0,0 +1,136 @@
+package org.apache.lucene.util.packed;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
+import static org.apache.lucene.util.packed.PackedInts.numBlocks;
+
+/**
+ * A {@link PagedGrowableWriter}. This class slices data into fixed-size blocks
+ * which have independent numbers of bits per value and grow on-demand.
+ *
You should use this class instead of {@link AppendingLongBuffer} only when
+ * you need random write-access. Otherwise this class will likely be slower and
+ * less memory-efficient.
+ * @lucene.internal
+ */
+public final class PagedGrowableWriter {
+
+ static final int MIN_BLOCK_SIZE = 1 << 6;
+ static final int MAX_BLOCK_SIZE = 1 << 30;
+
+ final long size;
+ final int pageShift;
+ final int pageMask;
+ final GrowableWriter[] subWriters;
+ final int startBitsPerValue;
+ final float acceptableOverheadRatio;
+
+ /**
+ * Create a new {@link PagedGrowableWriter} instance.
+ *
+ * @param size the number of values to store.
+ * @param pageSize the number of values per page
+ * @param startBitsPerValue the initial number of bits per value
+ * @param acceptableOverheadRatio an acceptable overhead ratio
+ */
+ public PagedGrowableWriter(long size, int pageSize,
+ int startBitsPerValue, float acceptableOverheadRatio) {
+ this(size, pageSize, startBitsPerValue, acceptableOverheadRatio, true);
+ }
+
+ PagedGrowableWriter(long size, int pageSize,int startBitsPerValue, float acceptableOverheadRatio, boolean fillPages) {
+ this.size = size;
+ this.startBitsPerValue = startBitsPerValue;
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
+ pageShift = checkBlockSize(pageSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
+ pageMask = pageSize - 1;
+ final int numPages = numBlocks(size, pageSize);
+ subWriters = new GrowableWriter[numPages];
+ if (fillPages) {
+ for (int i = 0; i < numPages; ++i) {
+ // do not allocate for more entries than necessary on the last page
+ final int valueCount = i == numPages - 1 ? lastPageSize(size) : pageSize;
+ subWriters[i] = new GrowableWriter(startBitsPerValue, valueCount, acceptableOverheadRatio);
+ }
+ }
+ }
+
+ private int lastPageSize(long size) {
+ final int sz = indexInPage(size);
+ return sz == 0 ? pageSize() : sz;
+ }
+
+ private int pageSize() {
+ return pageMask + 1;
+ }
+
+ /** The number of values. */
+ public long size() {
+ return size;
+ }
+
+ int pageIndex(long index) {
+ return (int) (index >>> pageShift);
+ }
+
+ int indexInPage(long index) {
+ return (int) index & pageMask;
+ }
+
+ /** Get value at index. */
+ public long get(long index) {
+ assert index >= 0 && index < size;
+ final int pageIndex = pageIndex(index);
+ final int indexInPage = indexInPage(index);
+ return subWriters[pageIndex].get(indexInPage);
+ }
+
+ /** Set value at index. */
+ public void set(long index, long value) {
+ assert index >= 0 && index < size;
+ final int pageIndex = pageIndex(index);
+ final int indexInPage = indexInPage(index);
+ subWriters[pageIndex].set(indexInPage, value);
+ }
+
+ /** Create a new {@link PagedGrowableWriter} of size newSize
+ * based on the content of this buffer. This method is much more efficient
+ * than creating a new {@link PagedGrowableWriter} and copying values one by
+ * one. */
+ public PagedGrowableWriter resize(long newSize) {
+ final PagedGrowableWriter newWriter = new PagedGrowableWriter(newSize, pageSize(), startBitsPerValue, acceptableOverheadRatio, false);
+ final int numCommonPages = Math.min(newWriter.subWriters.length, subWriters.length);
+ final long[] copyBuffer = new long[1024];
+ for (int i = 0; i < newWriter.subWriters.length; ++i) {
+ final int valueCount = i == newWriter.subWriters.length - 1 ? lastPageSize(newSize) : pageSize();
+ final int bpv = i < numCommonPages ? subWriters[i].getBitsPerValue() : startBitsPerValue;
+ newWriter.subWriters[i] = new GrowableWriter(bpv, valueCount, acceptableOverheadRatio);
+ if (i < numCommonPages) {
+ final int copyLength = Math.min(valueCount, subWriters[i].size());
+ PackedInts.copy(subWriters[i], 0, newWriter.subWriters[i].getMutable(), 0, copyLength, copyBuffer);
+ }
+ }
+ return newWriter;
+ }
+
+ @Override
+ public String toString() {
+ return getClass().getSimpleName() + "(size=" + size() + ",pageSize=" + pageSize() + ")";
+ }
+
+}
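A usage sketch for the new class, based only on the constructor, set/get, and resize methods defined above (sizes and values are arbitrary):

    import org.apache.lucene.util.packed.PackedInts;
    import org.apache.lucene.util.packed.PagedGrowableWriter;

    // Illustrative only: random write access where each page grows its
    // bits-per-value independently and on demand.
    final class PagedWriterSketch {
      public static void main(String[] args) {
        PagedGrowableWriter writer = new PagedGrowableWriter(
            1000, 1 << 10, /*startBitsPerValue=*/ 2, PackedInts.COMPACT);
        writer.set(0, 3);         // fits in the initial 2 bits
        writer.set(999, 123456);  // this page grows its bits per value on demand
        System.out.println(writer.get(999));

        PagedGrowableWriter bigger = writer.resize(5000); // bulk-copies existing pages
        System.out.println(bigger.size());
      }
    }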
Index: lucene/core/src/java/org/apache/lucene/util/packed/package.html
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/packed/package.html (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/packed/package.html (working copy)
@@ -47,6 +47,11 @@
Same as PackedInts.Mutable but grows the number of bits per values when needed.
Useful to build a PackedInts.Mutable from a read-once stream of longs.
Compression is good when values are close to each other.
Index: lucene/core/src/java/org/apache/lucene/util/BytesRef.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/BytesRef.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/BytesRef.java (working copy)
@@ -119,11 +119,17 @@
}
}
+ /**
+ * Returns a shallow clone of this instance (the underlying bytes are
+ * not copied and will be shared by both the returned object and this
+ * object.
+ *
+ * @see #deepCopyOf
+ */
@Override
public BytesRef clone() {
return new BytesRef(bytes, offset, length);
}
-
/** Calculates the hash code as required by TermsHash during indexing.
*
It is defined as:
Index: lucene/core/src/java/org/apache/lucene/util/CharsRef.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/CharsRef.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/CharsRef.java (working copy)
@@ -71,6 +71,13 @@
this.length = chars.length;
}
+ /**
+ * Returns a shallow clone of this instance (the underlying characters are
+ * not copied and will be shared by both the returned object and this
+ * object.
+ *
+ * @see #deepCopyOf
+ */
@Override
public CharsRef clone() {
return new CharsRef(chars, offset, length);
Index: lucene/core/src/java/org/apache/lucene/util/IntsRef.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/IntsRef.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/IntsRef.java (working copy)
@@ -56,6 +56,13 @@
assert isValid();
}
+ /**
+ * Returns a shallow clone of this instance (the underlying ints are
+ * not copied and will be shared by both the returned object and this
+ * object.
+ *
+ * @see #deepCopyOf
+ */
@Override
public IntsRef clone() {
return new IntsRef(ints, offset, length);
Index: lucene/core/src/java/org/apache/lucene/util/LongsRef.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/LongsRef.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/LongsRef.java (working copy)
@@ -55,6 +55,13 @@
assert isValid();
}
+ /**
+ * Returns a shallow clone of this instance (the underlying longs are
+ * not copied and will be shared by both the returned object and this
+ * object.
+ *
+ * @see #deepCopyOf
+ */
@Override
public LongsRef clone() {
return new LongsRef(longs, offset, length);
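The clone javadoc added to BytesRef, CharsRef, IntsRef and LongsRef all make the same point: clone() is shallow, deepCopyOf() is not. A small sketch using BytesRef to show the difference (assuming the existing BytesRef.deepCopyOf helper):

    import org.apache.lucene.util.BytesRef;

    // Illustrative only: clone() shares the underlying array, deepCopyOf() copies it.
    final class CloneSketch {
      public static void main(String[] args) {
        BytesRef original = new BytesRef("abc");
        BytesRef shallow = original.clone();           // same byte[] instance
        BytesRef deep = BytesRef.deepCopyOf(original); // private copy of the bytes

        original.bytes[original.offset] = (byte) 'z';
        System.out.println(shallow.utf8ToString());    // "zbc", shares the mutation
        System.out.println(deep.utf8ToString());       // "abc", unaffected
      }
    }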
Index: lucene/core/src/java/org/apache/lucene/util/RollingBuffer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/RollingBuffer.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/RollingBuffer.java (working copy)
@@ -17,9 +17,6 @@
* limitations under the License.
*/
-// TODO: probably move this to core at some point (eg,
-// cutover kuromoji, synfilter, LookaheadTokenFilter)
-
/** Acts like forever growing T[], but internally uses a
* circular buffer to reuse instances of T.
*
Index: lucene/core/src/java/org/apache/lucene/util/Sorter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/Sorter.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/Sorter.java (working copy)
@@ -72,7 +72,7 @@
first_cut = upper(from, mid, second_cut);
len11 = first_cut - from;
}
- rotate( first_cut, mid, second_cut);
+ rotate(first_cut, mid, second_cut);
final int new_mid = first_cut + len22;
mergeInPlace(from, first_cut, new_mid);
mergeInPlace(new_mid, second_cut, to);
@@ -142,7 +142,15 @@
}
}
- void rotate(int lo, int mid, int hi) {
+ final void rotate(int lo, int mid, int hi) {
+ assert lo <= mid && mid <= hi;
+ if (lo == mid || mid == hi) {
+ return;
+ }
+ doRotate(lo, mid, hi);
+ }
+
+ void doRotate(int lo, int mid, int hi) {
if (mid - lo == hi - mid) {
// happens rarely but saves n/2 swaps
while (mid < hi) {
Index: lucene/core/src/java/org/apache/lucene/util/TimSorter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/TimSorter.java (revision 1481938)
+++ lucene/core/src/java/org/apache/lucene/util/TimSorter.java (working copy)
@@ -205,9 +205,9 @@
}
@Override
- void rotate(int lo, int mid, int hi) {
- int len1 = mid - lo;
- int len2 = hi - mid;
+ void doRotate(int lo, int mid, int hi) {
+ final int len1 = mid - lo;
+ final int len2 = hi - mid;
if (len1 == len2) {
while (mid < hi) {
swap(lo++, mid++);
Index: lucene/core/src/test/org/apache/lucene/index/TestAtomicUpdate.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestAtomicUpdate.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestAtomicUpdate.java (working copy)
@@ -25,19 +25,8 @@
import org.apache.lucene.util.*;
public class TestAtomicUpdate extends LuceneTestCase {
- private static final class MockIndexWriter extends IndexWriter {
- public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException {
- super(dir, conf);
- }
+
- @Override
- boolean testPoint(String name) {
- if (LuceneTestCase.random().nextInt(4) == 2)
- Thread.yield();
- return true;
- }
- }
-
private static abstract class TimedThread extends Thread {
volatile boolean failed;
int count;
@@ -124,7 +113,7 @@
TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setMaxBufferedDocs(7);
((TieredMergePolicy) conf.getMergePolicy()).setMaxMergeAtOnce(3);
- IndexWriter writer = new MockIndexWriter(directory, conf);
+ IndexWriter writer = RandomIndexWriter.mockIndexWriter(directory, conf, random());
// Establish a base index of 100 docs:
for(int i=0;i<100;i++) {
Index: lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestCodecs.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestCodecs.java (working copy)
@@ -658,8 +658,7 @@
final int termIndexInterval = _TestUtil.nextInt(random(), 13, 27);
final Codec codec = Codec.getDefault();
final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null, null);
- final SegmentWriteState state =
- new SegmentWriteState(InfoStream.getDefault(), dir, si, 0, fieldInfos, termIndexInterval, null, null, newIOContext(random()));
+ final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, termIndexInterval, fieldInfos, termIndexInterval, null, null, newIOContext(random()));
final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state);
Arrays.sort(fields);
Index: lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java (working copy)
@@ -58,6 +58,9 @@
boolean isClose = false;
StackTraceElement[] trace = new Exception().getStackTrace();
for (int i = 0; i < trace.length; i++) {
+ if (isDoFlush && isClose) {
+ break;
+ }
if ("flush".equals(trace[i].getMethodName())) {
isDoFlush = true;
}
Index: lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java (working copy)
@@ -112,13 +112,8 @@
}
@Override
- public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
+ public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
-
- @Override
- public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
- throw new UnsupportedOperationException();
- }
}
}
Index: lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterStallControl.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterStallControl.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterStallControl.java (working copy)
@@ -339,6 +339,7 @@
for (Thread thread : threads) {
if (thread.getState() != state) {
done = false;
+ break;
}
}
if (done) {
Index: lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java (working copy)
@@ -773,8 +773,9 @@
}
public void testReplaceLayers() throws IOException {
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+ IndexWriterConfig indexWriterConfig = newIndexWriterConfig(
+ TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
FieldType fieldType = new FieldType();
fieldType.setIndexed(true);
@@ -784,6 +785,7 @@
Document doc0 = new Document();
doc0.add(new StoredField("f1", "a", fieldType));
+ doc0.add(new StoredField("f2", "a", fieldType));
writer.addDocument(doc0);
// add f2:b
@@ -791,7 +793,7 @@
fields1.add(new StoredField("f2", "b", fieldType));
writer.updateFields(Operation.ADD_FIELDS, new Term("f1", "a"), fields1);
- // remove f2:b and add f2:c
+ // remove f2:a and f2:b, add f2:c
Document fields2 = new Document();
fields2.add(new StoredField("f2", "c", fieldType));
writer.updateFields(Operation.REPLACE_FIELDS, new Term("f2", "b"), fields2);
@@ -801,12 +803,17 @@
fields3.add(new StoredField("f2", "d", fieldType));
writer.updateFields(Operation.ADD_FIELDS, new Term("f2", "b"), fields3);
+ // do nothing since f2:a was removed
+ writer.deleteDocuments(new Term("f2", "a"));
+
writer.close();
DirectoryReader directoryReader = DirectoryReader.open(dir);
final AtomicReader atomicReader = directoryReader.leaves().get(0).reader();
printField(atomicReader, "f1");
+ assertEquals("wrong number of documents", 1, directoryReader.numDocs());
+
// check indexed fields
final DocsAndPositionsEnum termPositionsA = atomicReader
.termPositionsEnum(new Term("f1", "a"));
@@ -816,6 +823,12 @@
assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS,
termPositionsA.nextDoc());
+ final DocsAndPositionsEnum termPositionsA2 = atomicReader
+ .termPositionsEnum(new Term("f2", "a"));
+ assertNotNull("no positions for term", termPositionsA2);
+ assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS,
+ termPositionsA2.nextDoc());
+
final DocsAndPositionsEnum termPositionsB = atomicReader
.termPositionsEnum(new Term("f2", "b"));
assertNotNull("no positions for term", termPositionsB);
@@ -826,6 +839,7 @@
.termPositionsEnum(new Term("f2", "c"));
assertNotNull("no positions for term", termPositionsC);
assertEquals("wrong doc id", 0, termPositionsC.nextDoc());
+ // 100000 == 2 * StackedDocsEnum.STACKED_SEGMENT_POSITION_INCREMENT
assertEquals("wrong position", 100000, termPositionsC.nextPosition());
assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS,
termPositionsC.nextDoc());
@@ -872,7 +886,7 @@
}
public void printIndexes() throws IOException {
- File outDir = new File("D:/temp/ifu/compare/scenario/b");
+ File outDir = new File("D:/temp/ifu/compare/scenario/a");
outDir.mkdirs();
for (int i = 0; i < 42; i++) {
Index: lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy)
@@ -1183,8 +1183,48 @@
t.join();
assertFalse(t.failed);
}
+
+ /** testThreadInterruptDeadlock but with 2 indexer threads */
+ public void testTwoThreadsInterruptDeadlock() throws Exception {
+ IndexerThreadInterrupt t1 = new IndexerThreadInterrupt();
+ t1.setDaemon(true);
+ t1.start();
+
+ IndexerThreadInterrupt t2 = new IndexerThreadInterrupt();
+ t2.setDaemon(true);
+ t2.start();
+ // Force class loader to load ThreadInterruptedException
+ // up front... else we can see a false failure if 2nd
+ // interrupt arrives while class loader is trying to
+ // init this class (in servicing a first interrupt):
+ assertTrue(new ThreadInterruptedException(new InterruptedException()).getCause() instanceof InterruptedException);
+    // issue at least 300 interrupts, randomly targeting one of the two child threads
+ final int numInterrupts = atLeast(300);
+ int i = 0;
+ while(i < numInterrupts) {
+ // TODO: would be nice to also sometimes interrupt the
+ // CMS merge threads too ...
+ Thread.sleep(10);
+ IndexerThreadInterrupt t = random().nextBoolean() ? t1 : t2;
+ if (t.allowInterrupt) {
+ i++;
+ t.interrupt();
+ }
+ if (!t1.isAlive() && !t2.isAlive()) {
+ break;
+ }
+ }
+ t1.finish = true;
+ t2.finish = true;
+ t1.join();
+ t2.join();
+ assertFalse(t1.failed);
+ assertFalse(t2.failed);
+ }
+
+
public void testIndexStoreCombos() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())));
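
The assertTrue call in testTwoThreadsInterruptDeadlock above is a class-preloading guard: constructing one ThreadInterruptedException forces the class to initialize before any interrupt is sent, so a later interrupt cannot arrive while the class loader is still initializing it inside an earlier interrupt handler. A standalone sketch of the same idea; the helper class and method names are illustrative, not part of the patch:

import org.apache.lucene.util.ThreadInterruptedException;

final class Preload {
  /** Force eager initialization of a class so interrupt-heavy code never triggers class loading inside an interrupt handler. */
  static void init(Class<?> clazz) {
    try {
      // initialize=true runs static initialization now rather than on first use
      Class.forName(clazz.getName(), true, clazz.getClassLoader());
    } catch (ClassNotFoundException e) {
      throw new AssertionError(e); // cannot happen: the Class object already exists
    }
  }

  public static void main(String[] args) {
    Preload.init(ThreadInterruptedException.class);
    // ... now it is safe to start interrupting worker threads ...
  }
}
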
Index: lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (working copy)
@@ -25,6 +25,8 @@
import java.util.Collections;
import java.util.List;
import java.util.Random;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -302,7 +304,70 @@
modifier.close();
dir.close();
}
+
+
+ public void testDeleteAllNoDeadLock() throws IOException, InterruptedException {
+ Directory dir = newDirectory();
+ final RandomIndexWriter modifier = new RandomIndexWriter(random(), dir);
+ int numThreads = atLeast(2);
+ Thread[] threads = new Thread[numThreads];
+ final CountDownLatch latch = new CountDownLatch(1);
+ final CountDownLatch doneLatch = new CountDownLatch(numThreads);
+ for (int i = 0; i < numThreads; i++) {
+ final int offset = i;
+ threads[i] = new Thread() {
+ @Override
+ public void run() {
+ int id = offset * 1000;
+ int value = 100;
+ try {
+ latch.await();
+ for (int i = 0; i < 1000; i++) {
+ Document doc = new Document();
+ doc.add(newTextField("content", "aaa", Field.Store.NO));
+ doc.add(newStringField("id", String.valueOf(id++), Field.Store.YES));
+ doc.add(newStringField("value", String.valueOf(value), Field.Store.NO));
+ doc.add(new NumericDocValuesField("dv", value));
+ modifier.addDocument(doc);
+ if (VERBOSE) {
+ System.out.println("\tThread["+offset+"]: add doc: " + id);
+ }
+ }
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ } finally {
+ doneLatch.countDown();
+ if (VERBOSE) {
+ System.out.println("\tThread["+offset+"]: done indexing" );
+ }
+ }
+ }
+ };
+ threads[i].start();
+ }
+ latch.countDown();
+ while(!doneLatch.await(1, TimeUnit.MILLISECONDS)) {
+ modifier.deleteAll();
+ if (VERBOSE) {
+ System.out.println("del all");
+ }
+ }
+
+ modifier.deleteAll();
+ for (Thread thread : threads) {
+ thread.join();
+ }
+
+ modifier.close();
+ DirectoryReader reader = DirectoryReader.open(dir);
+ assertEquals(reader.maxDoc(), 0);
+ assertEquals(reader.numDocs(), 0);
+ assertEquals(reader.numDeletedDocs(), 0);
+ reader.close();
+ dir.close();
+ }
+
// test rollback of deleteAll()
public void testDeleteAllRollback() throws IOException {
Directory dir = newDirectory();
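
The new testDeleteAllNoDeadLock above coordinates its threads with two latches: a start gate that releases every indexer at once, and a done latch that lets the main thread keep interleaving deleteAll() until all indexers finish. A minimal, self-contained sketch of that choreography; the class and the placeholder work are illustrative, only the latch pattern mirrors the test:

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public class LatchChoreography {
  public static void main(String[] args) throws InterruptedException {
    final int numWorkers = 4;
    final CountDownLatch startGate = new CountDownLatch(1);
    final CountDownLatch doneLatch = new CountDownLatch(numWorkers);

    for (int i = 0; i < numWorkers; i++) {
      new Thread() {
        @Override
        public void run() {
          try {
            startGate.await();       // block until the main thread opens the gate
            // ... per-thread work goes here (the addDocument calls in the test) ...
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
          } finally {
            doneLatch.countDown();   // always signal completion, even on failure
          }
        }
      }.start();
    }

    startGate.countDown();           // release all workers at the same instant
    while (!doneLatch.await(1, TimeUnit.MILLISECONDS)) {
      // interleave the competing operation here (deleteAll() in the test)
    }
  }
}
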
Index: lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (working copy)
@@ -210,15 +210,10 @@
  ThreadLocal<Thread> doFail = new ThreadLocal<Thread>();
- private class MockIndexWriter extends IndexWriter {
+ private class TestPoint1 implements RandomIndexWriter.TestPoint {
Random r = new Random(random().nextLong());
-
- public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException {
- super(dir, conf);
- }
-
@Override
- boolean testPoint(String name) {
+ public void apply(String name) {
if (doFail.get() != null && !name.equals("startDoFlush") && r.nextInt(40) == 17) {
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": NOW FAIL: " + name);
@@ -226,7 +221,6 @@
}
throw new RuntimeException(Thread.currentThread().getName() + ": intentionally failing at " + name);
}
- return true;
}
}
@@ -238,8 +232,9 @@
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
- MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)
- .setRAMBufferSizeMB(0.1).setMergeScheduler(new ConcurrentMergeScheduler()));
+
+ IndexWriter writer = RandomIndexWriter.mockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)
+ .setRAMBufferSizeMB(0.1).setMergeScheduler(new ConcurrentMergeScheduler()), new TestPoint1());
((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions();
//writer.setMaxBufferedDocs(10);
if (VERBOSE) {
@@ -281,8 +276,8 @@
Directory dir = newDirectory();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
- MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)
- .setRAMBufferSizeMB(0.2).setMergeScheduler(new ConcurrentMergeScheduler()));
+ IndexWriter writer = RandomIndexWriter.mockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)
+ .setRAMBufferSizeMB(0.2).setMergeScheduler(new ConcurrentMergeScheduler()), new TestPoint1());
((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions();
//writer.setMaxBufferedDocs(10);
writer.commit();
@@ -324,19 +319,13 @@
}
// LUCENE-1198
- private static final class MockIndexWriter2 extends IndexWriter {
-
- public MockIndexWriter2(Directory dir, IndexWriterConfig conf) throws IOException {
- super(dir, conf);
- }
-
+ private static final class TestPoint2 implements RandomIndexWriter.TestPoint {
boolean doFail;
@Override
- boolean testPoint(String name) {
+ public void apply(String name) {
if (doFail && name.equals("DocumentsWriterPerThread addDocument start"))
throw new RuntimeException("intentionally failing");
- return true;
}
}
@@ -367,11 +356,12 @@
public void testExceptionDocumentsWriterInit() throws IOException {
Directory dir = newDirectory();
- MockIndexWriter2 w = new MockIndexWriter2(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+ TestPoint2 testPoint = new TestPoint2();
+ IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())), testPoint);
Document doc = new Document();
doc.add(newTextField("field", "a field", Field.Store.YES));
w.addDocument(doc);
- w.doFail = true;
+ testPoint.doFail = true;
try {
w.addDocument(doc);
fail("did not hit exception");
@@ -385,7 +375,7 @@
// LUCENE-1208
public void testExceptionJustBeforeFlush() throws IOException {
Directory dir = newDirectory();
- MockIndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(2));
+ IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(2), new TestPoint1());
Document doc = new Document();
doc.add(newTextField("field", "a field", Field.Store.YES));
w.addDocument(doc);
@@ -412,22 +402,15 @@
dir.close();
}
- private static final class MockIndexWriter3 extends IndexWriter {
-
- public MockIndexWriter3(Directory dir, IndexWriterConfig conf) throws IOException {
- super(dir, conf);
- }
-
+ private static final class TestPoint3 implements RandomIndexWriter.TestPoint {
boolean doFail;
boolean failed;
-
@Override
- boolean testPoint(String name) {
+ public void apply(String name) {
if (doFail && name.equals("startMergeInit")) {
failed = true;
throw new RuntimeException("intentionally failing");
}
- return true;
}
}
@@ -441,8 +424,9 @@
cms.setSuppressExceptions();
conf.setMergeScheduler(cms);
((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2);
- MockIndexWriter3 w = new MockIndexWriter3(dir, conf);
- w.doFail = true;
+ TestPoint3 testPoint = new TestPoint3();
+ IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, conf, testPoint);
+ testPoint.doFail = true;
Document doc = new Document();
doc.add(newTextField("field", "a field", Field.Store.YES));
for(int i=0;i<10;i++)
@@ -453,7 +437,7 @@
}
((ConcurrentMergeScheduler) w.getConfig().getMergeScheduler()).sync();
- assertTrue(w.failed);
+ assertTrue(testPoint.failed);
w.close();
dir.close();
}
@@ -555,10 +539,15 @@
boolean sawAppend = false;
boolean sawFlush = false;
for (int i = 0; i < trace.length; i++) {
- if (FreqProxTermsWriterPerField.class.getName().equals(trace[i].getClassName()) && "flush".equals(trace[i].getMethodName()))
+ if (sawAppend && sawFlush) {
+ break;
+ }
+ if (FreqProxTermsWriterPerField.class.getName().equals(trace[i].getClassName()) && "flush".equals(trace[i].getMethodName())) {
sawAppend = true;
- if ("flush".equals(trace[i].getMethodName()))
+ }
+ if ("flush".equals(trace[i].getMethodName())) {
sawFlush = true;
+ }
}
if (sawAppend && sawFlush && count++ >= 30) {
@@ -892,12 +881,18 @@
boolean isDelete = false;
boolean isInGlobalFieldMap = false;
for (int i = 0; i < trace.length; i++) {
- if (SegmentInfos.class.getName().equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName()))
+ if (isCommit && isDelete && isInGlobalFieldMap) {
+ break;
+ }
+ if (SegmentInfos.class.getName().equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName())) {
isCommit = true;
- if (MockDirectoryWrapper.class.getName().equals(trace[i].getClassName()) && "deleteFile".equals(trace[i].getMethodName()))
+ }
+ if (MockDirectoryWrapper.class.getName().equals(trace[i].getClassName()) && "deleteFile".equals(trace[i].getMethodName())) {
isDelete = true;
- if (SegmentInfos.class.getName().equals(trace[i].getClassName()) && "writeGlobalFieldMap".equals(trace[i].getMethodName()))
+ }
+ if (SegmentInfos.class.getName().equals(trace[i].getClassName()) && "writeGlobalFieldMap".equals(trace[i].getMethodName())) {
isInGlobalFieldMap = true;
+ }
}
if (isInGlobalFieldMap && dontFailDuringGlobalFieldMap) {
@@ -1014,29 +1009,26 @@
}
// LUCENE-1347
- private static final class MockIndexWriter4 extends IndexWriter {
+ private static final class TestPoint4 implements RandomIndexWriter.TestPoint {
- public MockIndexWriter4(Directory dir, IndexWriterConfig conf) throws IOException {
- super(dir, conf);
- }
-
boolean doFail;
@Override
- boolean testPoint(String name) {
+ public void apply(String name) {
if (doFail && name.equals("rollback before checkpoint"))
throw new RuntimeException("intentionally failing");
- return true;
}
}
// LUCENE-1347
public void testRollbackExceptionHang() throws Throwable {
Directory dir = newDirectory();
- MockIndexWriter4 w = new MockIndexWriter4(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+ TestPoint4 testPoint = new TestPoint4();
+ IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())), testPoint);
+
addDoc(w);
- w.doFail = true;
+ testPoint.doFail = true;
try {
w.rollback();
fail("did not hit intentional RuntimeException");
@@ -1044,7 +1036,7 @@
// expected
}
- w.doFail = false;
+ testPoint.doFail = false;
w.rollback();
dir.close();
}
@@ -1342,6 +1334,7 @@
for (int i = 0; i < trace.length; i++) {
if (TermVectorsConsumer.class.getName().equals(trace[i].getClassName()) && stage.equals(trace[i].getMethodName())) {
fail = true;
+ break;
}
}
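
The TestIndexWriterExceptions hunks above (and the TestStressIndexing2 hunks further down) all follow one refactor: the per-test MockIndexWriter subclasses that overrode the package-private testPoint(String) hook are replaced by a RandomIndexWriter.TestPoint callback handed to RandomIndexWriter.mockIndexWriter. A minimal sketch of the new pattern, assuming only the signatures visible in this patch; the class names and the injected failure are illustrative, while "startDoFlush" is one of the stage names the hunks themselves reference:

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestPointSketch extends LuceneTestCase {

  /** Hypothetical test point that fails once indexing reaches a named stage. */
  private static final class FailAtStage implements RandomIndexWriter.TestPoint {
    volatile boolean doFail;
    @Override
    public void apply(String name) {
      if (doFail && name.equals("startDoFlush")) {
        throw new RuntimeException("intentionally failing at " + name);
      }
    }
  }

  public void testSketch() throws Exception {
    Directory dir = newDirectory();
    FailAtStage tp = new FailAtStage();
    IndexWriter w = RandomIndexWriter.mockIndexWriter(
        dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())), tp);
    // ... index a few documents, set tp.doFail = true, then assert the expected exception ...
    w.close();
    dir.close();
  }
}
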
Index: lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java (working copy)
@@ -18,10 +18,11 @@
*
*/
-import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintStream;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.ArrayList;
@@ -49,10 +50,6 @@
@Override @Nightly
public void testNRTThreads() throws Exception {
- String vendor = Constants.JAVA_VENDOR;
- assumeTrue(vendor + " JRE not supported.",
- vendor.startsWith("Oracle") || vendor.startsWith("Sun") || vendor.startsWith("Apple"));
-
// if we are not the fork
if (System.getProperty("tests.crashmode") == null) {
// try up to 10 times to create an index
@@ -112,19 +109,41 @@
pb.directory(tempDir);
pb.redirectErrorStream(true);
Process p = pb.start();
- InputStream is = p.getInputStream();
- BufferedInputStream isl = new BufferedInputStream(is);
- byte buffer[] = new byte[1024];
- int len = 0;
- if (VERBOSE) System.err.println(">>> Begin subprocess output");
- while ((len = isl.read(buffer)) != -1) {
- if (VERBOSE) {
- System.err.write(buffer, 0, len);
- }
- }
- if (VERBOSE) System.err.println("<<< End subprocess output");
+
+ // We pump everything to stderr.
+ PrintStream childOut = System.err;
+ Thread stdoutPumper = ThreadPumper.start(p.getInputStream(), childOut);
+ Thread stderrPumper = ThreadPumper.start(p.getErrorStream(), childOut);
+ if (VERBOSE) childOut.println(">>> Begin subprocess output");
p.waitFor();
+ stdoutPumper.join();
+ stderrPumper.join();
+ if (VERBOSE) childOut.println("<<< End subprocess output");
}
+
+ /** A pipe thread. It'd be nice to reuse guava's implementation for this... */
+ static class ThreadPumper {
+ public static Thread start(final InputStream from, final OutputStream to) {
+ Thread t = new Thread() {
+ @Override
+ public void run() {
+ try {
+ byte [] buffer = new byte [1024];
+ int len;
+ while ((len = from.read(buffer)) != -1) {
+ if (VERBOSE) {
+ to.write(buffer, 0, len);
+ }
+ }
+ } catch (IOException e) {
+ System.err.println("Couldn't pipe from the forked process: " + e.toString());
+ }
+ }
+ };
+ t.start();
+ return t;
+ }
+ }
/**
* Recursively looks for indexes underneath file,
@@ -155,20 +174,40 @@
}
return false;
}
-
+
/**
* currently, this only works/tested on Sun and IBM.
*/
public void crashJRE() {
- try {
-      Class<?> clazz = Class.forName("sun.misc.Unsafe");
- // we should use getUnsafe instead, harmony implements it, etc.
- Field field = clazz.getDeclaredField("theUnsafe");
- field.setAccessible(true);
- Object o = field.get(null);
- Method m = clazz.getMethod("putAddress", long.class, long.class);
- m.invoke(o, 0L, 0L);
- } catch (Exception e) { e.printStackTrace(); }
- fail();
+ final String vendor = Constants.JAVA_VENDOR;
+ final boolean supportsUnsafeNpeDereference =
+ vendor.startsWith("Oracle") ||
+ vendor.startsWith("Sun") ||
+ vendor.startsWith("Apple");
+
+ try {
+ if (supportsUnsafeNpeDereference) {
+ try {
+          Class<?> clazz = Class.forName("sun.misc.Unsafe");
+ Field field = clazz.getDeclaredField("theUnsafe");
+ field.setAccessible(true);
+ Object o = field.get(null);
+ Method m = clazz.getMethod("putAddress", long.class, long.class);
+ m.invoke(o, 0L, 0L);
+ } catch (Throwable e) {
+ System.out.println("Couldn't kill the JVM via Unsafe.");
+ e.printStackTrace(System.out);
+ }
+ }
+
+      // Fall back to Runtime.halt().
+ Runtime.getRuntime().halt(-1);
+ } catch (Exception e) {
+ System.out.println("Couldn't kill the JVM.");
+ e.printStackTrace(System.out);
+ }
+
+ // We couldn't get the JVM to crash for some reason.
+ fail();
}
}
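
The ThreadPumper added above replaces the single-threaded read loop so the forked JVM can never stall on a full stdout/stderr pipe while the parent is blocked elsewhere: both streams are drained on their own threads before waitFor(). A standalone sketch of the same drain-then-wait pattern; the class and method names are illustrative, not part of the patch:

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

final class StreamPump {
  /** Copy everything from the child's stream to the given sink on a background thread. */
  static Thread start(final InputStream from, final OutputStream to) {
    Thread t = new Thread() {
      @Override
      public void run() {
        byte[] buffer = new byte[1024];
        int len;
        try {
          while ((len = from.read(buffer)) != -1) {
            to.write(buffer, 0, len);
          }
        } catch (IOException e) {
          System.err.println("Couldn't pipe from the forked process: " + e);
        }
      }
    };
    t.start();
    return t;
  }

  /** Launch the process, drain both pipes, and only then collect the exit code. */
  static int runAndDrain(ProcessBuilder pb) throws IOException, InterruptedException {
    Process p = pb.start();
    Thread out = start(p.getInputStream(), System.err);
    Thread err = start(p.getErrorStream(), System.err);
    int exitCode = p.waitFor(); // safe: both pipes are being emptied concurrently
    out.join();
    err.join();
    return exitCode;
  }
}
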
Index: lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java (working copy)
@@ -358,6 +358,9 @@
boolean sawClose = false;
boolean sawMerge = false;
for (int i = 0; i < trace.length; i++) {
+ if (sawAbortOrFlushDoc && sawMerge && sawClose) {
+ break;
+ }
if ("abort".equals(trace[i].getMethodName()) ||
"finishDocument".equals(trace[i].getMethodName())) {
sawAbortOrFlushDoc = true;
@@ -370,8 +373,9 @@
}
}
if (sawAbortOrFlushDoc && !sawClose && !sawMerge) {
- if (onlyOnce)
+ if (onlyOnce) {
doFail = false;
+ }
//System.out.println(Thread.currentThread().getName() + ": now fail");
//new Throwable().printStackTrace(System.out);
throw new IOException("now failing on purpose");
Index: lucene/core/src/test/org/apache/lucene/index/TestNorms.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestNorms.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestNorms.java (working copy)
@@ -179,13 +179,8 @@
}
@Override
- public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
+ public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
-
- @Override
- public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
- throw new UnsupportedOperationException();
- }
}
}
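
This TestNorms hunk, and the similar hunks in the search and spans tests below, track the 4.4 Similarity change that folds ExactSimScorer and SloppySimScorer into a single SimScorer whose score method takes a float frequency. A minimal sketch of the new shape, assuming SimScorer's three abstract methods in that release; the freq-as-score body matches what TestConjunctions installs below, while the slop and payload factors are illustrative defaults:

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;

// Fragment of a Similarity subclass; computeNorm/computeWeight are omitted, so the class stays abstract.
abstract class FreqOnlySimilarity extends Similarity {
  @Override
  public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) {
    return new SimScorer() {
      @Override
      public float score(int doc, float freq) {
        return freq;                 // one method now covers both exact and sloppy scoring
      }
      @Override
      public float computeSlopFactor(int distance) {
        return 1f / (distance + 1);  // conventional sloppy-phrase falloff
      }
      @Override
      public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
        return 1f;                   // payloads ignored in this sketch
      }
    };
  }
}
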
Index: lucene/core/src/test/org/apache/lucene/index/TestStressIndexing2.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestStressIndexing2.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestStressIndexing2.java (working copy)
@@ -47,21 +47,16 @@
static int maxBufferedDocs=3;
static int seed=0;
- public class MockIndexWriter extends IndexWriter {
+ public final class YieldTestPoint implements RandomIndexWriter.TestPoint {
- public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException {
- super(dir, conf);
- }
-
@Override
- boolean testPoint(String name) {
+ public void apply(String name) {
// if (name.equals("startCommit")) {
if (random().nextInt(4) == 2)
Thread.yield();
- return true;
}
}
-
+//
public void testRandomIWReader() throws Throwable {
Directory dir = newDirectory();
@@ -151,9 +146,9 @@
public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException {
    Map<String,Document> docs = new HashMap<String,Document>();
- IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(
+ IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE).setRAMBufferSizeMB(
- 0.1).setMaxBufferedDocs(maxBufferedDocs).setMergePolicy(newLogMergePolicy()));
+ 0.1).setMaxBufferedDocs(maxBufferedDocs).setMergePolicy(newLogMergePolicy()), new YieldTestPoint());
w.commit();
LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
lmp.setUseCompoundFile(false);
@@ -202,10 +197,10 @@
  public Map<String,Document> indexRandom(int nThreads, int iterations, int range, Directory dir, int maxThreadStates,
boolean doReaderPooling) throws IOException, InterruptedException {
    Map<String,Document> docs = new HashMap<String,Document>();
- IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(
+ IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE)
.setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates))
- .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy()));
+ .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy()), new YieldTestPoint());
LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
lmp.setUseCompoundFile(false);
lmp.setMergeFactor(mergeFactor);
Index: lucene/core/src/test/org/apache/lucene/index/TestUniqueTermCount.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestUniqueTermCount.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/index/TestUniqueTermCount.java (working copy)
@@ -110,13 +110,8 @@
}
@Override
- public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
+ public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
-
- @Override
- public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
- throw new UnsupportedOperationException();
- }
}
}
Index: lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java (working copy)
@@ -148,7 +148,7 @@
static final class JustCompileSpanScorer extends SpanScorer {
protected JustCompileSpanScorer(Spans spans, Weight weight,
- Similarity.SloppySimScorer docScorer) throws IOException {
+ Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
}
Index: lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java (working copy)
@@ -379,11 +379,11 @@
PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
    Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
- if(VERBOSE)
+ if(VERBOSE) {
System.out.println("Num payloads:" + payloads.size());
- for (final byte [] bytes : payloads) {
- if(VERBOSE)
+ for (final byte [] bytes : payloads) {
System.out.println(new String(bytes, "UTF-8"));
+ }
}
reader.close();
directory.close();
@@ -451,12 +451,12 @@
System.out.println("\nSpans Dump --");
if (spans.isPayloadAvailable()) {
        Collection<byte[]> payload = spans.getPayload();
- if(VERBOSE)
+ if(VERBOSE) {
System.out.println("payloads for span:" + payload.size());
- for (final byte [] bytes : payload) {
- if(VERBOSE)
+ for (final byte [] bytes : payload) {
System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " "
+ new String(bytes, "UTF-8"));
+ }
}
assertEquals(numPayloads[cnt],payload.size());
Index: lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java (working copy)
@@ -270,16 +270,11 @@
}
@Override
- public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) {
+ public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
- public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) {
- throw new UnsupportedOperationException(UNSUPPORTED_MSG);
- }
-
- @Override
public long computeNorm(FieldInvertState state) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
Index: lucene/core/src/test/org/apache/lucene/search/TestConjunctions.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/TestConjunctions.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/search/TestConjunctions.java (working copy)
@@ -109,19 +109,9 @@
}
@Override
- public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
- return new ExactSimScorer() {
+ public SimScorer simScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
+ return new SimScorer() {
@Override
- public float score(int doc, int freq) {
- return freq;
- }
- };
- }
-
- @Override
- public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
- return new SloppySimScorer() {
- @Override
public float score(int doc, float freq) {
return freq;
}
Index: lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java (working copy)
@@ -156,34 +156,11 @@
}
@Override
- public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- final ExactSimScorer sub = sim.exactSimScorer(stats, context);
+ public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
+ final SimScorer sub = sim.simScorer(stats, context);
final FieldCache.Floats values = FieldCache.DEFAULT.getFloats(context.reader(), boostField, false);
-
- return new ExactSimScorer() {
- @Override
- public float score(int doc, int freq) {
- return values.get(doc) * sub.score(doc, freq);
- }
-
- @Override
- public Explanation explain(int doc, Explanation freq) {
- Explanation boostExplanation = new Explanation(values.get(doc), "indexDocValue(" + boostField + ")");
- Explanation simExplanation = sub.explain(doc, freq);
- Explanation expl = new Explanation(boostExplanation.getValue() * simExplanation.getValue(), "product of:");
- expl.addDetail(boostExplanation);
- expl.addDetail(simExplanation);
- return expl;
- }
- };
- }
-
- @Override
- public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
- final SloppySimScorer sub = sim.sloppySimScorer(stats, context);
- final FieldCache.Floats values = FieldCache.DEFAULT.getFloats(context.reader(), boostField, false);
- return new SloppySimScorer() {
+ return new SimScorer() {
@Override
public float score(int doc, float freq) {
return values.get(doc) * sub.score(doc, freq);
Index: lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java (working copy)
@@ -37,7 +37,7 @@
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanQuery.BooleanWeight;
import org.apache.lucene.search.similarities.DefaultSimilarity;
-import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity.SimWeight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -274,7 +274,7 @@
final int maxDoc;
    final Set<Long> ords = new HashSet<Long>();
- final ExactSimScorer[] sims;
+ final SimScorer[] sims;
final int minNrShouldMatch;
double score = Float.NaN;
@@ -285,7 +285,7 @@
this.maxDoc = reader.maxDoc();
BooleanQuery bq = (BooleanQuery) weight.getQuery();
this.minNrShouldMatch = bq.getMinimumNumberShouldMatch();
- this.sims = new ExactSimScorer[(int)dv.getValueCount()];
+ this.sims = new SimScorer[(int)dv.getValueCount()];
for (BooleanClause clause : bq.getClauses()) {
assert !clause.isProhibited();
assert !clause.isRequired();
@@ -300,7 +300,7 @@
searcher.termStatistics(term, context));
w.getValueForNormalization(); // ignored
w.normalize(1F, 1F);
- sims[(int)ord] = weight.similarity.exactSimScorer(w, reader.getContext());
+ sims[(int)ord] = weight.similarity.simScorer(w, reader.getContext());
}
}
}
Index: lucene/core/src/test/org/apache/lucene/store/TestMockDirectoryWrapper.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/store/TestMockDirectoryWrapper.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/store/TestMockDirectoryWrapper.java (working copy)
@@ -55,7 +55,7 @@
public void testDiskFull() throws IOException {
// test writeBytes
MockDirectoryWrapper dir = newMockDirectory();
- dir.setMaxSizeInBytes(2);
+ dir.setMaxSizeInBytes(3);
final byte[] bytes = new byte[] { 1, 2};
IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT);
out.writeBytes(bytes, bytes.length); // first write should succeed
@@ -73,7 +73,7 @@
// test copyBytes
dir = newMockDirectory();
- dir.setMaxSizeInBytes(2);
+ dir.setMaxSizeInBytes(3);
out = dir.createOutput("foo", IOContext.DEFAULT);
out.copyBytes(new ByteArrayDataInput(bytes), bytes.length); // first copy should succeed
// flush() to ensure the written bytes are not buffered and counted
Index: lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java (revision 1481938)
+++ lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java (working copy)
@@ -34,7 +34,7 @@
import org.junit.Ignore;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
-@Ignore("Requires tons of heap to run (10G works)")
+@Ignore("Requires tons of heap to run (420G works)")
@TimeoutSuite(millis = 100 * TimeUnits.HOUR)
public class Test2BFST extends LuceneTestCase {
@@ -50,12 +50,12 @@
for(int doPackIter=0;doPackIter<2;doPackIter++) {
boolean doPack = doPackIter == 1;
- // Build FST w/ NoOutputs and stop when nodeCount > 3B
+ // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
if (!doPack) {
System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
Outputs