Index: src/java/org/apache/lucene/search/IndexSearcher.java =================================================================== --- src/java/org/apache/lucene/search/IndexSearcher.java (revision 940112) +++ src/java/org/apache/lucene/search/IndexSearcher.java (working copy) @@ -94,7 +94,7 @@ closeReader = false; } - private IndexSearcher(IndexReader r, boolean closeReader) { + protected IndexSearcher(IndexReader r, boolean closeReader) { reader = r; this.closeReader = closeReader; Index: src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- src/java/org/apache/lucene/index/MultiReader.java (revision 940112) +++ src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -69,7 +69,7 @@ initialize(subReaders, closeSubReaders); } - private void initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException { + protected void initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException { this.subReaders = subReaders.clone(); starts = new int[subReaders.length + 1]; // build starts array decrefOnClose = new boolean[subReaders.length]; Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 940112) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -3545,7 +3545,7 @@ * you should immediately close the writer. See above for details.

*/ - public final void commit(Map commitUserData) throws CorruptIndexException, IOException { + public void commit(Map commitUserData) throws CorruptIndexException, IOException { ensureOpen(); Index: contrib/splitindex/src/test/org/apache/lucene/index/SplitTestCase.java =================================================================== --- contrib/splitindex/src/test/org/apache/lucene/index/SplitTestCase.java (revision 0) +++ contrib/splitindex/src/test/org/apache/lucene/index/SplitTestCase.java (revision 0) @@ -0,0 +1,551 @@ +package org.apache.lucene.index; + +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import junit.framework.AssertionFailedError; +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestListener; +import junit.framework.TestResult; +import junit.framework.TestSuite; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.Version; + +/** + * The SplitTestCase builds on the {@link LuceneTestCase} and + * provides basic test cases and utility methods that are tailored towards + * testing split indices. Specifically, it takes care of setting up and tearing + * down {@link SplitWriter}s and {@link SplitReader}s, leaving it up to its + * sub-classes to configure the {@link SplitPolicy}s and corresponding + * {@link SplitRule}s. + * + * @author Karthick Sankarachary + */ +public abstract class SplitTestCase extends LuceneTestCase { + // Create file system directories under the system-defined temporary location. + protected final String PATH_DIRECTORY_ROOT = System + .getProperty("java.io.tmpdir") + + getClass().getSimpleName(); + + // The file system (super-)directory on which to base the split index. + protected Directory directory; + // The analyzer to use for the super-directory. + protected Analyzer analyzer; + // The configuration object to use for the super-index. + protected IndexWriterConfig config; + // The split writer based on the above (super-)directory. + protected SplitWriter writer; + // The list of index readers obtained from the above writer. + protected List readers; + // The map of index reader to its corresponding index searcher. + protected Map searchers; + + // The options hash to use for configuring the split policy. + protected Map options = new HashMap(); + + /** + * @return the name of the concrete split policy being tested. + */ + protected abstract Class getSplitPolicyClass(); + + /** + * Create the split writer, if {@link #setupWriter()} gives the greenlight. + * Similarly, create the split reader, if {@link #setupReader()} gives the + * greenlight. + */ + @Override + protected void setUp() throws Exception { + if (setupWriter()) { + createWriter(); + if (setupReader()) { + createReader(); + } + } + } + + /** + * Close all the writers, readers and directories obtained during the course + * of this test case. + */ + @Override + protected void tearDown() throws Exception { + closeAll(); + } + + /** + * By default, setup the split writer. + * + * @return + * @throws CorruptIndexException + * @throws LockObtainFailedException + * @throws IOException + */ + protected boolean setupWriter() throws CorruptIndexException, + LockObtainFailedException, IOException { + return true; + }; + + /** + * If a writer wasn't already created, create a split policy of the right + * type, and create a split writer based on that. + * + * @return + * @throws IOException + * @throws InstantiationException + * @throws IllegalAccessException + * @throws SecurityException + * @throws IllegalArgumentException + * @throws NoSuchMethodException + * @throws InvocationTargetException + */ + protected SplitWriter createWriter() throws IOException, + InstantiationException, IllegalAccessException, SecurityException, + IllegalArgumentException, NoSuchMethodException, + InvocationTargetException { + if (writer == null) { + Class splitPolicyClass = getSplitPolicyClass(); + Constructor policyConstructor = splitPolicyClass + .getConstructor(Map.class); + SplitPolicy splitPolicy = (SplitPolicy) policyConstructor + .newInstance(options); + config = new IndexWriterConfig(Version.LUCENE_31, getAnalyzer()) + .setMaxFieldLength(getMaxFieldLength()); + config.setOpenMode(getOpenMode()); + writer = new SplitWriter(getDirectory(), config, splitPolicy); + writer.deleteAll(); + writer.commit(); + } + return writer; + } + + /** + * Write a document with an initial sample field. + * + * @return + * @throws CorruptIndexException + * @throws IOException + */ + protected boolean setupReader() throws CorruptIndexException, IOException { + readers = new ArrayList(); + searchers = new HashMap(); + Document document = new Document(); + document.add(new Field("name1", "value1", Field.Store.YES, + Field.Index.ANALYZED)); + writer.addDocument(document); + writer.commit(); + return true; + } + + /** + * Obtain a reader from the writer, and add its searcher to the mix. + * + * @return + * @throws IOException + */ + protected IndexReader createReader() throws IOException { + IndexReader reader = (IndexReader) writer.getReader(); + readers.add(reader); + searchers.put(reader, createSearcher(reader)); + return reader; + } + + /** + * Close all resources obtained during the course of this test case. + * + * @throws CorruptIndexException + * @throws SplitException + * @throws IOException + */ + protected void closeAll() throws CorruptIndexException, SplitException, + IOException { + if (writer != null) { + writer.close(true); + } + if (readers != null) { + for (IndexReader reader : readers) { + reader.close(); + } + } + if (directory != null) { + directory.close(); + } + } + + /** + * Check to see if the writer has no splits to begin with. + * + * @throws Exception + */ + public void testWriterSetup() throws Exception { + assertEquals(0, writer.getNumSplits()); + } + + /** + * Check to see if the split index contains the initial sample term. + * + * @throws Exception + */ + public void testReaderSetup() throws Exception { + assertHits(1, new Term("name1", "value1"), 1); + } + + /** + * Write a second field to the split index, and illustrate that that change is + * not visible to the reader obtained during setup (this limitated is due to + * the near real-time nature of the index reader and searcher). + * + * @throws Exception + */ + public void testReadSuper() throws Exception { + Document document = new Document(); + document.add(new Field("name2", "value2", Field.Store.YES, + Field.Index.ANALYZED)); + writer.addDocument(document); + writer.commit(); + + assertHits(0, new Term("name2", "value2")); + } + + /** + * Create the split index with a clean slate for each test case. + * + * @return + */ + protected OpenMode getOpenMode() { + return OpenMode.CREATE; + } + + /** + * Create an index searcher for the given index reader + * + * @param reader + * an existing index reader + * @return a brand-new index searcher + */ + protected IndexSearcher createSearcher(IndexReader reader) { + return new IndexSearcher(reader); + } + + /** + * @return the primary searcher that was obtained the first time around + */ + protected IndexSearcher getPrimarySearcher() { + return (searchers != null && searchers.size() > 0) ? searchers + .get(getPrimaryReader()) : null; + } + + /** + * @return the primary reader that was obtained the first time around + */ + protected IndexReader getPrimaryReader() { + return (readers != null && readers.size() > 0) ? readers.get(0) : null; + } + + /** + * Get the existing index searcher for the given index reader. + * + * @param reader + * an index reader + * @return the existing index searcher for the given reader + */ + protected IndexSearcher getSearcher(IndexReader reader) { + return searchers.get(reader); + } + + /** + * @return the super-directory used by the split index + * @throws IOException + */ + protected Directory getDirectory() throws IOException { + if (directory == null) { + directory = FSDirectory.open(new File(PATH_DIRECTORY_ROOT)); + } + return directory; + } + + /** + * Create a new configuration for the split writer. + * + * @return a brand-new configuration + */ + protected IndexWriterConfig getConfig() { + return new IndexWriterConfig(Version.LUCENE_31, getAnalyzer()); + } + + /** + * @return the current analyzer, or the standard one, if one doesn't exist + */ + protected Analyzer getAnalyzer() { + if (analyzer == null) { + analyzer = new StandardAnalyzer(Version.LUCENE_30); + } + return analyzer; + } + + /** + * @return the current max field length or unlimited, if one isn't defined + */ + protected int getMaxFieldLength() { + return (config != null ? config.getMaxFieldLength() + : IndexWriterConfig.UNLIMITED_FIELD_LENGTH); + } + + /** + * Add a field to the default writer. + * + * @param field + * the field to be added + * @return the document that was added to the writer + * @throws CorruptIndexException + * @throws IOException + */ + protected Document addDocument(Field field) throws CorruptIndexException, + IOException { + return addDocument(field, writer); + } + + /** + * Add a field to the given writer. + * + * @param field + * the field to be added + * @param writer + * the writer to add the field to + * @return + * @throws CorruptIndexException + * @throws IOException + */ + protected Document addDocument(Field field, IndexWriter writer) + throws CorruptIndexException, IOException { + List fields = new ArrayList(); + fields.add(field); + return addDocument(fields, writer); + } + + /** + * Add a set of fields to the default writer. + * + * @param fields + * the set of fields to be added + * @return + * @throws CorruptIndexException + * @throws IOException + */ + protected Document addDocument(List fields) + throws CorruptIndexException, IOException { + return addDocument(fields, writer); + } + + /** + * Add a set of fields to the given writer. + * + * @param fields + * the set of fields to be added + * @param writer + * the given writer + * @return + * @throws CorruptIndexException + * @throws IOException + */ + protected Document addDocument(List fields, IndexWriter writer) + throws CorruptIndexException, IOException { + Document document = new Document(); + for (Field field : fields) { + document.add(field); + } + writer.addDocument(document); + return document; + } + + /** + * Assert if the expected hits matches the actual total hits of the given term + * in the primary searcher. + * + * @param expectedHits + * @param term + */ + protected void assertHits(int expectedHits, Term term) { + assertHits(expectedHits, new TermQuery(term), getPrimarySearcher()); + } + + /** + * Assert if the expected hits matches the actual total hits of the given term + * in the primary searcher, with the given cap on the hits. + * + * @param expectedHits + * @param term + * @param limitHits + */ + protected void assertHits(int expectedHits, Term term, int limitHits) { + assertHits(expectedHits, new TermQuery(term), limitHits, + getPrimarySearcher()); + } + + /** + * Assert if the expected hits matches the actual total hits of the given term + * in the given reader. + * + * @param expectedHits + * @param term + * @param reader + */ + protected void assertHits(int expectedHits, Term term, IndexReader reader) { + assertHits(expectedHits, new TermQuery(term), createSearcher(reader)); + } + + /** + * Assert if the expected hits matches the actual total hits of the given term + * in the given searcher. + * + * @param expectedHits + * @param term + * @param searcher + */ + protected void assertHits(int expectedHits, Term term, IndexSearcher searcher) { + assertHits(expectedHits, new TermQuery(term), searcher); + } + + /** + * Assert if the expected hits matches the actual total hits of the given term + * in the given searcher, with the given cap on hits. + * + * @param expectedHits + * @param term + * @param limitHits + * @param searcher + */ + protected void assertHits(int expectedHits, Term term, int limitHits, + IndexSearcher searcher) { + assertHits(expectedHits, new TermQuery(term), limitHits, searcher); + } + + /** + * Assert if the expected hits matches the actual total hits of the given + * query in the primary searcher. + * + * @param expectedHits + * @param query + */ + protected void assertHits(int expectedHits, Query query) { + assertHits(expectedHits, query, Integer.MAX_VALUE, getPrimarySearcher()); + } + + /** + * Assert if the expected hits matches the actual total hits of the given + * query in the given searcher. + * + * @param expectedHits + * @param query + * @param searcher + */ + protected void assertHits(int expectedHits, Query query, + IndexSearcher searcher) { + assertHits(expectedHits, query, Integer.MAX_VALUE, searcher); + } + + /** + * Assert if the expected hits matches the actual total hits of the given + * query in the given searcher. + * + * @param expectedHits + * @param query + * @param limitHits + * @param searcher + * @return + */ + protected int assertHits(int expectedHits, Query query, int limitHits, + IndexSearcher searcher) { + int totalHits = 0; + try { + assertEquals(expectedHits, + totalHits = getHits(query, limitHits, searcher)); + } catch (IOException e) { + assertTrue( + "Could not search for " + query + " because " + e.getMessage(), true); + } + return totalHits; + } + + /** + * Assert if the expected hits matches the actual total hits of the given term + * in the given searcher. + * + * @param query + * @param limitHits + * @param searcher + * @return + * @throws IOException + */ + protected int getHits(Query query, int limitHits, IndexSearcher searcher) + throws IOException { + return searcher.search(query, limitHits).totalHits; + } + + /** + * @return a test result that prints the events fired by the test case to + * standard output. + */ + protected static TestResult getTestResult() { + TestResult testResult = new TestResult(); + testResult.addListener(new TestListener() { + public void addError(Test test, Throwable throwable) { + System.out.println("{event: error, test: " + test.getClass() + + ", throwable: " + throwable + " }"); + } + + public void addFailure(Test test, AssertionFailedError error) { + System.out.println("{event: failure, test: " + test + " }"); + } + + public void endTest(Test test) { + System.out.println("{event: end, test: " + test + " }"); + } + + public void startTest(Test test) { + System.out.println("{event: start, test: " + test + " }"); + } + }); + return testResult; + } + + /** + * Run the given set of tests and collect the output in the given test result. + * + * @param tests + * @param testResult + */ + protected static void runTests(Enumeration tests, TestResult testResult) { + while (tests.hasMoreElements()) { + Object test = tests.nextElement(); + if (test instanceof TestSuite) { + System.out.println("Running test suite " + test); + runTests(((TestSuite) test).tests(), testResult); + } else if (test instanceof TestCase) { + System.out.println("Running test case " + test); + ((TestCase) test).run(testResult); + } + } + } + +} Index: contrib/splitindex/src/java/org/apache/lucene/index/AbstractSplitPolicy.java =================================================================== --- contrib/splitindex/src/java/org/apache/lucene/index/AbstractSplitPolicy.java (revision 0) +++ contrib/splitindex/src/java/org/apache/lucene/index/AbstractSplitPolicy.java (revision 0) @@ -0,0 +1,556 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.CopyOnWriteArrayList; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Version; + +/** + * An AbstractSplitPolicy defines the default behavior of a split + * policy, and is meant to serve as a building block for concrete policy + * implementations. In particular, it registers itself and its correponding + * {@link #splitRules} as a split voter on the split writer, if given. + * + * @author Karthick Sankarachary + * + */ +public abstract class AbstractSplitPolicy extends SplitVoterAdapter implements + SplitPolicy { + /** + * An ordered set of directories, each of which denotes a configurable portion + * of the split index. A directory that belongs to this set will referred to + * herein as a sub-directory. By the same token, the directory on which the + * split writer (or reader) is based will be referred to as a super-directory, + * considering that it is managed by the IndexWriter (or + * IndexReader) super-class. + * + * Note that the split index (exposed by a SplitWriter or + * SplitReader) is backed by some collaboration of the + * sub-directories (i.e. {@link #splits}) and super-directory ( + * {@link #directory}. By design, the abstract split policy does not specify + * the exact semantics of that collaboration (leaving it rather up to its + * concrete sub-classes). + */ + protected Set splits; + + /** + * An ordered list of split rules in charge of determining the scope, length + * and contents of the {@link splits} set. Specifically, each split rule is + * responsible for triggering modifications to the {@link splits} set, as it + * deems fit. Think of this list as a chain of responbility of sorts, in the + * sense that each split rule gets a chance to interject its behavior in the + * order in which it appears in the list. + */ + protected Map splitRules; + + /** + * The writer for the split index, which is typically known if the split + * policy was opened through a {@link SplitWriter}, or a {@link SplitReader} + * obtained through the SplitWriter. + */ + protected SplitWriter writer; + + /** + * The directory underlying the index writer on which the above split + * {@link writer} is based. Again, this will be denoted as the super-directory + * for the split index for the reasons mentioned above. Typically, this + * directory will contain most, if not all, of the real-time (or recent) + * changes to the index. + */ + protected Directory directory; + + /** + * The configuration object used to create the split {@link writer}. + */ + protected IndexWriterConfig config; + + /** + * The active listeners of this split policy. + */ + protected List splitListeners = new CopyOnWriteArrayList(); + + /** + * A map that tracks split policies by their corresponding identifiers. This + * comes in handy when the split policy needs to be accessed in the context of + * a stateless split rule. + */ + private static Map instances = new HashMap(); + + /** + * Construct a split policy with an empty set of split sub-indices. Since a + * split writer was not provided, then it is safe to say that the policy is + * being constructed by a split reader. + * + * @param options + * the map used to configure the policy + */ + @SuppressWarnings("unchecked") + public AbstractSplitPolicy(Map options) { + super(options); + splits = Collections.synchronizedSortedSet(new TreeSet( + getDirectoryComparator())); + splitRules = new HashMap(); + Object ruleValue = options.get(POLICY_SPLIT_RULES); + if (ruleValue instanceof SplitRule) { + addRule((SplitRule) ruleValue); + } else if (ruleValue instanceof Collection) { + for (SplitRule splitRule : ((Collection) options + .get(POLICY_SPLIT_RULES))) { + addRule(splitRule); + } + } + } + + /** + * @return the list of split rules being enforced. + */ + public Collection getRules() { + return splitRules.values(); + } + + /** + * Obtain the split rule for the given split instance identifier. + * + * @param splitInstanceID + * the identifier of the split rule of interest + * @return the split rule instance, if present + */ + public SplitRule getRule(int splitInstanceID) { + return splitRules.get(Long.valueOf(splitInstanceID)); + } + + /** + * @return a specific split rule of the given type. + */ + public SplitRule getRule(Class ruleClass) { + for (SplitRule splitRule : splitRules.values()) { + if (splitRule.getClass().equals(ruleClass)) { + return splitRule; + } + } + return null; + } + + /** + * Check to see if the given type of split rule exists in this policy. + * + * @param ruleClass + * the type of the rule being searched + * @return + */ + protected boolean containsRule(Class ruleClass) { + for (SplitRule splitRule : splitRules.values()) { + if (splitRule.getClass().equals(ruleClass)) { + return true; + } + } + return false; + } + + /** + * Add a rule to this split policy. + * + * @param splitRule + * the split rule to be added. + */ + protected void addRule(SplitRule splitRule) { + splitRules.put(Long.valueOf(splitRule.getRuleInstanceID()), splitRule); + } + + /** + * Removes a rule from this split policy. + * + * @param splitRule + * the split rule to be removed. + */ + protected void removeRule(SplitRule splitRule) { + splitRules.remove(splitRule); + } + + /** + * Check to see if the current rule being processed (whose name is contained + * in the given context) matches the one supplied in the second argument. + * + * @param context + * @param splitRuleClass + * @return + */ + protected boolean isActiveRule(Map context, + Class splitRuleClass) { + String currentRuleClassName = (String) context + .get(SplitRuleAdapter.SPLIT_RULE_CLASS_NAME); + String expectedRuleClassName = splitRuleClass.getSimpleName(); + return currentRuleClassName != null + && currentRuleClassName.equals(expectedRuleClassName); + } + + /** + * Return the directory underlying the index writer on which the split index + * is based. + * + * @return the (super-)directory + */ + public Directory getDirectory() { + return directory; + } + + /** + * By default, return the number of documents in all the splits in this + * policy. + * + * @return the number of documents maintained by this policy + */ + public int numDocs() throws CorruptIndexException, IOException { + return new SplitReader(null, true).numDocs(); + } + + /** + * @return true if and only if the split policy is being used in a read-only + * context. + * + */ + public boolean isReadOnly() { + return writer == null; + } + + /** + * Return a brand new index writer configuration object to be used by the + * writers for the sub-directories. Note that the {@link IndexWriterConfig} + * does not perform a deep-clone, which necessitates creating a brand new + * object for every sub-writer. + * + * @return an index writer configuration object + */ + protected IndexWriterConfig getConfig() { + return new IndexWriterConfig(Version.LUCENE_31, getAnalyzer()); + } + + /** + * Return the analyzer for the associated index writer, if present, otherwise + * a standard analyzer. + * + * @return the analyzer to be used for the sub-indices. + */ + protected Analyzer getAnalyzer() { + return isReadOnly() || writer.isClosed() ? new StandardAnalyzer( + Version.LUCENE_30) : writer.getAnalyzer(); + } + + /** + * Return the max field length for the associated index writer, if present, + * otherwise a conservative value. + * + * @return the max field length to be used for the sub-indices. + */ + protected int getMaxFieldLength() { + return isReadOnly() ? IndexWriterConfig.UNLIMITED_FIELD_LENGTH : writer + .getConfig().getMaxFieldLength(); + } + + /** + * A comparator for the {@link splits} set, which by default falls back on the + * name of the directories being compared. + * + * @return 1, 0 or -1 depending on whether the first directory is greater, + * equal or less than the second one + */ + protected Comparator getDirectoryComparator() { + return new Comparator() { + public int compare(Directory d1, Directory d2) { + return d1.toString().compareTo(d2.toString()); + } + }; + } + + /** + * An unmodifiable list of sub-directories based on {@link splits}. Note that + * this forces all modifications to {@link splits} to occur through a + * {@link SplitRule}. + * + * @return an unmodifiable splits set. + */ + @SuppressWarnings("unchecked") + public Set getSplits() { + return splits != null ? Collections.unmodifiableSet(splits) + : Collections.EMPTY_SET; + } + + /** + * Cause a split to occur, based on the context propagated from the split + * rule. All concrete split policies are expected to implement the + * doSplit method, while this method handles the split policy + * listener logic. + * + * @param context + * @throws CorruptIndexException + * @throws IOException + */ + protected void split(Map context) + throws CorruptIndexException, IOException { + if (writer.isClosed()) { + System.out.println("Cannot split when the super-directory is closed."); + return; + } + doSplit(context); + for (SplitPolicyListener listener : splitListeners) { + listener.onSplit(splits); + } + } + + /** + * Cause a split to occur, in situations where a context is not required. + * + * @throws CorruptIndexException + * @throws IOException + */ + protected void split() throws CorruptIndexException, IOException { + split(new HashMap()); + } + + /** + * An abstract method where each concrete split policy can implement any + * changes to {@link splits}. + * + * @param context + * the context propagated from the split rule. + * @throws CorruptIndexException + * @throws IOException + */ + protected abstract void doSplit(Map context) + throws CorruptIndexException, IOException; + + /** + * @return the identifier for this split policy, which is based on the + * underlying lock identifier. + */ + protected String getSplitPolicyID() { + return directory.getLockID(); + } + + /** + * @return a stringified representation of the split policy + */ + @Override + public String toString() { + return getSplitPolicyID(); + } + + /** + * Return the split policy for the given identifer. + * + * @param identifier + * the identifier of the split policy. + * @return the corresponding split policy, if one exists. + */ + public static AbstractSplitPolicy getInstance(String identifier) { + return instances.get(identifier); + } + + /** + * Map the given split policy to its corresponding identifier + * + * @param identifier + * the identifier of the split policy being added. + * @param splitPolicy + * the split policy being added. + */ + private static void addInstance(String identifier, + AbstractSplitPolicy splitPolicy) { + instances.put(identifier, splitPolicy); + } + + /** + * Un-map the split policy from its corresponding identifier + * + * @param identifier + * an identifier of an (existing) split policy. + */ + private void removeInstance(String identifier) { + instances.remove(identifier); + } + + /** + * This method is called when the split writer is opened. This is a good point + * in time to initialize the split policy, by loading the sub-indices from the + * given directory, and giving the split rules a chance to initialize as well. + * + * @param directory + * the directory underlying the split index + */ + public SplitVote onOpen(SplitWriter writer, Directory directory) + throws IOException { + this.writer = writer; + this.directory = directory; + addInstance(getSplitPolicyID(), this); + if (!isOpenModeCreate()) { + splits.addAll(loadSplits(directory)); + // } else { + // clearSplits(directory); + } + for (SplitRule splitRule : splitRules.values()) { + if (!isReadOnly()) { + splitRule.onOpen(this); + } + } + return SplitVote.CARRY_ON; + } + + /** + * When the split policy is closed, it removes itself from the static policy + * pool. + * + * @throws IOException + * @throws SplitException + * @throws CorruptIndexException + */ + @Override + public SplitVote onClose() throws CorruptIndexException, SplitException, + IOException { + removeInstance(getSplitPolicyID()); + return super.onClose(); + + } + + /** + * Load the sub-directories from the split index's directory into this split + * policy. Note that this is called at the time the split policy is opened. + * Furthermore, while the list of sub-directories is maintained in the + * super-directory, the actual sub-directories may reside outside the + * directory, or even on a different machine, for that matter. + * + * @param directory + * the directory underlying the split index. + * @return the collection of sub-directories for this split index, not + * including the (super-)directory that was passed in. + * @throws IOException + * if the sub-indices could not be read from the directory + */ + protected abstract Collection loadSplits( + Directory directory) throws IOException; + + protected abstract void clearSplits(Directory directory) throws IOException; + + /** + * Add a sub-index to this split index, as determined by the order specified + * by the policy's directory comparator. + * + * @param subDirectory + * the directory of the sub-index being added. + */ + protected void pushSplit(Directory subDirectory) { + if (subDirectory != null) { + splits.add(subDirectory); + } + } + + /** + * Remove last sub-index from this split index, as determined by the order + * specified by the policy's directory comparator. + * + * @return the directory of the last sub-index, which was removed, if at all + * @throws IOException + */ + protected Directory popSplit() throws IOException { + Iterator iterator = splits.iterator(); + Directory directory = iterator.next(); + if (directory != null) { + iterator.remove(); + } + return directory; + } + + /** + * Same as {@link popSplit()} except that it does not actually remove the last + * sub-index, but merely returns it. + * + * @return the directory of the last sub-index, if present. + */ + protected Directory peekSplit() { + return !splits.isEmpty() ? splits.iterator().next() : null; + } + + /** + * @return true if the writer points to a brand new index. + */ + protected boolean isOpenModeCreate() { + return writer != null + && writer.getConfig().getOpenMode().equals(OpenMode.CREATE); + } + + /** + * A helper method that (recursively) removes the contents of the given file. + * + * @param file + * the file being removed + */ + protected void deleteFile(File file, boolean recursive) { + if (file == null) { + return; + } + if (file.isFile()) { + file.delete(); + return; + } + if (recursive) { + if (file.isDirectory()) { + for (File subFile : file.listFiles()) { + deleteFile(subFile, recursive); + } + file.delete(); + } + } + } + + /** + * Add a listener for this split policy. + * + * @param listener + * the (new) split policy listener. + */ + public void addListener(SplitPolicyListener listener) { + splitListeners.add(listener); + } + + /** + * Remove a listener of this split policy. + * + * @param listener + * an (existing) split policy listener. + */ + public void removeListener(SplitPolicyListener listener) { + splitListeners.remove(listener); + } + +} Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitRule.java =================================================================== --- contrib/splitindex/src/java/org/apache/lucene/index/SplitRule.java (revision 0) +++ contrib/splitindex/src/java/org/apache/lucene/index/SplitRule.java (revision 0) @@ -0,0 +1,54 @@ +package org.apache.lucene.index; + +import java.io.Serializable; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * The SplitRule builds on the {@link SplitVoter} and allows a + * {@link SplitPolicy} to be registered with it. + */ +public interface SplitRule extends SplitVoter, Serializable { + // The identifier of the split policy to which this rule applies + public static final String SPLIT_RULE_SPLIT_POLICY_ID = "split.rule.split.policy.id"; + // The time at which the last split occurred due to this rule. + public static final String SPLIT_RULE_LAST_SPLIT_TIME = "split.rule.last.split.time"; + // The name of the last segment in the super-directory the last time around. + public static final String SPLIT_RULE_LAST_SEGMENT_NAME = "split.rule.last.segment.name"; + // The size of the last segment in the super-directory the last time around. + public static final String SPLIT_RULE_LAST_SEGMENT_SIZE = "split.rule.last.segment.size"; + + /** + * This method is invoked when the {@link SplitPolicy} is opened, which + * typically occurs when a {@link SplitWriter} or {@link SplitReader} for the + * split index is opened. + * + * @param splitPolicy + * the split policy on which to apply this rule + */ + public void onOpen(SplitPolicy splitPolicy); + + /** + * The unique identifier of this rule, which comes in handy while performing a + * split that could potentially have been triggered by more than one split + * rule. + * + * @return the unique identifier for this split rule instance. + */ + public long getRuleInstanceID(); +} Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitRuleAdapter.java =================================================================== --- contrib/splitindex/src/java/org/apache/lucene/index/SplitRuleAdapter.java (revision 0) +++ contrib/splitindex/src/java/org/apache/lucene/index/SplitRuleAdapter.java (revision 0) @@ -0,0 +1,75 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; + +/** + * An abstract implementation of the {@link SplitRule} interface, which builds + * on the {@link SplitVoterAdapter}. + * + * @author Karthick Sankarachary + * + */ +public abstract class SplitRuleAdapter extends SplitVoterAdapter implements + SplitRule { + private static final long serialVersionUID = -1559066539420356617L; + + // A context key that refers to the type of this split rule. + public static final String SPLIT_RULE_CLASS_NAME = "split.rule.class.name"; + + // A context object that keeps track of the state of this split rule. + protected Map context = new HashMap(); + + private static long nextRuleInstanceID = 0; + + private long ruleInstanceID; + + /** + * Construct a split rule with no specific configuration options. + */ + public SplitRuleAdapter() { + this(null); + } + + /** + * Construct a split rule with the given configuration options. + * + * @param options + * the configuration options for this split rule + */ + public SplitRuleAdapter(Map options) { + super(options); + context.put(SPLIT_RULE_CLASS_NAME, getClass().getSimpleName()); + this.ruleInstanceID = nextRuleInstanceID++; + } + + public long getRuleInstanceID() { + return ruleInstanceID; + } + + /** + * When the split policy is opened, make a note of it here. + */ + public void onOpen(SplitPolicy splitPolicy) { + this.splitPolicy = (AbstractSplitPolicy) splitPolicy; + } + +} Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterPool.java =================================================================== --- contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterPool.java (revision 0) +++ contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterPool.java (revision 0) @@ -0,0 +1,126 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; + +/** + * The SplitVoterPool acts as a proxy for a group of split voters. + * It behaves like a {@link SplitVoter} in and of itself. It basically delegates + * all events to each of the registered voters. Then, it creates a + * {@link SplitVote} object that may be regarded as a consensus of the votes of + * the registered voters. + * + *

+ * In particular, if any of the registered voters vote to override the return + * value of the index-changing method in the {@link SplitWriter} or + * {@link SplitReader}, then the return value of the leading voter (viz., the + * one that appears ahead of the others) is used. Also, if any of the registered + * voters vote to not have the index-changing method carry on business as usual, + * then it too will vote likewise. + * + * @author Karthick Sankarachary + * + */ +public class SplitVoterPool { + // The list of registered voters. + private List voters = new CopyOnWriteArrayList(); + + /** + * A proxy object that implements the {@link SplitVoter} interface. + */ + private SplitVoter proxy = (SplitVoter) Proxy.newProxyInstance( + SplitVoter.class.getClassLoader(), new Class[] {SplitVoter.class}, + new InvocationHandler() { + + public Object invoke(Object proxy, Method method, Object[] args) + throws Throwable { + try { + boolean combinedCarryOn = true; + Object combinedReturnValue = null; + for (SplitVoter listener : voters) { + SplitVote listenersSplitVote = (SplitVote) method.invoke( + listener, args); + if (combinedReturnValue == null + && listenersSplitVote.getReturnValue() != null) { + combinedReturnValue = listenersSplitVote.getReturnValue(); + } + if (!listenersSplitVote.carryOn()) { + combinedCarryOn = false; + } + } + final boolean finalCarryOn = combinedCarryOn; + final Object finalReturnValue = combinedReturnValue; + return new SplitVote() { + @Override + public boolean carryOn() { + return finalCarryOn; + } + + @Override + public Object getReturnValue() { + return finalReturnValue; + } + }; + } catch (Throwable t) { + System.out.println("One of the split voters failed to vote (" + + t.getMessage() + ")"); + t.printStackTrace(); + return SplitVote.CARRY_ON; + } + } + }); + + /** + * @return the voter proxy object corresponding to this pool of voters + */ + public SplitVoter getVoterProxy() { + return proxy; + } + + /** + * Add a voter to the mix. + * + * @param voter + * the voter being added + */ + public void addVoter(SplitVoter voter) { + voters.add(voter); + } + + /** + * Remove a voter from the mix. + * + * @param voter + * the voter being removed + */ + public void removeVoter(SplitVoter voter) { + voters.remove(voter); + } + + /** + * @return the number of registered voters + */ + public int getVoterCount() { + return voters.size(); + } +} Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitWriter.java =================================================================== --- contrib/splitindex/src/java/org/apache/lucene/index/SplitWriter.java (revision 0) +++ contrib/splitindex/src/java/org/apache/lucene/index/SplitWriter.java (revision 0) @@ -0,0 +1,413 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.lang.reflect.Constructor; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.Version; + +/** + * A SplitWriter builds on an {@link IndexWriter} by aggregating it + * with a set of sub-directories known as splits. We denote the directory + * underlying the super class {@link IndexWriter} as the super-directory, where + * real-time changes to the index will typically reside. The distribution of + * content across the super-directory and sub-directories is controlled by what + * is known as a {@link SplitPolicy}. To facilitate that, the split policy is + * allowed to observe changes made by not only the {@link SplitWriter} but also + * the {@link SplitReader}s obtained from it. + * + *

+ * Each sub-index corresponding to a split denotes a partition that captures a + * portion of the changes made to the split index. For instance, a split may + * represent a certain splice of time or a mirror image of the super-directory. + * The exact semantics of a split is specified by specific implementations of + * the {@link SplitPolicy}. Note that the {@link SplitWriter}, + * {@link SplitReader} and the {@link SplitPolicy} interface intentionally leave + * the semantics of splits unspecified. Instead, they merely provide an + * extensible and observable framework for enforcing the policies and rules that + * define the split index. + *

+ * + *

+ * The key enabler that makes the split writer observable is the + * {@link SplitVoter} interface, which acts as a listener on all state-changing + * actions performed on the super-index (based on the super-directory). Both the + * {@link SplitWriter} and {@link SplitReader} trigger events whenever the state + * of the index (potentially) changes, which are propagated to each of the + * registered voters. In fact, a voter may choose to override (and even disable) + * the action being performed on the super-directory, if it so wishes. Think of + * split voting and policies as a aspect-oriented mechanism that is tailored for + * lucene indices. + *

+ * + * @author Karthick Sankarachary + * + */ +public class SplitWriter extends IndexWriter implements Closeable { + // The split policy that governs the splits (and to some extent, the + // super-directory). + protected SplitPolicy policy; + // The pool of voters registered with this split writer. + protected SplitVoterPool voterPool = new SplitVoterPool(); + + // The name of the file where the split policy's options are serialized. + protected static final String SPLIT_POLICY_OPTIONS_FILE = "policy.options.ser"; + // The name of the split policy class that is stored along with the options. + protected static final String SPLIT_POLICY_CLASS_NAME = "split.policy.class.name"; + + /** + * Construct an {@link SplitWriter} that based on the given directory, which + * is passed along to the super class which is a {@link IndexWriter}, along + * with the {@link IndexWriterConfig}. The {@link SplitPolicy} is registered + * as a voter of this split writer, along with its {@link SplitRule}s (@see + * {@link SplitPolicy#getRules()}, if any. + * + * @param directory + * the super-directory on which the {@link IndexWriter} is based + * @param config + * the configuration for the writer of the super-directory + * @param policy + * the split policy for the split writer + * @throws CorruptIndexException + * @throws LockObtainFailedException + * @throws IOException + */ + public SplitWriter(Directory directory, IndexWriterConfig config, + SplitPolicy policy) throws CorruptIndexException, + LockObtainFailedException, IOException { + super(directory, config); + initialize(policy); + } + + /** + * Create the voter pool, add the policy and its rules to it. Persist the + * state of the policy into the {@link #SPLIT_POLICY_OPTIONS_FILE}. + * + * @param policy + * the split policy for this writer + * @throws CorruptIndexException + * @throws LockObtainFailedException + * @throws IOException + */ + private void initialize(SplitPolicy policy) throws CorruptIndexException, + LockObtainFailedException, IOException { + this.policy = policy; + + voterPool = new SplitVoterPool(); + voterPool.addVoter(policy); + for (SplitRule splitRule : policy.getRules()) { + voterPool.addVoter(splitRule); + } + getVoters().onOpen(this, getDirectory()); + + if (getDirectory() instanceof FSDirectory) { + File file = ((FSDirectory) getDirectory()).getDirectory(); + ObjectOutputStream fos = new ObjectOutputStream(new FileOutputStream( + new File(file, SPLIT_POLICY_OPTIONS_FILE))); + policy.getOptions().put(SPLIT_POLICY_CLASS_NAME, + policy.getClass().getCanonicalName()); + fos.writeObject(policy.getOptions()); + } + } + + /** + * @return the split policy for this writer + */ + public SplitPolicy getSplitPolicy() { + return policy; + } + + /** + * Restore the state of the split policy from the + * {@link #SPLIT_POLICY_OPTIONS_FILE} that is saved under the super-directory. + * + * @param directory + * the super-directory for the split index + * @return the split policy based on the persisted options + * @throws CorruptIndexException + */ + @SuppressWarnings("unchecked") + public static SplitPolicy getSplitPolicy(Directory directory) + throws CorruptIndexException { + SplitPolicy policy = null; + if (directory instanceof FSDirectory) { + File file = ((FSDirectory) directory).getDirectory(); + try { + ObjectInputStream ois = new ObjectInputStream(new FileInputStream( + new File(file, SPLIT_POLICY_OPTIONS_FILE))); + Map options = (Map) ois + .readObject(); + String splitPolicyClassName = (String) options + .get(SPLIT_POLICY_CLASS_NAME); + Class policyClass = Class.forName(splitPolicyClassName); + Constructor policyConstructor = policyClass + .getConstructor(Map.class); + policy = (SplitPolicy) policyConstructor.newInstance(options); + policy.onOpen(null, directory); + } catch (Exception e) { + throw new CorruptIndexException( + "Could not load split policy from the directory"); + } + } + return policy; + } + + /** + * @return the proxy for this writer's voters, which itself acts as a voter + */ + protected SplitVoter getVoters() { + return voterPool.getVoterProxy(); + } + + /** + * Add a split voter to this split writer. + * + * @param splitVoter + * the split voter being added + */ + public void addVoter(SplitVoter voter) { + voterPool.addVoter(voter); + } + + /** + * Remove a split voter from this split writer. + * + * @param splitVoter + * the split voter being removed + */ + public void removeVoter(SplitVoter voter) { + voterPool.removeVoter(voter); + } + + /** + * @return the number of sub-indices (or splits) in this split index. + */ + public int getNumSplits() { + return getSplitPolicy().getSplits().size(); + } + + /** + * Prevent modifications to the set of splits in this writer. + * + * @throws SplitException + */ + public void pauseSplits() throws SplitException { + getVoters().onPause(); + } + + /** + * Allow modifications to the set of splits in this writer. + * + * @throws SplitException + */ + public void resumeSplits() throws SplitException { + getVoters().onResume(); + } + + /** + * Add readers to the super-index, unless the voters decide otherwise. + * + * @param readers + * one or more readers to add + */ + @Override + public void addIndexes(IndexReader... readers) throws CorruptIndexException, + IOException { + if (getVoters().onAddIndexes(readers).carryOn()) { + super.addIndexes(readers); + } + } + + /** + * Add directories to the super-index, unless the voters decide otherwise. + * + * @param directories + * one or more directories to add + */ + @Override + public void addIndexesNoOptimize(Directory... dirs) + throws CorruptIndexException, IOException { + if (getVoters().onAddIndexesNoOptimize(dirs).carryOn()) { + super.addIndexesNoOptimize(dirs); + } + } + + /** + * Close the super-index after notifying its voters to do the same. + * + * @param waitForMerges + * if true, this call will block until all merges complete; else, it + * will ask all running merges to abort, wait until those merges have + * finished (which should be at most a few seconds), and then return. + */ + @Override + public void close(boolean waitForMerges) throws CorruptIndexException, + IOException { + if (getVoters().onClose().carryOn()) { + super.close(waitForMerges); + } + } + + /** + * Commit the super-index after notifying its voters to do the same. + * + * @param commitUserData + * the data passed by the user that initiated the commit + */ + @Override + public void commit(Map commitUserData) + throws CorruptIndexException, IOException { + if (getVoters().onCommit(commitUserData).carryOn()) { + super.commit(commitUserData); + } + } + + /** + * Delete all documents from the super-index after notifying its voters to do + * the same. + */ + @Override + public synchronized void deleteAll() throws IOException { + if (getVoters().onDeleteAll().carryOn()) { + super.deleteAll(); + } + } + + /** + * Delete all documents from only the super-index. + * + * @throws IOException + */ + protected synchronized void deleteAllFromSuper() throws IOException { + super.deleteAll(); + } + + /** + * Add documents to the super-index, unless the voters decide otherwise. + */ + @Override + public void addDocument(Document document, Analyzer analyzer) + throws CorruptIndexException, IOException { + if (getVoters().onAddDocument(document, analyzer).carryOn()) { + super.addDocument(document, analyzer); + } + } + + /** + * Obtain a reader for this split index, which typically is a + * {@link SplitReader}. However, the split policy in force may choose to + * override that with a different type of {@link IndexReader}. + */ + @Override + public IndexReader getReader() throws IOException { + SplitReader reader = new SplitReader(this); + SplitVote splitVote = getVoters().onGetReader(reader); + return (splitVote.getReturnValue() != null) ? (IndexReader) splitVote + .getReturnValue() : reader; + } + + /** + * @return the reader for the super-index + * @throws IOException + */ + public IndexReader getSuperReader() throws IOException { + return super.getReader(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); + } + + /** + * Obtain an index reader for just the splits in this index. In other words, + * the super-directory is not made visible in this reader. + * + * @return an index reader for only the splits + * @throws IOException + */ + public IndexReader getPolicyReader() throws IOException { + return new SplitReader(this, true); + } + + /** + * Obtain an index reader that optionally includes the super-index and zero or + * more splits. + * + * @return an index reader for the given set of indices + */ + public IndexReader getReader(boolean excludeSuper, int... slitsToInclude) + throws IOException { + return new SplitReader(this, excludeSuper, slitsToInclude); + } + + /** + * @return the total number of documents in this split index + */ + @Override + public synchronized int numDocs() throws IOException { + return super.numDocs() + policy.numDocs(); + } + + /** + * @return the number of documents in the super-directory + * @throws IOException + */ + public synchronized int numSuperDocs() throws IOException { + return super.numDocs(); + } + + /** + * Optimize the super-directory and each of the splits, if not already + * optimized. + * + * @param maxNumSegments + * maximum number of segments left in the index after optimization + * finishes + * @param doWait + * should the call block until the optimize completes? + */ + @Override + public void optimize(int maxNumSegments, boolean doWait) + throws CorruptIndexException, IOException { + super.optimize(maxNumSegments, doWait); + IndexReader[] subReaders = getPolicyReader().getSequentialSubReaders(); + if (subReaders != null) { + for (IndexReader subReader : subReaders) { + if (!subReader.isOptimized()) { + IndexWriterConfig subConfig = new IndexWriterConfig( + Version.LUCENE_31, getAnalyzer()); + subConfig.setMaxFieldLength(super.getConfig().getMaxFieldLength()); + IndexWriter subWriter = new IndexWriter(subReader.directory(), + subConfig); + subWriter.optimize(maxNumSegments, doWait); + subWriter.close(true); + } + } + } + } +} Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitVote.java =================================================================== --- contrib/splitindex/src/java/org/apache/lucene/index/SplitVote.java (revision 0) +++ contrib/splitindex/src/java/org/apache/lucene/index/SplitVote.java (revision 0) @@ -0,0 +1,58 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * The SplitVote represents the vote of a {@link SplitVoter}, + * typically in response to a change to the split index. + * + *

+ * The vote serves a two-fold purpose. Through it's {@link #carryOn()} method, + * it indicates whether or not the change should actually be performed on the + * super-directory. Plus, through its {@link #getReturnValue()} method, it + * indicates that the return value that should be used by the method on the + * split index that caused the change. + *

+ * + *

+ * Note that it takes only one vote to say false to {@link #carryOn()} for the + * original change to be ignored. On the other hand, only the first non-null + * {@link #getReturnValue()} is used to override the original return value of + * the state-changing method. + *

+ */ + +public class SplitVote { + + public static final SplitVote CARRY_ON = new SplitVote(); + + public static final SplitVote DO_NOTHING = new SplitVote() { + public boolean carryOn() { + return false; + }; + }; + + public Object getReturnValue() { + return null; + } + + public boolean carryOn() { + return true; + } + +} Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitException.java =================================================================== --- contrib/splitindex/src/java/org/apache/lucene/index/SplitException.java (revision 0) +++ contrib/splitindex/src/java/org/apache/lucene/index/SplitException.java (revision 0) @@ -0,0 +1,22 @@ +package org.apache.lucene.index; + +public class SplitException extends RuntimeException { + private static final long serialVersionUID = 8267088235489206246L; + + public SplitException() { + super(); + } + + public SplitException(String message) { + super(message); + } + + public SplitException(Throwable cause) { + super(cause); + } + + public SplitException(String message, Throwable cause) { + super(message, cause); + } + +} Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitPolicy.java =================================================================== --- contrib/splitindex/src/java/org/apache/lucene/index/SplitPolicy.java (revision 0) +++ contrib/splitindex/src/java/org/apache/lucene/index/SplitPolicy.java (revision 0) @@ -0,0 +1,145 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Serializable; +import java.util.Collection; +import java.util.EventListener; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.store.Directory; + +/** + * A SplitPolicy determines the way in which a {@link SplitWriter} + * is broken down into sub-indices. + * + *

+ * In particular, this interface separates the concern of managing the + * directories underlying the sub-indices from the implementation of the + * {@link SplitWriter} and {@link SplitReader}. The set of directories that it + * owns may be obtained via its {@link #getSplits()} method. It is important to + * note that the split policy does not own the directory passed to the + * {@link SplitWriter}'s super class, which is an {@link IndexWriter}. However, + * the split policy may decide to move documents out of the writer's + * (super-)directory into one or more of the (sub-)directories that it owns, if + * it so wishes. In a way, the {@link SplitPolicy} acts as the polar opposite of + * the {@link MergePolicy} in the sense that under its auspices documents tends + * to diverge rather than converge in terms of their location in the index. + *

+ * + *

+ * Typically, the composition of the set of directories (or splits) owned by the + * {@link SplitPolicy} changes when the split index is updated, either through + * the {@link SplitWriter} or the {@link SplitReader}. To that end, the policy + * acts as an observer of the aforementioned classes, specifically any method + * that has the potential to change the contents of the index. This way, the + * split policy receives notifications whenever the index writer is updated. In + * addition, it is also notified of the index's life cycle events, such as when + * it is opened or closed. + *

+ * + *

+ * For those interested in knowing when the split policy adds or removes a + * sub-directory, they may register themselves as a split policy listener. + *

+ * + *

+ * Given that implementations of the split policy will inevitably need to be + * configured differently, we pass a options hash to their constructor, from + * which they can pick the configuration items of interest to them. The policy's + * options can be obtained through its {@link #getOptions()} method, which must + * be in a serializable shape, so that the policy can be dehydrated and hydrated + * to persistent storage as and when required. + *

+ */ +public interface SplitPolicy extends SplitVoter { + /** + * An option specifying the collection of split rules to be employed. + */ + public static final String POLICY_SPLIT_RULES = "policy.split.rules"; + + /** + * Return a set of {@link Directory}, each of which represents a split (or + * sub-index) of the index, as defined by this policy. + * + * @return the set of directories of sub-indices belonging to this split + * index. + */ + public Set getSplits(); + + /** + * @return the list of split rules being enforced. + */ + public Collection getRules(); + + /** + * Return the options for this policy in a serializable form. + * + * @return the map of option keys to corresponding values. + */ + + public Map getOptions(); + + /** + * @return a specific split rule of the given type. + */ + public SplitRule getRule(Class splitRuleClass); + + /** + * A SplitListener observes the split personality of a + * {@link SplitPolicy}. Specifically, it is notified any time the return value + * of its {@link getSplits()} method is about to change. + * + */ + public interface SplitPolicyListener extends EventListener { + /** + * The event to trigger after a split has been performed. The new state of + * the split policy is available at this point to its listeners. + * + * @param splits + * the new state of the split policy + */ + public void onSplit(Set splits); + } + + /** + * Add a {@link #SplitListener} to this {@link SplitPolicy}. + * + * @param listener + * the split policy listener to be added. + */ + public void addListener(SplitPolicyListener listener); + + /** + * Remove a {@link #SplitListener} from this {@link SplitPolicy}. + * + * @param listener + * the split policy listener to be removed. + */ + public void removeListener(SplitPolicyListener listener); + + /** + * @return the number of (logical) documents present exclusively in the splits + * (outside of the super-directory). + * @throws CorruptIndexException + * @throws IOException + */ + public int numDocs() throws CorruptIndexException, IOException; +} Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitVoter.java =================================================================== --- contrib/splitindex/src/java/org/apache/lucene/index/SplitVoter.java (revision 0) +++ contrib/splitindex/src/java/org/apache/lucene/index/SplitVoter.java (revision 0) @@ -0,0 +1,195 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.EventListener; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.store.Directory; + +/** + * The SplitVoter is an event listener that is notified of + * real-time changes being made to the split writer or reader. + * + *

+ * After processing an event, the split voter must return a {@link SplitVote} + * object that may be used to (a) override the return value of method that + * caused the change, (b) force the split writer or reader (as the case may be) + * to not change the (super-)directory where appropriate. + *

+ * + * @author Karthick Sankarachary + * + */ +public interface SplitVoter extends EventListener { + /** + * This method is invoked when the index is opened for writing or reading, + * which is the cue for the policy to initialize its state. + * + * @param directory + * the directory on which the slit index is based + * @throws IOException + * if the policy could not initialize its state + */ + public SplitVote onOpen(SplitWriter writer, Directory directory) + throws IOException; + + /** + * This method is invoked when the index is closed for business, which is the + * cue for the policy to cleanup its state. + * + * @throws CorruptIndexException + * if the splits were found to be in an inconsistent state + * @throws IOException + * if the splits could not be closed properly + */ + public SplitVote onClose() throws CorruptIndexException, IOException; + + /** + * This method is invoked when either the split writer or reader commits its + * changes. + * + * @param commitUserData + * @return + * @throws CorruptIndexException + * @throws IOException + */ + public SplitVote onCommit(Map commitUserData) + throws CorruptIndexException, IOException; + + /** + * This method is invoked when the index is put on pause, which entails that + * the policy in turn put it's split rule activities on hold. + */ + public SplitVote onPause(); + + /** + * This method is invoked when the index is asked to resume operations (after + * a {@link #pause()}), at which point the policy may resume it's split rule + * activities. + */ + public SplitVote onResume(); + + /** + * This method is invoked when document are added to the split index, + * specifically the directory underlying the index writer super-class. + * + * @param document + * the document about to be added + * @param analyzer + * the analyzer used to process the document + * @return a flag indicating whether or not the document should be added to + * the running directory + * @throws CorruptIndexException + * if the change appears to have corrupted the index + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onAddDocument(Document document, Analyzer analyzer) + throws CorruptIndexException, IOException; + + /** + * This method is invoked when external index (readers) are added to the split + * index, specifically the directory underlying the index writer super-class. + * + * @param readers + * the index readers about to be added + * @return flag indicating whether or not the document should be added to the + * running directory + * @throws CorruptIndexException + * if the change appears to have corrupted the index + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onAddIndexes(IndexReader... readers) + throws CorruptIndexException, IOException; + + /** + * This method is invoked when external index (directories) are added to the + * split index, specifically the directory underlying the index writer + * super-class. + * + * @param dirs + * the index directories about to be added + * @return flag indicating whether or not the document should be added to the + * running directory + * @throws CorruptIndexException + * if the change appears to have corrupted the index + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onAddIndexesNoOptimize(Directory[] dirs) + throws CorruptIndexException, IOException; + + /** + * This method is invoked when the {@link SplitWriter} is asked to provide an + * index reader through its {@link SplitWriter#getReader} method. Typically, + * the policy will return the (split) reader passed to it, but it may + * completely different type of reader, if it so desires. + * + * @param reader + * the split reader the writer plans on returning + * @return the actual index reader that will be returned + * @throws CorruptIndexException + * if the change appears to have corrupted the index + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onGetReader(SplitReader reader) + throws CorruptIndexException, IOException; + + /** + * This method is invoked when the {@link SplitWriter#deleteAll()} method is + * invoked. + * + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onDeleteAll() throws IOException; + + /** + * This method is invoked when the {@link SplitReader#undeleteAll()} method is + * invoked. + * + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onUndeleteAll(); + + /** + * This method is invoked when the {@link SplitReader#deleteDocument(int)} + * method is invoked. + * + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onDeleteDocument(int docNum); + + /** + * This method is invoked when the {@link SplitReader#deleteDocuments(Term)} + * method is invoked. + * + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onDeleteDocument(Term term); + +} Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterAdapter.java =================================================================== --- contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterAdapter.java (revision 0) +++ contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterAdapter.java (revision 0) @@ -0,0 +1,330 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Serializable; +import java.lang.reflect.Method; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.store.Directory; + +/** + * The SplitVoterAdapter provides a rudimentary implementation of + * the {@link SplitVoter} interface, using sane defaults for the returned votes. + * + * @author Karthick Sankarachary + * + */ +public class SplitVoterAdapter implements SplitVoter { + // The split policy associated with this split voter. + protected AbstractSplitPolicy splitPolicy; + + // A flag indicating whether the split policy (and rules thereof) are in a + // paused state. + protected boolean paused = false; + + // An extensible options map, whose semantics is open to interpretation. + protected Map options; + + /** + * Construct a split voter with no options. + */ + public SplitVoterAdapter() { + this(new HashMap()); + } + + /** + * Construct a split voter with the given set of options. + * + * @param options + * the configuration options + */ + public SplitVoterAdapter(Map options) { + this.options = checkAndBalanceOptions(options); + } + + /** + * By default, tell the split writer or reader to carry on as usual. + * + * @param directory + * the directory on which the slit index is based + * @throws IOException + * if the policy could not initialize its state + */ + public SplitVote onOpen(SplitWriter writer, Directory directory) + throws IOException { + return SplitVote.CARRY_ON; + } + + /** + * By default, tell the split writer or reader to carry on as usual. + * + * @throws CorruptIndexException + * if the splits were found to be in an inconsistent state + * @throws IOException + * if the splits could not be closed properly + */ + public SplitVote onClose() throws CorruptIndexException, IOException, + SplitException { + return SplitVote.CARRY_ON; + } + + /** + * By default, tell the split writer or reader to carry on as usual. + * + * @param commitUserData + * @return + * @throws CorruptIndexException + * @throws IOException + */ + public SplitVote onCommit(Map commitUserData) + throws CorruptIndexException, IOException { + return SplitVote.CARRY_ON; + } + + /** + * Pause the split voter in an idempotent manner. + */ + public synchronized SplitVote onPause() { + paused = true; + return SplitVote.CARRY_ON; + } + + /** + * Resume the split voter in an idempotent manner. + */ + public synchronized SplitVote onResume() { + paused = false; + return SplitVote.CARRY_ON; + } + + /** + * By default, tell the split writer or reader to carry on as usual. + * + * @param document + * the document about to be added + * @param analyzer + * the analyzer used to process the document + * @return a flag indicating whether or not the document should be added to + * the running directory + * @throws CorruptIndexException + * if the change appears to have corrupted the index + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onAddDocument(Document document, Analyzer analyzer) + throws CorruptIndexException, IOException { + return SplitVote.CARRY_ON; + } + + /** + * By default, tell the split writer or reader to carry on as usual. + * + * @param readers + * the (sub-)indices being added + * @return true if and only if the split writer should actually add the + * (sub-)indices + */ + public SplitVote onAddIndexes(IndexReader... readers) + throws CorruptIndexException, IOException { + return SplitVote.CARRY_ON; + } + + /** + * By default, tell the split writer or reader to carry on as usual. + * + * @param directories + * the (sub-)indices being added + * @return true if and only if the split writer should actually add the + * (sub-)indices + */ + public SplitVote onAddIndexesNoOptimize(Directory[] directories) + throws CorruptIndexException, IOException { + return SplitVote.CARRY_ON; + } + + /** + * By default, tell the split writer or reader to carry on as usual. + * + * @param reader + * the split reader the writer plans on returning + * @return the actual index reader that will be returned + * @throws CorruptIndexException + * if the change appears to have corrupted the index + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onGetReader(SplitReader reader) + throws CorruptIndexException, IOException { + return SplitVote.CARRY_ON; + } + + /** + * By default, tell the split writer or reader to carry on as usual. + * + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onDeleteAll() throws IOException { + return SplitVote.CARRY_ON; + } + + /** + * By default, tell the split writer or reader to carry on as usual. + * + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onUndeleteAll() { + return SplitVote.CARRY_ON; + } + + /** + * By default, tell the split writer or reader to carry on as usual. + * + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onDeleteDocument(int docNum) { + return SplitVote.CARRY_ON; + } + + /** + * By default, tell the split writer or reader to carry on as usual. + * + * @throws IOException + * if the change could not be persisted properly + */ + public SplitVote onDeleteDocument(Term term) { + return SplitVote.CARRY_ON; + } + + /** + * @return true if the split voter is in paused mode. + */ + protected boolean isPaused() { + return paused; + } + + /** + * @return a unique identifier for the corresponding split policy. + */ + protected String getSplitPolicyId() { + return splitPolicy.toString(); + } + + /** + * @return an empty hash map of option defaults. + */ + public Map getOptionDefaults() { + return new HashMap(); + } + + /** + * @return an empty hash map of option bounds. + */ + public Map getOptionBounds() { + return new HashMap(); + } + + /** + * @return the options hash used to configure this split voter. + */ + public Map getOptions() { + return Collections.synchronizedMap(options); + } + + /** + * Return the value for the given configuration option key. + * + * @param key + * the key of the configuration option + * @return the value for the above key + */ + public Serializable getOption(String key) { + return options.get(key); + } + + /** + * Return the value for the given configuration option key, after casting it + * as the given value class. + * + * @param key + * the key of the configuration option + * @param valueClass + * the expected type of the value + * @return the value for the above key + */ + public Serializable getOption(String key, + Class valueClass) { + Serializable value = options.get(key); + if (!value.getClass().equals(valueClass)) { + try { + Method method = valueClass.getMethod("valueOf", String.class); + value = (Serializable) method.invoke(null, value.toString()); + } catch (Exception e) {} + } + return value; + } + + /** + * Ensure that the user-defined options conform to the bound constraints + * defined by {@link #getOptionBounds()}, and if not, replace it with the + * corresponding defaults as defined by {@link #getOptionDefaults()}. + * + * @param options + * the user-defined options hash + * @return the sanitized version of the input options hash + */ + protected Map checkAndBalanceOptions( + Map options) { + Map bounds = getOptionBounds(); + Map defaults = getOptionDefaults(); + for (String key : bounds.keySet()) { + if (options.containsKey(key)) { + OptionBound bound = bounds.get(key); + if (bound != null && !bound.isInbounds(key, options.get(key))) { + options.put(key, defaults.get(key)); + } + } + } + for (String key : defaults.keySet()) { + if (!options.containsKey(key)) { + options.put(key, defaults.get(key)); + } + } + return options; + } + + /** + * An OptionBound defines whether or not a given + * pair in the options satisfies the constraints defined by the split voter. + * It acts as a sanity check mechanism, which ensures that the split voter + * does not have to ever deal with incorrect configuration options. + * + */ + public interface OptionBound { + public boolean isInbounds(String key, Serializable value); + } + +} Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitReader.java =================================================================== --- contrib/splitindex/src/java/org/apache/lucene/index/SplitReader.java (revision 0) +++ contrib/splitindex/src/java/org/apache/lucene/index/SplitReader.java (revision 0) @@ -0,0 +1,626 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; + +/** + * A SplitReader acts as a logical {@link IndexReader} for the + * split index. In particular, it builds on the {@link MultiReader}, such that a + * sub-reader maps to either a (split) sub-directory or to the (split index's) + * super-directory. + * + *

+ * In general, a {@link IndexReader} allows for updates to the index (even + * though that may seem like a contradiction in terms). Since we want the + * ability to observer every change made to the split index, we make the + * state-changing methods of {@link SplitReader} observable (by a + * {@link SplitVoter}). + *

+ * + * @author Karthick Sankarachary + * + */ +public class SplitReader extends MultiReader { + // The split writer, if this reader was obtained from one. + protected SplitWriter writer; + // The pool of voters observing this split reader. + protected SplitVoterPool voterPool = new SplitVoterPool(); + + /** + * Construct a split reader from the given sub-readers. Note that this + * constructor is meant to be used only by one of the {@link #open} factory + * methods. + * + * @param subReaders + * one or more sub-readers + * @throws IOException + */ + SplitReader(IndexReader... subReaders) throws IOException { + super(subReaders); + } + + /** + * Construct a split reader from the given list of sub-readers. Note that this + * constructor is meant to be used only by one of the {@link #open} factory + * methods. + * + * @param subReaders + * a list of sub-readers + * @throws IOException + */ + SplitReader(List subReaders) throws IOException { + super(subReaders.toArray(new IndexReader[] {})); + } + + /** + * Construct a split reader based on a split writer. Note that this + * constructor is meant to used only by the {@link SplitWriter}. + * + * @param writer + * the split writer + */ + SplitReader(SplitWriter writer) throws CorruptIndexException, IOException { + this(writer, false); + } + + /** + * Construct a split reader based on a split writer, that may or may not + * exclude the super-directory. Note that this constructor is meant to used + * only by the {@link SplitWriter}. + * + * @param writer + * the split writer + * @param excludeSuper + * whether or not to exclude the super-directory + */ + SplitReader(SplitWriter writer, boolean excludeSuper) + throws CorruptIndexException, IOException { + this(writer, DEFAULT_TERMS_INDEX_DIVISOR, excludeSuper); + } + + /** + * Construct a split reader based on a split writer, and on the given divisor. + * Note that this constructor is meant to used only by the {@link SplitWriter} + * . + * + * @param writer + * the split writer + * @param termInfosIndexDivisor + * Sub-samples which indexed terms are loaded into RAM + * @throws CorruptIndexException + * @throws IOException + */ + SplitReader(SplitWriter writer, int termInfosIndexDivisor) + throws CorruptIndexException, IOException { + this(writer, termInfosIndexDivisor, false); + } + + /** + * Construct a split reader based on a split writer, and on the given divisor. + * Note that this constructor is meant to used only by the {@link SplitWriter} + * + * @param writer + * the split writer + * @param termInfosIndexDivisor + * Sub-samples which indexed terms are loaded into RAM + * @param excludeSuper + * whether or not to exclude the super-directory + * @throws CorruptIndexException + * @throws IOException + */ + SplitReader(SplitWriter writer, int termInfosIndexDivisor, + boolean excludeSuper) throws CorruptIndexException, IOException { + this(writer, termInfosIndexDivisor, excludeSuper, new int[] {}); + } + + /** + * Construct a split reader based on a split writer, and on the given divisor + * and splits to include. Note that this constructor is meant to used only by + * the {@link SplitWriter} + * + * @param writer + * the split writer + * @param termInfosIndexDivisor + * Sub-samples which indexed terms are loaded into RAM + * @param excludeSuper + * whether or not to exclude the super-directory + * @param splitsToInclude + * indices of one or more splits to include + * @throws CorruptIndexException + * @throws IOException + */ + SplitReader(SplitWriter writer, boolean excludeSuper, int... splitsToInclude) + throws CorruptIndexException, IOException { + this(writer, DEFAULT_TERMS_INDEX_DIVISOR, excludeSuper, splitsToInclude); + } + + SplitReader(SplitWriter writer, int termInfosIndexDivisor, + boolean excludeSuper, int... splitsToInclude) + throws CorruptIndexException, IOException { + super(getSubReaders(writer, termInfosIndexDivisor, excludeSuper, + splitsToInclude)); + this.writer = writer; + } + + /** + * Return an array of sub-readers, one for each of the specified splits. + * + * @param writer + * the split writer + * @param termInfosIndexDivisor + * Sub-samples which indexed terms are loaded into RAM + * @param excludeSuper + * whether or not to exclude the super-directory + * @param splitsToInclude + * indices of one or more splits to be included + * @return the array of sub-readers for the specified splits + * @throws CorruptIndexException + * @throws IOException + */ + private static IndexReader[] getSubReaders(SplitWriter writer, + int termInfosIndexDivisor, boolean excludeSuper, int... splitsToInclude) + throws CorruptIndexException, IOException { + List subDirectories = new ArrayList(writer + .getSplitPolicy().getSplits()); + + if (subDirectories == null) { + return new IndexReader[] {}; + } + if (splitsToInclude == null || splitsToInclude.length == 0) { + splitsToInclude = new int[subDirectories.size()]; + for (int i = 0; i < subDirectories.size(); i++) { + splitsToInclude[i] = i; + } + } + IndexReader[] subReaders = new IndexReader[splitsToInclude.length + + (excludeSuper ? 0 : 1)]; + for (int subIndex = 0; subIndex < splitsToInclude.length; subIndex++) { + int splitToInclude = splitsToInclude[subIndex]; + if (splitToInclude < 0 || splitToInclude > subDirectories.size()) { + System.out.println("Not including non-existent split " + splitToInclude + + " in split reader"); + continue; + } + Directory subDirectory = subDirectories.get(splitToInclude); + subReaders[subIndex] = IndexReader.open(subDirectory, null, true, + termInfosIndexDivisor); + } + if (!excludeSuper) { + subReaders[subReaders.length - 1] = writer.getSuperReader(); + } + writer.resumeSplits(); + return subReaders; + } + + /** + * Open a {@link SplitReader} based on the directory on which the split index + * is based (a.k.a. its super-directory). + * + * @param directory + * the super-directory on which the split index is based + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public static SplitReader open(final Directory directory) + throws CorruptIndexException, IOException { + return open(directory, true); + } + + /** + * Open a {@link SplitReader} based on the directory on which the split index + * is based (a.k.a. its super-directory). You should pass readOnly=true, since + * it gives much better concurrent performance, unless you intend to do write + * operations (delete documents or change norms) with the reader. + * + * @param directory + * the index directory + * @param readOnly + * true if no changes (deletions, norms) will be made with this + * SplitIndexReader + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public static SplitReader open(final Directory directory, boolean readOnly) + throws CorruptIndexException, IOException { + return open(directory, null, readOnly); + } + + /** + * Expert: returns an {@link SplitReader} reading the split index upto the + * given {@link IndexCommit}. You should pass readOnly=true, since it gives + * much better concurrent performance, unless you intend to do write + * operations (delete documents or change norms) with the reader. + * + * @param commit + * the commit point to open + * @param readOnly + * true if no changes (deletions, norms) will be made with this + * SplitIndexReader + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public static SplitReader open(final IndexCommit commit, boolean readOnly) + throws CorruptIndexException, IOException { + return open(commit.getDirectory(), null, commit, readOnly, + DEFAULT_TERMS_INDEX_DIVISOR); + } + + /** + * Expert: returns an {@link SplitReader} reading the index in the given + * directory, with a custom {@link IndexDeletionPolicy}. You should pass + * readOnly=true, since it gives much better concurrent performance, unless + * you intend to do write operations (delete documents or change norms) with + * the reader. + * + * @param directory + * the index directory + * @param deletionPolicy + * a custom deletion policy (only used if you use this reader to + * perform deletes or to set norms); see {@link IndexWriter} for + * details. + * @param readOnly + * true if no changes (deletions, norms) will be made with this + * SplitIndexReader + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public static SplitReader open(final Directory directory, + IndexDeletionPolicy deletionPolicy, boolean readOnly) + throws CorruptIndexException, IOException { + return open(directory, deletionPolicy, readOnly, + DEFAULT_TERMS_INDEX_DIVISOR); + } + + /** + * Expert: returns an {@link SplitReader} reading the index in the given + * directory, with a custom {@link IndexDeletionPolicy}. You should pass + * readOnly=true, since it gives much better concurrent performance, unless + * you intend to do write operations (delete documents or change norms) with + * the reader. + * + * @param directory + * the index directory + * @param deletionPolicy + * a custom deletion policy (only used if you use this reader to + * perform deletes or to set norms); see {@link IndexWriter} for + * details. + * @param readOnly + * true if no changes (deletions, norms) will be made with this + * SplitIndexReader + * @param termInfosIndexDivisor + * Subsamples which indexed terms are loaded into RAM. This has the + * same effect as {@link IndexWriter#setTermIndexInterval} except + * that setting must be done at indexing time while this setting can + * be set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into memory. By + * setting this to a value > 1 you can reduce memory usage, at the + * expense of higher latency when loading a TermInfo. The default + * value is 1. Set this to -1 to skip loading the terms index + * entirely. + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public static SplitReader open(final Directory directory, + IndexDeletionPolicy deletionPolicy, boolean readOnly, + int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return open(directory, deletionPolicy, null, readOnly, + termInfosIndexDivisor); + } + + /** + * Expert: returns an {@link SplitReader} reading the index in the given + * Directory, using a specific commit and with a custom + * {@link IndexDeletionPolicy}. You should pass readOnly=true, since it gives + * much better concurrent performance, unless you intend to do write + * operations (delete documents or change norms) with the reader. + * + * @param commit + * the specific {@link IndexCommit} to open; see + * {@link SplitReader#listCommits} to list all commits in a directory + * @param deletionPolicy + * a custom deletion policy (only used if you use this reader to + * perform deletes or to set norms); see {@link IndexWriter} for + * details. + * @param readOnly + * true if no changes (deletions, norms) will be made with this + * SplitIndexReader + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public static SplitReader open(final IndexCommit commit, + IndexDeletionPolicy deletionPolicy, boolean readOnly) + throws CorruptIndexException, IOException { + return open(commit.getDirectory(), deletionPolicy, commit, readOnly, + DEFAULT_TERMS_INDEX_DIVISOR); + } + + /** + * Expert: returns an {@link SplitReader} reading the index in the given + * Directory, using a specific commit and with a custom + * {@link IndexDeletionPolicy}. You should pass readOnly=true, since it gives + * much better concurrent performance, unless you intend to do write + * operations (delete documents or change norms) with the reader. + * + * @param commit + * the specific {@link IndexCommit} to open; see + * {@link SplitReader#listCommits} to list all commits in a directory + * @param deletionPolicy + * a custom deletion policy (only used if you use this reader to + * perform deletes or to set norms); see {@link IndexWriter} for + * details. + * @param readOnly + * true if no changes (deletions, norms) will be made with this + * SplitIndexReader + * @param termInfosIndexDivisor + * Subsamples which indexed terms are loaded into RAM. This has the + * same effect as {@link IndexWriter#setTermIndexInterval} except + * that setting must be done at indexing time while this setting can + * be set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into memory. By + * setting this to a value > 1 you can reduce memory usage, at the + * expense of higher latency when loading a TermInfo. The default + * value is 1. Set this to -1 to skip loading the terms index + * entirely. + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public static SplitReader open(final IndexCommit commit, + IndexDeletionPolicy deletionPolicy, boolean readOnly, + int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return open(commit.getDirectory(), deletionPolicy, commit, readOnly, + termInfosIndexDivisor); + } + + /** + * Returns an {@link SplitReader} reading the split index upto the given + * {@link IndexCommit}, using the given {@link IndexDeletionPolicy}. You + * should pass readOnly=true, since it gives much better concurrent + * performance, unless you intend to do write operations (delete documents or + * change norms) with the reader. + * + * @param directory + * the index directory + * @param deletionPolicy + * a custom deletion policy (only used if you use this reader to + * perform deletes or to set norms); see {@link IndexWriter} for + * details. + * @param commit + * the specific {@link IndexCommit} to open; see + * {@link SplitReader#listCommits} to list all commits in a directory + * @param readOnly + * true if no changes (deletions, norms) will be made with this + * SplitIndexReader + * @param termInfosIndexDivisor + * Subsamples which indexed terms are loaded into RAM. This has the + * same effect as {@link IndexWriter#setTermIndexInterval} except + * that setting must be done at indexing time while this setting can + * be set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into memory. By + * setting this to a value > 1 you can reduce memory usage, at the + * expense of higher latency when loading a TermInfo. The default + * value is 1. Set this to -1 to skip loading the terms index + * entirely. + * @return + * @throws CorruptIndexException + * @throws IOException + */ + static SplitReader open(final Directory directory, + final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, + final boolean readOnly, int termInfosIndexDivisor) + throws CorruptIndexException, IOException { + List subReaders = new ArrayList(); + for (Directory subDirectory : SplitWriter.getSplitPolicy(directory) + .getSplits()) { + IndexReader subReader = null; + if (commit == null) { + subReader = IndexReader.open(subDirectory, deletionPolicy, readOnly, + termInfosIndexDivisor); + } else { + subReader = IndexReader.open(subDirectory, deletionPolicy, readOnly, + termInfosIndexDivisor); + if (commit.getDirectory().equals(subDirectory)) { + break; + } + } + subReaders.add(subReader); + } + if (commit == null || commit.getDirectory().equals(directory)) { + subReaders.add(IndexReader.open(directory, deletionPolicy, readOnly, + termInfosIndexDivisor)); + } + return new SplitReader(subReaders); + } + + /** + * Add a sub-reader corresponding to a new split to this split reader. + * + * @param subReader + * the sub-reader for a new split that was just added + * @throws IOException + */ + public void addSplit(IndexReader subReader) throws IOException { + IndexReader[] subReaders = new IndexReader[this.subReaders.length + 1]; + for (int index = 0; index < this.subReaders.length; index++) { + subReaders[index] = this.subReaders[index]; + } + subReaders[subReaders.length - 1] = subReader; + super.initialize(subReaders, false); + } + + /** + * Remove a sub-reader corresponding to an existing split of this split + * reader. + * + * @param subReader + * the sub-reader for an existing split that is being removed + * @throws IOException + */ + public void removeSplit(IndexReader subReader) throws IOException { + List readers = new ArrayList(); + for (IndexReader reader : this.subReaders) { + if (!reader.directory().equals(subReader.directory())) { + readers.add(reader); + } + } + super.initialize(readers.toArray(new IndexReader[] {}), false); + } + + /** + * Re-open the sub-reader corresponding to the given split. + * + * @param subDirectory + * the directory corresponding to the split being re-opened + * @throws CorruptIndexException + * @throws IOException + */ + public void reopenSplit(Directory subDirectory) throws CorruptIndexException, + IOException { + List readers = new ArrayList( + this.subReaders.length); + for (IndexReader reader : this.subReaders) { + if (reader.directory().equals(subDirectory)) { + readers.add(reader.reopen()); + } else { + readers.add(reader); + } + } + + // Re-initialize the underlying {@link MultiReader}. + super.initialize(readers.toArray(new IndexReader[] {}), true); + } + + /** + * @return the split writer for this reader + */ + public SplitWriter getWriter() { + return writer; + } + + /** + * @return the proxy for this reader's voters, which itself acts as a voter + */ + protected SplitVoter getVoters() { + return voterPool.getVoterProxy(); + } + + /** + * Add a split voter to this split reader (and writer too, if it exists). + * + * @param splitVoter + * the split voter being added + */ + public void addVoter(SplitVoter splitVoter) { + voterPool.addVoter(splitVoter); + if (writer != null) { + writer.addVoter(splitVoter); + } + } + + /** + * Remove a split voter from this split reader (and writer too, if it exists). + * + * @param splitVoter + * the split voter being removed + */ + public void removeVoter(SplitVoter splitVoter) { + voterPool.removeVoter(splitVoter); + if (writer != null) { + writer.removeVoter(splitVoter); + } + } + + /** + * When a document is deleted based on its document number, notify registered + * voters. + * + * @param docNum + * the number of the document being deleted + */ + @Override + public synchronized void deleteDocument(int docNum) + throws StaleReaderException, CorruptIndexException, + LockObtainFailedException, IOException { + if (getVoters().onDeleteDocument(docNum).carryOn()) { + super.deleteDocument(docNum); + } + } + + /** + * When a document is deleted based on a term, notify registered voters. + * + * @param term + * the term on which the document being removed is indexed + * @return the number of documents deleted + */ + @Override + public int deleteDocuments(Term term) throws StaleReaderException, + CorruptIndexException, LockObtainFailedException, IOException { + SplitVote vote = getVoters().onDeleteDocument(term); + int documentsDeleted = vote.getReturnValue() != null ? (Integer) vote + .getReturnValue() : 0; + if (vote.carryOn()) { + documentsDeleted += super.deleteDocuments(term); + } + return documentsDeleted; + } + + /** + * When all document deletes are rolled back, notify registered voters. + */ + @Override + public synchronized void undeleteAll() throws StaleReaderException, + CorruptIndexException, LockObtainFailedException, IOException { + if (getVoters().onUndeleteAll().carryOn()) { + super.undeleteAll(); + } + } + + /** + * When document changes are committed, notify registered voters. + * + * @param commitUserData + * the data passed by the user to {@link IndexReader#commit} + */ + @Override + protected void doCommit(Map commitUserData) throws IOException { + if (getVoters().onCommit(commitUserData).carryOn()) { + super.doCommit(commitUserData); + } + } +}