options) {
+ super(options);
+ context.put(SPLIT_RULE_CLASS_NAME, getClass().getSimpleName());
+ this.ruleInstanceID = nextRuleInstanceID++;
+ }
+
+ public long getRuleInstanceID() {
+ return ruleInstanceID;
+ }
+
+ /**
+ * When the split policy is opened, make a note of it here.
+ */
+ public void onOpen(SplitPolicy splitPolicy) {
+ this.splitPolicy = (AbstractSplitPolicy) splitPolicy;
+ }
+
+}
Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterPool.java
===================================================================
--- contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterPool.java (revision 0)
+++ contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterPool.java (revision 0)
@@ -0,0 +1,126 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+
+/**
+ * The SplitVoterPool acts as a proxy for a group of split voters.
+ * It behaves like a {@link SplitVoter} in and of itself. It basically delegates
+ * all events to each of the registered voters. Then, it creates a
+ * {@link SplitVote} object that may be regarded as a consensus of the votes of
+ * the registered voters.
+ *
+ *
+ * In particular, if any of the registered voters vote to override the return
+ * value of the index-changing method in the {@link SplitWriter} or
+ * {@link SplitReader}, then the return value of the leading voter (viz., the
+ * one that appears ahead of the others) is used. Also, if any of the registered
+ * voters vote to not have the index-changing method carry on business as usual,
+ * then it too will vote likewise.
+ *
+ * @author Karthick Sankarachary
+ *
+ */
+public class SplitVoterPool {
+ // The list of registered voters.
+ private List voters = new CopyOnWriteArrayList();
+
+ /**
+ * A proxy object that implements the {@link SplitVoter} interface.
+ */
+ private SplitVoter proxy = (SplitVoter) Proxy.newProxyInstance(
+ SplitVoter.class.getClassLoader(), new Class[] {SplitVoter.class},
+ new InvocationHandler() {
+
+ public Object invoke(Object proxy, Method method, Object[] args)
+ throws Throwable {
+ try {
+ boolean combinedCarryOn = true;
+ Object combinedReturnValue = null;
+ for (SplitVoter listener : voters) {
+ SplitVote listenersSplitVote = (SplitVote) method.invoke(
+ listener, args);
+ if (combinedReturnValue == null
+ && listenersSplitVote.getReturnValue() != null) {
+ combinedReturnValue = listenersSplitVote.getReturnValue();
+ }
+ if (!listenersSplitVote.carryOn()) {
+ combinedCarryOn = false;
+ }
+ }
+ final boolean finalCarryOn = combinedCarryOn;
+ final Object finalReturnValue = combinedReturnValue;
+ return new SplitVote() {
+ @Override
+ public boolean carryOn() {
+ return finalCarryOn;
+ }
+
+ @Override
+ public Object getReturnValue() {
+ return finalReturnValue;
+ }
+ };
+ } catch (Throwable t) {
+ System.out.println("One of the split voters failed to vote ("
+ + t.getMessage() + ")");
+ t.printStackTrace();
+ return SplitVote.CARRY_ON;
+ }
+ }
+ });
+
+ /**
+ * @return the voter proxy object corresponding to this pool of voters
+ */
+ public SplitVoter getVoterProxy() {
+ return proxy;
+ }
+
+ /**
+ * Add a voter to the mix.
+ *
+ * @param voter
+ * the voter being added
+ */
+ public void addVoter(SplitVoter voter) {
+ voters.add(voter);
+ }
+
+ /**
+ * Remove a voter from the mix.
+ *
+ * @param voter
+ * the voter being removed
+ */
+ public void removeVoter(SplitVoter voter) {
+ voters.remove(voter);
+ }
+
+ /**
+ * @return the number of registered voters
+ */
+ public int getVoterCount() {
+ return voters.size();
+ }
+}
Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitWriter.java
===================================================================
--- contrib/splitindex/src/java/org/apache/lucene/index/SplitWriter.java (revision 0)
+++ contrib/splitindex/src/java/org/apache/lucene/index/SplitWriter.java (revision 0)
@@ -0,0 +1,413 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.lang.reflect.Constructor;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.Version;
+
+/**
+ * A SplitWriter builds on an {@link IndexWriter} by aggregating it
+ * with a set of sub-directories known as splits. We denote the directory
+ * underlying the super class {@link IndexWriter} as the super-directory, where
+ * real-time changes to the index will typically reside. The distribution of
+ * content across the super-directory and sub-directories is controlled by what
+ * is known as a {@link SplitPolicy}. To facilitate that, the split policy is
+ * allowed to observe changes made by not only the {@link SplitWriter} but also
+ * the {@link SplitReader}s obtained from it.
+ *
+ *
+ * Each sub-index corresponding to a split denotes a partition that captures a
+ * portion of the changes made to the split index. For instance, a split may
+ * represent a certain splice of time or a mirror image of the super-directory.
+ * The exact semantics of a split is specified by specific implementations of
+ * the {@link SplitPolicy}. Note that the {@link SplitWriter},
+ * {@link SplitReader} and the {@link SplitPolicy} interface intentionally leave
+ * the semantics of splits unspecified. Instead, they merely provide an
+ * extensible and observable framework for enforcing the policies and rules that
+ * define the split index.
+ *
+ *
+ *
+ * The key enabler that makes the split writer observable is the
+ * {@link SplitVoter} interface, which acts as a listener on all state-changing
+ * actions performed on the super-index (based on the super-directory). Both the
+ * {@link SplitWriter} and {@link SplitReader} trigger events whenever the state
+ * of the index (potentially) changes, which are propagated to each of the
+ * registered voters. In fact, a voter may choose to override (and even disable)
+ * the action being performed on the super-directory, if it so wishes. Think of
+ * split voting and policies as a aspect-oriented mechanism that is tailored for
+ * lucene indices.
+ *
+ *
+ * @author Karthick Sankarachary
+ *
+ */
+public class SplitWriter extends IndexWriter implements Closeable {
+ // The split policy that governs the splits (and to some extent, the
+ // super-directory).
+ protected SplitPolicy policy;
+ // The pool of voters registered with this split writer.
+ protected SplitVoterPool voterPool = new SplitVoterPool();
+
+ // The name of the file where the split policy's options are serialized.
+ protected static final String SPLIT_POLICY_OPTIONS_FILE = "policy.options.ser";
+ // The name of the split policy class that is stored along with the options.
+ protected static final String SPLIT_POLICY_CLASS_NAME = "split.policy.class.name";
+
+ /**
+ * Construct an {@link SplitWriter} that based on the given directory, which
+ * is passed along to the super class which is a {@link IndexWriter}, along
+ * with the {@link IndexWriterConfig}. The {@link SplitPolicy} is registered
+ * as a voter of this split writer, along with its {@link SplitRule}s (@see
+ * {@link SplitPolicy#getRules()}, if any.
+ *
+ * @param directory
+ * the super-directory on which the {@link IndexWriter} is based
+ * @param config
+ * the configuration for the writer of the super-directory
+ * @param policy
+ * the split policy for the split writer
+ * @throws CorruptIndexException
+ * @throws LockObtainFailedException
+ * @throws IOException
+ */
+ public SplitWriter(Directory directory, IndexWriterConfig config,
+ SplitPolicy policy) throws CorruptIndexException,
+ LockObtainFailedException, IOException {
+ super(directory, config);
+ initialize(policy);
+ }
+
+ /**
+ * Create the voter pool, add the policy and its rules to it. Persist the
+ * state of the policy into the {@link #SPLIT_POLICY_OPTIONS_FILE}.
+ *
+ * @param policy
+ * the split policy for this writer
+ * @throws CorruptIndexException
+ * @throws LockObtainFailedException
+ * @throws IOException
+ */
+ private void initialize(SplitPolicy policy) throws CorruptIndexException,
+ LockObtainFailedException, IOException {
+ this.policy = policy;
+
+ voterPool = new SplitVoterPool();
+ voterPool.addVoter(policy);
+ for (SplitRule splitRule : policy.getRules()) {
+ voterPool.addVoter(splitRule);
+ }
+ getVoters().onOpen(this, getDirectory());
+
+ if (getDirectory() instanceof FSDirectory) {
+ File file = ((FSDirectory) getDirectory()).getDirectory();
+ ObjectOutputStream fos = new ObjectOutputStream(new FileOutputStream(
+ new File(file, SPLIT_POLICY_OPTIONS_FILE)));
+ policy.getOptions().put(SPLIT_POLICY_CLASS_NAME,
+ policy.getClass().getCanonicalName());
+ fos.writeObject(policy.getOptions());
+ }
+ }
+
+ /**
+ * @return the split policy for this writer
+ */
+ public SplitPolicy getSplitPolicy() {
+ return policy;
+ }
+
+ /**
+ * Restore the state of the split policy from the
+ * {@link #SPLIT_POLICY_OPTIONS_FILE} that is saved under the super-directory.
+ *
+ * @param directory
+ * the super-directory for the split index
+ * @return the split policy based on the persisted options
+ * @throws CorruptIndexException
+ */
+ @SuppressWarnings("unchecked")
+ public static SplitPolicy getSplitPolicy(Directory directory)
+ throws CorruptIndexException {
+ SplitPolicy policy = null;
+ if (directory instanceof FSDirectory) {
+ File file = ((FSDirectory) directory).getDirectory();
+ try {
+ ObjectInputStream ois = new ObjectInputStream(new FileInputStream(
+ new File(file, SPLIT_POLICY_OPTIONS_FILE)));
+ Map options = (Map) ois
+ .readObject();
+ String splitPolicyClassName = (String) options
+ .get(SPLIT_POLICY_CLASS_NAME);
+ Class> policyClass = Class.forName(splitPolicyClassName);
+ Constructor> policyConstructor = policyClass
+ .getConstructor(Map.class);
+ policy = (SplitPolicy) policyConstructor.newInstance(options);
+ policy.onOpen(null, directory);
+ } catch (Exception e) {
+ throw new CorruptIndexException(
+ "Could not load split policy from the directory");
+ }
+ }
+ return policy;
+ }
+
+ /**
+ * @return the proxy for this writer's voters, which itself acts as a voter
+ */
+ protected SplitVoter getVoters() {
+ return voterPool.getVoterProxy();
+ }
+
+ /**
+ * Add a split voter to this split writer.
+ *
+ * @param splitVoter
+ * the split voter being added
+ */
+ public void addVoter(SplitVoter voter) {
+ voterPool.addVoter(voter);
+ }
+
+ /**
+ * Remove a split voter from this split writer.
+ *
+ * @param splitVoter
+ * the split voter being removed
+ */
+ public void removeVoter(SplitVoter voter) {
+ voterPool.removeVoter(voter);
+ }
+
+ /**
+ * @return the number of sub-indices (or splits) in this split index.
+ */
+ public int getNumSplits() {
+ return getSplitPolicy().getSplits().size();
+ }
+
+ /**
+ * Prevent modifications to the set of splits in this writer.
+ *
+ * @throws SplitException
+ */
+ public void pauseSplits() throws SplitException {
+ getVoters().onPause();
+ }
+
+ /**
+ * Allow modifications to the set of splits in this writer.
+ *
+ * @throws SplitException
+ */
+ public void resumeSplits() throws SplitException {
+ getVoters().onResume();
+ }
+
+ /**
+ * Add readers to the super-index, unless the voters decide otherwise.
+ *
+ * @param readers
+ * one or more readers to add
+ */
+ @Override
+ public void addIndexes(IndexReader... readers) throws CorruptIndexException,
+ IOException {
+ if (getVoters().onAddIndexes(readers).carryOn()) {
+ super.addIndexes(readers);
+ }
+ }
+
+ /**
+ * Add directories to the super-index, unless the voters decide otherwise.
+ *
+ * @param directories
+ * one or more directories to add
+ */
+ @Override
+ public void addIndexesNoOptimize(Directory... dirs)
+ throws CorruptIndexException, IOException {
+ if (getVoters().onAddIndexesNoOptimize(dirs).carryOn()) {
+ super.addIndexesNoOptimize(dirs);
+ }
+ }
+
+ /**
+ * Close the super-index after notifying its voters to do the same.
+ *
+ * @param waitForMerges
+ * if true, this call will block until all merges complete; else, it
+ * will ask all running merges to abort, wait until those merges have
+ * finished (which should be at most a few seconds), and then return.
+ */
+ @Override
+ public void close(boolean waitForMerges) throws CorruptIndexException,
+ IOException {
+ if (getVoters().onClose().carryOn()) {
+ super.close(waitForMerges);
+ }
+ }
+
+ /**
+ * Commit the super-index after notifying its voters to do the same.
+ *
+ * @param commitUserData
+ * the data passed by the user that initiated the commit
+ */
+ @Override
+ public void commit(Map commitUserData)
+ throws CorruptIndexException, IOException {
+ if (getVoters().onCommit(commitUserData).carryOn()) {
+ super.commit(commitUserData);
+ }
+ }
+
+ /**
+ * Delete all documents from the super-index after notifying its voters to do
+ * the same.
+ */
+ @Override
+ public synchronized void deleteAll() throws IOException {
+ if (getVoters().onDeleteAll().carryOn()) {
+ super.deleteAll();
+ }
+ }
+
+ /**
+ * Delete all documents from only the super-index.
+ *
+ * @throws IOException
+ */
+ protected synchronized void deleteAllFromSuper() throws IOException {
+ super.deleteAll();
+ }
+
+ /**
+ * Add documents to the super-index, unless the voters decide otherwise.
+ */
+ @Override
+ public void addDocument(Document document, Analyzer analyzer)
+ throws CorruptIndexException, IOException {
+ if (getVoters().onAddDocument(document, analyzer).carryOn()) {
+ super.addDocument(document, analyzer);
+ }
+ }
+
+ /**
+ * Obtain a reader for this split index, which typically is a
+ * {@link SplitReader}. However, the split policy in force may choose to
+ * override that with a different type of {@link IndexReader}.
+ */
+ @Override
+ public IndexReader getReader() throws IOException {
+ SplitReader reader = new SplitReader(this);
+ SplitVote splitVote = getVoters().onGetReader(reader);
+ return (splitVote.getReturnValue() != null) ? (IndexReader) splitVote
+ .getReturnValue() : reader;
+ }
+
+ /**
+ * @return the reader for the super-index
+ * @throws IOException
+ */
+ public IndexReader getSuperReader() throws IOException {
+ return super.getReader(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
+ }
+
+ /**
+ * Obtain an index reader for just the splits in this index. In other words,
+ * the super-directory is not made visible in this reader.
+ *
+ * @return an index reader for only the splits
+ * @throws IOException
+ */
+ public IndexReader getPolicyReader() throws IOException {
+ return new SplitReader(this, true);
+ }
+
+ /**
+ * Obtain an index reader that optionally includes the super-index and zero or
+ * more splits.
+ *
+ * @return an index reader for the given set of indices
+ */
+ public IndexReader getReader(boolean excludeSuper, int... slitsToInclude)
+ throws IOException {
+ return new SplitReader(this, excludeSuper, slitsToInclude);
+ }
+
+ /**
+ * @return the total number of documents in this split index
+ */
+ @Override
+ public synchronized int numDocs() throws IOException {
+ return super.numDocs() + policy.numDocs();
+ }
+
+ /**
+ * @return the number of documents in the super-directory
+ * @throws IOException
+ */
+ public synchronized int numSuperDocs() throws IOException {
+ return super.numDocs();
+ }
+
+ /**
+ * Optimize the super-directory and each of the splits, if not already
+ * optimized.
+ *
+ * @param maxNumSegments
+ * maximum number of segments left in the index after optimization
+ * finishes
+ * @param doWait
+ * should the call block until the optimize completes?
+ */
+ @Override
+ public void optimize(int maxNumSegments, boolean doWait)
+ throws CorruptIndexException, IOException {
+ super.optimize(maxNumSegments, doWait);
+ IndexReader[] subReaders = getPolicyReader().getSequentialSubReaders();
+ if (subReaders != null) {
+ for (IndexReader subReader : subReaders) {
+ if (!subReader.isOptimized()) {
+ IndexWriterConfig subConfig = new IndexWriterConfig(
+ Version.LUCENE_31, getAnalyzer());
+ subConfig.setMaxFieldLength(super.getConfig().getMaxFieldLength());
+ IndexWriter subWriter = new IndexWriter(subReader.directory(),
+ subConfig);
+ subWriter.optimize(maxNumSegments, doWait);
+ subWriter.close(true);
+ }
+ }
+ }
+ }
+}
Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitVote.java
===================================================================
--- contrib/splitindex/src/java/org/apache/lucene/index/SplitVote.java (revision 0)
+++ contrib/splitindex/src/java/org/apache/lucene/index/SplitVote.java (revision 0)
@@ -0,0 +1,58 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The SplitVote represents the vote of a {@link SplitVoter},
+ * typically in response to a change to the split index.
+ *
+ *
+ * The vote serves a two-fold purpose. Through it's {@link #carryOn()} method,
+ * it indicates whether or not the change should actually be performed on the
+ * super-directory. Plus, through its {@link #getReturnValue()} method, it
+ * indicates that the return value that should be used by the method on the
+ * split index that caused the change.
+ *
+ *
+ *
+ * Note that it takes only one vote to say false to {@link #carryOn()} for the
+ * original change to be ignored. On the other hand, only the first non-null
+ * {@link #getReturnValue()} is used to override the original return value of
+ * the state-changing method.
+ *
+ */
+
+public class SplitVote {
+
+ public static final SplitVote CARRY_ON = new SplitVote();
+
+ public static final SplitVote DO_NOTHING = new SplitVote() {
+ public boolean carryOn() {
+ return false;
+ };
+ };
+
+ public Object getReturnValue() {
+ return null;
+ }
+
+ public boolean carryOn() {
+ return true;
+ }
+
+}
Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitException.java
===================================================================
--- contrib/splitindex/src/java/org/apache/lucene/index/SplitException.java (revision 0)
+++ contrib/splitindex/src/java/org/apache/lucene/index/SplitException.java (revision 0)
@@ -0,0 +1,22 @@
+package org.apache.lucene.index;
+
+public class SplitException extends RuntimeException {
+ private static final long serialVersionUID = 8267088235489206246L;
+
+ public SplitException() {
+ super();
+ }
+
+ public SplitException(String message) {
+ super(message);
+ }
+
+ public SplitException(Throwable cause) {
+ super(cause);
+ }
+
+ public SplitException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+}
Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitPolicy.java
===================================================================
--- contrib/splitindex/src/java/org/apache/lucene/index/SplitPolicy.java (revision 0)
+++ contrib/splitindex/src/java/org/apache/lucene/index/SplitPolicy.java (revision 0)
@@ -0,0 +1,145 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.EventListener;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.store.Directory;
+
+/**
+ * A SplitPolicy determines the way in which a {@link SplitWriter}
+ * is broken down into sub-indices.
+ *
+ *
+ * In particular, this interface separates the concern of managing the
+ * directories underlying the sub-indices from the implementation of the
+ * {@link SplitWriter} and {@link SplitReader}. The set of directories that it
+ * owns may be obtained via its {@link #getSplits()} method. It is important to
+ * note that the split policy does not own the directory passed to the
+ * {@link SplitWriter}'s super class, which is an {@link IndexWriter}. However,
+ * the split policy may decide to move documents out of the writer's
+ * (super-)directory into one or more of the (sub-)directories that it owns, if
+ * it so wishes. In a way, the {@link SplitPolicy} acts as the polar opposite of
+ * the {@link MergePolicy} in the sense that under its auspices documents tends
+ * to diverge rather than converge in terms of their location in the index.
+ *
+ *
+ *
+ * Typically, the composition of the set of directories (or splits) owned by the
+ * {@link SplitPolicy} changes when the split index is updated, either through
+ * the {@link SplitWriter} or the {@link SplitReader}. To that end, the policy
+ * acts as an observer of the aforementioned classes, specifically any method
+ * that has the potential to change the contents of the index. This way, the
+ * split policy receives notifications whenever the index writer is updated. In
+ * addition, it is also notified of the index's life cycle events, such as when
+ * it is opened or closed.
+ *
+ *
+ *
+ * For those interested in knowing when the split policy adds or removes a
+ * sub-directory, they may register themselves as a split policy listener.
+ *
+ *
+ *
+ * Given that implementations of the split policy will inevitably need to be
+ * configured differently, we pass a options hash to their constructor, from
+ * which they can pick the configuration items of interest to them. The policy's
+ * options can be obtained through its {@link #getOptions()} method, which must
+ * be in a serializable shape, so that the policy can be dehydrated and hydrated
+ * to persistent storage as and when required.
+ *
+ */
+public interface SplitPolicy extends SplitVoter {
+ /**
+ * An option specifying the collection of split rules to be employed.
+ */
+ public static final String POLICY_SPLIT_RULES = "policy.split.rules";
+
+ /**
+ * Return a set of {@link Directory}, each of which represents a split (or
+ * sub-index) of the index, as defined by this policy.
+ *
+ * @return the set of directories of sub-indices belonging to this split
+ * index.
+ */
+ public Set getSplits();
+
+ /**
+ * @return the list of split rules being enforced.
+ */
+ public Collection getRules();
+
+ /**
+ * Return the options for this policy in a serializable form.
+ *
+ * @return the map of option keys to corresponding values.
+ */
+
+ public Map getOptions();
+
+ /**
+ * @return a specific split rule of the given type.
+ */
+ public SplitRule getRule(Class extends SplitRule> splitRuleClass);
+
+ /**
+ * A SplitListener observes the split personality of a
+ * {@link SplitPolicy}. Specifically, it is notified any time the return value
+ * of its {@link getSplits()} method is about to change.
+ *
+ */
+ public interface SplitPolicyListener extends EventListener {
+ /**
+ * The event to trigger after a split has been performed. The new state of
+ * the split policy is available at this point to its listeners.
+ *
+ * @param splits
+ * the new state of the split policy
+ */
+ public void onSplit(Set splits);
+ }
+
+ /**
+ * Add a {@link #SplitListener} to this {@link SplitPolicy}.
+ *
+ * @param listener
+ * the split policy listener to be added.
+ */
+ public void addListener(SplitPolicyListener listener);
+
+ /**
+ * Remove a {@link #SplitListener} from this {@link SplitPolicy}.
+ *
+ * @param listener
+ * the split policy listener to be removed.
+ */
+ public void removeListener(SplitPolicyListener listener);
+
+ /**
+ * @return the number of (logical) documents present exclusively in the splits
+ * (outside of the super-directory).
+ * @throws CorruptIndexException
+ * @throws IOException
+ */
+ public int numDocs() throws CorruptIndexException, IOException;
+}
Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitVoter.java
===================================================================
--- contrib/splitindex/src/java/org/apache/lucene/index/SplitVoter.java (revision 0)
+++ contrib/splitindex/src/java/org/apache/lucene/index/SplitVoter.java (revision 0)
@@ -0,0 +1,195 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.EventListener;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
+
+/**
+ * The SplitVoter is an event listener that is notified of
+ * real-time changes being made to the split writer or reader.
+ *
+ *
+ * After processing an event, the split voter must return a {@link SplitVote}
+ * object that may be used to (a) override the return value of method that
+ * caused the change, (b) force the split writer or reader (as the case may be)
+ * to not change the (super-)directory where appropriate.
+ *
+ *
+ * @author Karthick Sankarachary
+ *
+ */
+public interface SplitVoter extends EventListener {
+ /**
+ * This method is invoked when the index is opened for writing or reading,
+ * which is the cue for the policy to initialize its state.
+ *
+ * @param directory
+ * the directory on which the slit index is based
+ * @throws IOException
+ * if the policy could not initialize its state
+ */
+ public SplitVote onOpen(SplitWriter writer, Directory directory)
+ throws IOException;
+
+ /**
+ * This method is invoked when the index is closed for business, which is the
+ * cue for the policy to cleanup its state.
+ *
+ * @throws CorruptIndexException
+ * if the splits were found to be in an inconsistent state
+ * @throws IOException
+ * if the splits could not be closed properly
+ */
+ public SplitVote onClose() throws CorruptIndexException, IOException;
+
+ /**
+ * This method is invoked when either the split writer or reader commits its
+ * changes.
+ *
+ * @param commitUserData
+ * @return
+ * @throws CorruptIndexException
+ * @throws IOException
+ */
+ public SplitVote onCommit(Map commitUserData)
+ throws CorruptIndexException, IOException;
+
+ /**
+ * This method is invoked when the index is put on pause, which entails that
+ * the policy in turn put it's split rule activities on hold.
+ */
+ public SplitVote onPause();
+
+ /**
+ * This method is invoked when the index is asked to resume operations (after
+ * a {@link #pause()}), at which point the policy may resume it's split rule
+ * activities.
+ */
+ public SplitVote onResume();
+
+ /**
+ * This method is invoked when document are added to the split index,
+ * specifically the directory underlying the index writer super-class.
+ *
+ * @param document
+ * the document about to be added
+ * @param analyzer
+ * the analyzer used to process the document
+ * @return a flag indicating whether or not the document should be added to
+ * the running directory
+ * @throws CorruptIndexException
+ * if the change appears to have corrupted the index
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onAddDocument(Document document, Analyzer analyzer)
+ throws CorruptIndexException, IOException;
+
+ /**
+ * This method is invoked when external index (readers) are added to the split
+ * index, specifically the directory underlying the index writer super-class.
+ *
+ * @param readers
+ * the index readers about to be added
+ * @return flag indicating whether or not the document should be added to the
+ * running directory
+ * @throws CorruptIndexException
+ * if the change appears to have corrupted the index
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onAddIndexes(IndexReader... readers)
+ throws CorruptIndexException, IOException;
+
+ /**
+ * This method is invoked when external index (directories) are added to the
+ * split index, specifically the directory underlying the index writer
+ * super-class.
+ *
+ * @param dirs
+ * the index directories about to be added
+ * @return flag indicating whether or not the document should be added to the
+ * running directory
+ * @throws CorruptIndexException
+ * if the change appears to have corrupted the index
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onAddIndexesNoOptimize(Directory[] dirs)
+ throws CorruptIndexException, IOException;
+
+ /**
+ * This method is invoked when the {@link SplitWriter} is asked to provide an
+ * index reader through its {@link SplitWriter#getReader} method. Typically,
+ * the policy will return the (split) reader passed to it, but it may
+ * completely different type of reader, if it so desires.
+ *
+ * @param reader
+ * the split reader the writer plans on returning
+ * @return the actual index reader that will be returned
+ * @throws CorruptIndexException
+ * if the change appears to have corrupted the index
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onGetReader(SplitReader reader)
+ throws CorruptIndexException, IOException;
+
+ /**
+ * This method is invoked when the {@link SplitWriter#deleteAll()} method is
+ * invoked.
+ *
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onDeleteAll() throws IOException;
+
+ /**
+ * This method is invoked when the {@link SplitReader#undeleteAll()} method is
+ * invoked.
+ *
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onUndeleteAll();
+
+ /**
+ * This method is invoked when the {@link SplitReader#deleteDocument(int)}
+ * method is invoked.
+ *
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onDeleteDocument(int docNum);
+
+ /**
+ * This method is invoked when the {@link SplitReader#deleteDocuments(Term)}
+ * method is invoked.
+ *
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onDeleteDocument(Term term);
+
+}
Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterAdapter.java
===================================================================
--- contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterAdapter.java (revision 0)
+++ contrib/splitindex/src/java/org/apache/lucene/index/SplitVoterAdapter.java (revision 0)
@@ -0,0 +1,330 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.lang.reflect.Method;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
+
+/**
+ * The SplitVoterAdapter provides a rudimentary implementation of
+ * the {@link SplitVoter} interface, using sane defaults for the returned votes.
+ *
+ * @author Karthick Sankarachary
+ *
+ */
+public class SplitVoterAdapter implements SplitVoter {
+ // The split policy associated with this split voter.
+ protected AbstractSplitPolicy splitPolicy;
+
+ // A flag indicating whether the split policy (and rules thereof) are in a
+ // paused state.
+ protected boolean paused = false;
+
+ // An extensible options map, whose semantics is open to interpretation.
+ protected Map options;
+
+ /**
+ * Construct a split voter with no options.
+ */
+ public SplitVoterAdapter() {
+ this(new HashMap());
+ }
+
+ /**
+ * Construct a split voter with the given set of options.
+ *
+ * @param options
+ * the configuration options
+ */
+ public SplitVoterAdapter(Map options) {
+ this.options = checkAndBalanceOptions(options);
+ }
+
+ /**
+ * By default, tell the split writer or reader to carry on as usual.
+ *
+ * @param directory
+ * the directory on which the slit index is based
+ * @throws IOException
+ * if the policy could not initialize its state
+ */
+ public SplitVote onOpen(SplitWriter writer, Directory directory)
+ throws IOException {
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * By default, tell the split writer or reader to carry on as usual.
+ *
+ * @throws CorruptIndexException
+ * if the splits were found to be in an inconsistent state
+ * @throws IOException
+ * if the splits could not be closed properly
+ */
+ public SplitVote onClose() throws CorruptIndexException, IOException,
+ SplitException {
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * By default, tell the split writer or reader to carry on as usual.
+ *
+ * @param commitUserData
+ * @return
+ * @throws CorruptIndexException
+ * @throws IOException
+ */
+ public SplitVote onCommit(Map commitUserData)
+ throws CorruptIndexException, IOException {
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * Pause the split voter in an idempotent manner.
+ */
+ public synchronized SplitVote onPause() {
+ paused = true;
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * Resume the split voter in an idempotent manner.
+ */
+ public synchronized SplitVote onResume() {
+ paused = false;
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * By default, tell the split writer or reader to carry on as usual.
+ *
+ * @param document
+ * the document about to be added
+ * @param analyzer
+ * the analyzer used to process the document
+ * @return a flag indicating whether or not the document should be added to
+ * the running directory
+ * @throws CorruptIndexException
+ * if the change appears to have corrupted the index
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onAddDocument(Document document, Analyzer analyzer)
+ throws CorruptIndexException, IOException {
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * By default, tell the split writer or reader to carry on as usual.
+ *
+ * @param readers
+ * the (sub-)indices being added
+ * @return true if and only if the split writer should actually add the
+ * (sub-)indices
+ */
+ public SplitVote onAddIndexes(IndexReader... readers)
+ throws CorruptIndexException, IOException {
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * By default, tell the split writer or reader to carry on as usual.
+ *
+ * @param directories
+ * the (sub-)indices being added
+ * @return true if and only if the split writer should actually add the
+ * (sub-)indices
+ */
+ public SplitVote onAddIndexesNoOptimize(Directory[] directories)
+ throws CorruptIndexException, IOException {
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * By default, tell the split writer or reader to carry on as usual.
+ *
+ * @param reader
+ * the split reader the writer plans on returning
+ * @return the actual index reader that will be returned
+ * @throws CorruptIndexException
+ * if the change appears to have corrupted the index
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onGetReader(SplitReader reader)
+ throws CorruptIndexException, IOException {
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * By default, tell the split writer or reader to carry on as usual.
+ *
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onDeleteAll() throws IOException {
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * By default, tell the split writer or reader to carry on as usual.
+ *
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onUndeleteAll() {
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * By default, tell the split writer or reader to carry on as usual.
+ *
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onDeleteDocument(int docNum) {
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * By default, tell the split writer or reader to carry on as usual.
+ *
+ * @throws IOException
+ * if the change could not be persisted properly
+ */
+ public SplitVote onDeleteDocument(Term term) {
+ return SplitVote.CARRY_ON;
+ }
+
+ /**
+ * @return true if the split voter is in paused mode.
+ */
+ protected boolean isPaused() {
+ return paused;
+ }
+
+ /**
+ * @return a unique identifier for the corresponding split policy.
+ */
+ protected String getSplitPolicyId() {
+ return splitPolicy.toString();
+ }
+
+ /**
+ * @return an empty hash map of option defaults.
+ */
+ public Map getOptionDefaults() {
+ return new HashMap();
+ }
+
+ /**
+ * @return an empty hash map of option bounds.
+ */
+ public Map getOptionBounds() {
+ return new HashMap();
+ }
+
+ /**
+ * @return the options hash used to configure this split voter.
+ */
+ public Map getOptions() {
+ return Collections.synchronizedMap(options);
+ }
+
+ /**
+ * Return the value for the given configuration option key.
+ *
+ * @param key
+ * the key of the configuration option
+ * @return the value for the above key
+ */
+ public Serializable getOption(String key) {
+ return options.get(key);
+ }
+
+ /**
+ * Return the value for the given configuration option key, after casting it
+ * as the given value class.
+ *
+ * @param key
+ * the key of the configuration option
+ * @param valueClass
+ * the expected type of the value
+ * @return the value for the above key
+ */
+ public Serializable getOption(String key,
+ Class extends Serializable> valueClass) {
+ Serializable value = options.get(key);
+ if (!value.getClass().equals(valueClass)) {
+ try {
+ Method method = valueClass.getMethod("valueOf", String.class);
+ value = (Serializable) method.invoke(null, value.toString());
+ } catch (Exception e) {}
+ }
+ return value;
+ }
+
+ /**
+ * Ensure that the user-defined options conform to the bound constraints
+ * defined by {@link #getOptionBounds()}, and if not, replace it with the
+ * corresponding defaults as defined by {@link #getOptionDefaults()}.
+ *
+ * @param options
+ * the user-defined options hash
+ * @return the sanitized version of the input options hash
+ */
+ protected Map checkAndBalanceOptions(
+ Map options) {
+ Map bounds = getOptionBounds();
+ Map defaults = getOptionDefaults();
+ for (String key : bounds.keySet()) {
+ if (options.containsKey(key)) {
+ OptionBound bound = bounds.get(key);
+ if (bound != null && !bound.isInbounds(key, options.get(key))) {
+ options.put(key, defaults.get(key));
+ }
+ }
+ }
+ for (String key : defaults.keySet()) {
+ if (!options.containsKey(key)) {
+ options.put(key, defaults.get(key));
+ }
+ }
+ return options;
+ }
+
+ /**
+ * An OptionBound defines whether or not a given
+ * pair in the options satisfies the constraints defined by the split voter.
+ * It acts as a sanity check mechanism, which ensures that the split voter
+ * does not have to ever deal with incorrect configuration options.
+ *
+ */
+ public interface OptionBound {
+ public boolean isInbounds(String key, Serializable value);
+ }
+
+}
Index: contrib/splitindex/src/java/org/apache/lucene/index/SplitReader.java
===================================================================
--- contrib/splitindex/src/java/org/apache/lucene/index/SplitReader.java (revision 0)
+++ contrib/splitindex/src/java/org/apache/lucene/index/SplitReader.java (revision 0)
@@ -0,0 +1,626 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.LockObtainFailedException;
+
+/**
+ * A SplitReader acts as a logical {@link IndexReader} for the
+ * split index. In particular, it builds on the {@link MultiReader}, such that a
+ * sub-reader maps to either a (split) sub-directory or to the (split index's)
+ * super-directory.
+ *
+ *
+ * In general, a {@link IndexReader} allows for updates to the index (even
+ * though that may seem like a contradiction in terms). Since we want the
+ * ability to observer every change made to the split index, we make the
+ * state-changing methods of {@link SplitReader} observable (by a
+ * {@link SplitVoter}).
+ *
+ *
+ * @author Karthick Sankarachary
+ *
+ */
+public class SplitReader extends MultiReader {
+ // The split writer, if this reader was obtained from one.
+ protected SplitWriter writer;
+ // The pool of voters observing this split reader.
+ protected SplitVoterPool voterPool = new SplitVoterPool();
+
+ /**
+ * Construct a split reader from the given sub-readers. Note that this
+ * constructor is meant to be used only by one of the {@link #open} factory
+ * methods.
+ *
+ * @param subReaders
+ * one or more sub-readers
+ * @throws IOException
+ */
+ SplitReader(IndexReader... subReaders) throws IOException {
+ super(subReaders);
+ }
+
+ /**
+ * Construct a split reader from the given list of sub-readers. Note that this
+ * constructor is meant to be used only by one of the {@link #open} factory
+ * methods.
+ *
+ * @param subReaders
+ * a list of sub-readers
+ * @throws IOException
+ */
+ SplitReader(List subReaders) throws IOException {
+ super(subReaders.toArray(new IndexReader[] {}));
+ }
+
+ /**
+ * Construct a split reader based on a split writer. Note that this
+ * constructor is meant to used only by the {@link SplitWriter}.
+ *
+ * @param writer
+ * the split writer
+ */
+ SplitReader(SplitWriter writer) throws CorruptIndexException, IOException {
+ this(writer, false);
+ }
+
+ /**
+ * Construct a split reader based on a split writer, that may or may not
+ * exclude the super-directory. Note that this constructor is meant to used
+ * only by the {@link SplitWriter}.
+ *
+ * @param writer
+ * the split writer
+ * @param excludeSuper
+ * whether or not to exclude the super-directory
+ */
+ SplitReader(SplitWriter writer, boolean excludeSuper)
+ throws CorruptIndexException, IOException {
+ this(writer, DEFAULT_TERMS_INDEX_DIVISOR, excludeSuper);
+ }
+
+ /**
+ * Construct a split reader based on a split writer, and on the given divisor.
+ * Note that this constructor is meant to used only by the {@link SplitWriter}
+ * .
+ *
+ * @param writer
+ * the split writer
+ * @param termInfosIndexDivisor
+ * Sub-samples which indexed terms are loaded into RAM
+ * @throws CorruptIndexException
+ * @throws IOException
+ */
+ SplitReader(SplitWriter writer, int termInfosIndexDivisor)
+ throws CorruptIndexException, IOException {
+ this(writer, termInfosIndexDivisor, false);
+ }
+
+ /**
+ * Construct a split reader based on a split writer, and on the given divisor.
+ * Note that this constructor is meant to used only by the {@link SplitWriter}
+ *
+ * @param writer
+ * the split writer
+ * @param termInfosIndexDivisor
+ * Sub-samples which indexed terms are loaded into RAM
+ * @param excludeSuper
+ * whether or not to exclude the super-directory
+ * @throws CorruptIndexException
+ * @throws IOException
+ */
+ SplitReader(SplitWriter writer, int termInfosIndexDivisor,
+ boolean excludeSuper) throws CorruptIndexException, IOException {
+ this(writer, termInfosIndexDivisor, excludeSuper, new int[] {});
+ }
+
+ /**
+ * Construct a split reader based on a split writer, and on the given divisor
+ * and splits to include. Note that this constructor is meant to used only by
+ * the {@link SplitWriter}
+ *
+ * @param writer
+ * the split writer
+ * @param termInfosIndexDivisor
+ * Sub-samples which indexed terms are loaded into RAM
+ * @param excludeSuper
+ * whether or not to exclude the super-directory
+ * @param splitsToInclude
+ * indices of one or more splits to include
+ * @throws CorruptIndexException
+ * @throws IOException
+ */
+ SplitReader(SplitWriter writer, boolean excludeSuper, int... splitsToInclude)
+ throws CorruptIndexException, IOException {
+ this(writer, DEFAULT_TERMS_INDEX_DIVISOR, excludeSuper, splitsToInclude);
+ }
+
+ SplitReader(SplitWriter writer, int termInfosIndexDivisor,
+ boolean excludeSuper, int... splitsToInclude)
+ throws CorruptIndexException, IOException {
+ super(getSubReaders(writer, termInfosIndexDivisor, excludeSuper,
+ splitsToInclude));
+ this.writer = writer;
+ }
+
+ /**
+ * Return an array of sub-readers, one for each of the specified splits.
+ *
+ * @param writer
+ * the split writer
+ * @param termInfosIndexDivisor
+ * Sub-samples which indexed terms are loaded into RAM
+ * @param excludeSuper
+ * whether or not to exclude the super-directory
+ * @param splitsToInclude
+ * indices of one or more splits to be included
+ * @return the array of sub-readers for the specified splits
+ * @throws CorruptIndexException
+ * @throws IOException
+ */
+ private static IndexReader[] getSubReaders(SplitWriter writer,
+ int termInfosIndexDivisor, boolean excludeSuper, int... splitsToInclude)
+ throws CorruptIndexException, IOException {
+ List subDirectories = new ArrayList(writer
+ .getSplitPolicy().getSplits());
+
+ if (subDirectories == null) {
+ return new IndexReader[] {};
+ }
+ if (splitsToInclude == null || splitsToInclude.length == 0) {
+ splitsToInclude = new int[subDirectories.size()];
+ for (int i = 0; i < subDirectories.size(); i++) {
+ splitsToInclude[i] = i;
+ }
+ }
+ IndexReader[] subReaders = new IndexReader[splitsToInclude.length
+ + (excludeSuper ? 0 : 1)];
+ for (int subIndex = 0; subIndex < splitsToInclude.length; subIndex++) {
+ int splitToInclude = splitsToInclude[subIndex];
+ if (splitToInclude < 0 || splitToInclude > subDirectories.size()) {
+ System.out.println("Not including non-existent split " + splitToInclude
+ + " in split reader");
+ continue;
+ }
+ Directory subDirectory = subDirectories.get(splitToInclude);
+ subReaders[subIndex] = IndexReader.open(subDirectory, null, true,
+ termInfosIndexDivisor);
+ }
+ if (!excludeSuper) {
+ subReaders[subReaders.length - 1] = writer.getSuperReader();
+ }
+ writer.resumeSplits();
+ return subReaders;
+ }
+
+ /**
+ * Open a {@link SplitReader} based on the directory on which the split index
+ * is based (a.k.a. its super-directory).
+ *
+ * @param directory
+ * the super-directory on which the split index is based
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws IOException
+ * if there is a low-level IO error
+ */
+ public static SplitReader open(final Directory directory)
+ throws CorruptIndexException, IOException {
+ return open(directory, true);
+ }
+
+ /**
+ * Open a {@link SplitReader} based on the directory on which the split index
+ * is based (a.k.a. its super-directory). You should pass readOnly=true, since
+ * it gives much better concurrent performance, unless you intend to do write
+ * operations (delete documents or change norms) with the reader.
+ *
+ * @param directory
+ * the index directory
+ * @param readOnly
+ * true if no changes (deletions, norms) will be made with this
+ * SplitIndexReader
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws IOException
+ * if there is a low-level IO error
+ */
+ public static SplitReader open(final Directory directory, boolean readOnly)
+ throws CorruptIndexException, IOException {
+ return open(directory, null, readOnly);
+ }
+
+ /**
+ * Expert: returns an {@link SplitReader} reading the split index upto the
+ * given {@link IndexCommit}. You should pass readOnly=true, since it gives
+ * much better concurrent performance, unless you intend to do write
+ * operations (delete documents or change norms) with the reader.
+ *
+ * @param commit
+ * the commit point to open
+ * @param readOnly
+ * true if no changes (deletions, norms) will be made with this
+ * SplitIndexReader
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws IOException
+ * if there is a low-level IO error
+ */
+ public static SplitReader open(final IndexCommit commit, boolean readOnly)
+ throws CorruptIndexException, IOException {
+ return open(commit.getDirectory(), null, commit, readOnly,
+ DEFAULT_TERMS_INDEX_DIVISOR);
+ }
+
+ /**
+ * Expert: returns an {@link SplitReader} reading the index in the given
+ * directory, with a custom {@link IndexDeletionPolicy}. You should pass
+ * readOnly=true, since it gives much better concurrent performance, unless
+ * you intend to do write operations (delete documents or change norms) with
+ * the reader.
+ *
+ * @param directory
+ * the index directory
+ * @param deletionPolicy
+ * a custom deletion policy (only used if you use this reader to
+ * perform deletes or to set norms); see {@link IndexWriter} for
+ * details.
+ * @param readOnly
+ * true if no changes (deletions, norms) will be made with this
+ * SplitIndexReader
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws IOException
+ * if there is a low-level IO error
+ */
+ public static SplitReader open(final Directory directory,
+ IndexDeletionPolicy deletionPolicy, boolean readOnly)
+ throws CorruptIndexException, IOException {
+ return open(directory, deletionPolicy, readOnly,
+ DEFAULT_TERMS_INDEX_DIVISOR);
+ }
+
+ /**
+ * Expert: returns an {@link SplitReader} reading the index in the given
+ * directory, with a custom {@link IndexDeletionPolicy}. You should pass
+ * readOnly=true, since it gives much better concurrent performance, unless
+ * you intend to do write operations (delete documents or change norms) with
+ * the reader.
+ *
+ * @param directory
+ * the index directory
+ * @param deletionPolicy
+ * a custom deletion policy (only used if you use this reader to
+ * perform deletes or to set norms); see {@link IndexWriter} for
+ * details.
+ * @param readOnly
+ * true if no changes (deletions, norms) will be made with this
+ * SplitIndexReader
+ * @param termInfosIndexDivisor
+ * Subsamples which indexed terms are loaded into RAM. This has the
+ * same effect as {@link IndexWriter#setTermIndexInterval} except
+ * that setting must be done at indexing time while this setting can
+ * be set per reader. When set to N, then one in every
+ * N*termIndexInterval terms in the index is loaded into memory. By
+ * setting this to a value > 1 you can reduce memory usage, at the
+ * expense of higher latency when loading a TermInfo. The default
+ * value is 1. Set this to -1 to skip loading the terms index
+ * entirely.
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws IOException
+ * if there is a low-level IO error
+ */
+ public static SplitReader open(final Directory directory,
+ IndexDeletionPolicy deletionPolicy, boolean readOnly,
+ int termInfosIndexDivisor) throws CorruptIndexException, IOException {
+ return open(directory, deletionPolicy, null, readOnly,
+ termInfosIndexDivisor);
+ }
+
+ /**
+ * Expert: returns an {@link SplitReader} reading the index in the given
+ * Directory, using a specific commit and with a custom
+ * {@link IndexDeletionPolicy}. You should pass readOnly=true, since it gives
+ * much better concurrent performance, unless you intend to do write
+ * operations (delete documents or change norms) with the reader.
+ *
+ * @param commit
+ * the specific {@link IndexCommit} to open; see
+ * {@link SplitReader#listCommits} to list all commits in a directory
+ * @param deletionPolicy
+ * a custom deletion policy (only used if you use this reader to
+ * perform deletes or to set norms); see {@link IndexWriter} for
+ * details.
+ * @param readOnly
+ * true if no changes (deletions, norms) will be made with this
+ * SplitIndexReader
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws IOException
+ * if there is a low-level IO error
+ */
+ public static SplitReader open(final IndexCommit commit,
+ IndexDeletionPolicy deletionPolicy, boolean readOnly)
+ throws CorruptIndexException, IOException {
+ return open(commit.getDirectory(), deletionPolicy, commit, readOnly,
+ DEFAULT_TERMS_INDEX_DIVISOR);
+ }
+
+ /**
+ * Expert: returns an {@link SplitReader} reading the index in the given
+ * Directory, using a specific commit and with a custom
+ * {@link IndexDeletionPolicy}. You should pass readOnly=true, since it gives
+ * much better concurrent performance, unless you intend to do write
+ * operations (delete documents or change norms) with the reader.
+ *
+ * @param commit
+ * the specific {@link IndexCommit} to open; see
+ * {@link SplitReader#listCommits} to list all commits in a directory
+ * @param deletionPolicy
+ * a custom deletion policy (only used if you use this reader to
+ * perform deletes or to set norms); see {@link IndexWriter} for
+ * details.
+ * @param readOnly
+ * true if no changes (deletions, norms) will be made with this
+ * SplitIndexReader
+ * @param termInfosIndexDivisor
+ * Subsamples which indexed terms are loaded into RAM. This has the
+ * same effect as {@link IndexWriter#setTermIndexInterval} except
+ * that setting must be done at indexing time while this setting can
+ * be set per reader. When set to N, then one in every
+ * N*termIndexInterval terms in the index is loaded into memory. By
+ * setting this to a value > 1 you can reduce memory usage, at the
+ * expense of higher latency when loading a TermInfo. The default
+ * value is 1. Set this to -1 to skip loading the terms index
+ * entirely.
+ * @throws CorruptIndexException
+ * if the index is corrupt
+ * @throws IOException
+ * if there is a low-level IO error
+ */
+ public static SplitReader open(final IndexCommit commit,
+ IndexDeletionPolicy deletionPolicy, boolean readOnly,
+ int termInfosIndexDivisor) throws CorruptIndexException, IOException {
+ return open(commit.getDirectory(), deletionPolicy, commit, readOnly,
+ termInfosIndexDivisor);
+ }
+
+ /**
+ * Returns an {@link SplitReader} reading the split index upto the given
+ * {@link IndexCommit}, using the given {@link IndexDeletionPolicy}. You
+ * should pass readOnly=true, since it gives much better concurrent
+ * performance, unless you intend to do write operations (delete documents or
+ * change norms) with the reader.
+ *
+ * @param directory
+ * the index directory
+ * @param deletionPolicy
+ * a custom deletion policy (only used if you use this reader to
+ * perform deletes or to set norms); see {@link IndexWriter} for
+ * details.
+ * @param commit
+ * the specific {@link IndexCommit} to open; see
+ * {@link SplitReader#listCommits} to list all commits in a directory
+ * @param readOnly
+ * true if no changes (deletions, norms) will be made with this
+ * SplitIndexReader
+ * @param termInfosIndexDivisor
+ * Subsamples which indexed terms are loaded into RAM. This has the
+ * same effect as {@link IndexWriter#setTermIndexInterval} except
+ * that setting must be done at indexing time while this setting can
+ * be set per reader. When set to N, then one in every
+ * N*termIndexInterval terms in the index is loaded into memory. By
+ * setting this to a value > 1 you can reduce memory usage, at the
+ * expense of higher latency when loading a TermInfo. The default
+ * value is 1. Set this to -1 to skip loading the terms index
+ * entirely.
+ * @return
+ * @throws CorruptIndexException
+ * @throws IOException
+ */
+ static SplitReader open(final Directory directory,
+ final IndexDeletionPolicy deletionPolicy, final IndexCommit commit,
+ final boolean readOnly, int termInfosIndexDivisor)
+ throws CorruptIndexException, IOException {
+ List subReaders = new ArrayList();
+ for (Directory subDirectory : SplitWriter.getSplitPolicy(directory)
+ .getSplits()) {
+ IndexReader subReader = null;
+ if (commit == null) {
+ subReader = IndexReader.open(subDirectory, deletionPolicy, readOnly,
+ termInfosIndexDivisor);
+ } else {
+ subReader = IndexReader.open(subDirectory, deletionPolicy, readOnly,
+ termInfosIndexDivisor);
+ if (commit.getDirectory().equals(subDirectory)) {
+ break;
+ }
+ }
+ subReaders.add(subReader);
+ }
+ if (commit == null || commit.getDirectory().equals(directory)) {
+ subReaders.add(IndexReader.open(directory, deletionPolicy, readOnly,
+ termInfosIndexDivisor));
+ }
+ return new SplitReader(subReaders);
+ }
+
+ /**
+ * Add a sub-reader corresponding to a new split to this split reader.
+ *
+ * @param subReader
+ * the sub-reader for a new split that was just added
+ * @throws IOException
+ */
+ public void addSplit(IndexReader subReader) throws IOException {
+ IndexReader[] subReaders = new IndexReader[this.subReaders.length + 1];
+ for (int index = 0; index < this.subReaders.length; index++) {
+ subReaders[index] = this.subReaders[index];
+ }
+ subReaders[subReaders.length - 1] = subReader;
+ super.initialize(subReaders, false);
+ }
+
+ /**
+ * Remove a sub-reader corresponding to an existing split of this split
+ * reader.
+ *
+ * @param subReader
+ * the sub-reader for an existing split that is being removed
+ * @throws IOException
+ */
+ public void removeSplit(IndexReader subReader) throws IOException {
+ List readers = new ArrayList();
+ for (IndexReader reader : this.subReaders) {
+ if (!reader.directory().equals(subReader.directory())) {
+ readers.add(reader);
+ }
+ }
+ super.initialize(readers.toArray(new IndexReader[] {}), false);
+ }
+
+ /**
+ * Re-open the sub-reader corresponding to the given split.
+ *
+ * @param subDirectory
+ * the directory corresponding to the split being re-opened
+ * @throws CorruptIndexException
+ * @throws IOException
+ */
+ public void reopenSplit(Directory subDirectory) throws CorruptIndexException,
+ IOException {
+ List readers = new ArrayList(
+ this.subReaders.length);
+ for (IndexReader reader : this.subReaders) {
+ if (reader.directory().equals(subDirectory)) {
+ readers.add(reader.reopen());
+ } else {
+ readers.add(reader);
+ }
+ }
+
+ // Re-initialize the underlying {@link MultiReader}.
+ super.initialize(readers.toArray(new IndexReader[] {}), true);
+ }
+
+ /**
+ * @return the split writer for this reader
+ */
+ public SplitWriter getWriter() {
+ return writer;
+ }
+
+ /**
+ * @return the proxy for this reader's voters, which itself acts as a voter
+ */
+ protected SplitVoter getVoters() {
+ return voterPool.getVoterProxy();
+ }
+
+ /**
+ * Add a split voter to this split reader (and writer too, if it exists).
+ *
+ * @param splitVoter
+ * the split voter being added
+ */
+ public void addVoter(SplitVoter splitVoter) {
+ voterPool.addVoter(splitVoter);
+ if (writer != null) {
+ writer.addVoter(splitVoter);
+ }
+ }
+
+ /**
+ * Remove a split voter from this split reader (and writer too, if it exists).
+ *
+ * @param splitVoter
+ * the split voter being removed
+ */
+ public void removeVoter(SplitVoter splitVoter) {
+ voterPool.removeVoter(splitVoter);
+ if (writer != null) {
+ writer.removeVoter(splitVoter);
+ }
+ }
+
+ /**
+ * When a document is deleted based on its document number, notify registered
+ * voters.
+ *
+ * @param docNum
+ * the number of the document being deleted
+ */
+ @Override
+ public synchronized void deleteDocument(int docNum)
+ throws StaleReaderException, CorruptIndexException,
+ LockObtainFailedException, IOException {
+ if (getVoters().onDeleteDocument(docNum).carryOn()) {
+ super.deleteDocument(docNum);
+ }
+ }
+
+ /**
+ * When a document is deleted based on a term, notify registered voters.
+ *
+ * @param term
+ * the term on which the document being removed is indexed
+ * @return the number of documents deleted
+ */
+ @Override
+ public int deleteDocuments(Term term) throws StaleReaderException,
+ CorruptIndexException, LockObtainFailedException, IOException {
+ SplitVote vote = getVoters().onDeleteDocument(term);
+ int documentsDeleted = vote.getReturnValue() != null ? (Integer) vote
+ .getReturnValue() : 0;
+ if (vote.carryOn()) {
+ documentsDeleted += super.deleteDocuments(term);
+ }
+ return documentsDeleted;
+ }
+
+ /**
+ * When all document deletes are rolled back, notify registered voters.
+ */
+ @Override
+ public synchronized void undeleteAll() throws StaleReaderException,
+ CorruptIndexException, LockObtainFailedException, IOException {
+ if (getVoters().onUndeleteAll().carryOn()) {
+ super.undeleteAll();
+ }
+ }
+
+ /**
+ * When document changes are committed, notify registered voters.
+ *
+ * @param commitUserData
+ * the data passed by the user to {@link IndexReader#commit}
+ */
+ @Override
+ protected void doCommit(Map commitUserData) throws IOException {
+ if (getVoters().onCommit(commitUserData).carryOn()) {
+ super.doCommit(commitUserData);
+ }
+ }
+}