Index: src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (revision 696662) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (working copy) @@ -154,9 +154,8 @@ * in the constructor. * * @return the directory instance passed in the constructor - * @throws IOException */ - Directory getDirectory() throws IOException { + Directory getDirectory() { return directory; } @@ -448,6 +447,9 @@ private Document getFinishedDocument(Document doc) throws IOException { if (!Util.isDocumentReady(doc)) { Document copy = new Document(); + // mark the document that reindexing is required + copy.add(new Field(FieldNames.REINDEXING_REQUIRED, "", + Field.Store.NO, Field.Index.NO_NORMS)); Iterator fields = doc.getFields().iterator(); while (fields.hasNext()) { Fieldable f = (Fieldable) fields.next(); Index: src/main/java/org/apache/jackrabbit/core/query/lucene/FieldNames.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/FieldNames.java (revision 696662) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/FieldNames.java (working copy) @@ -101,6 +101,12 @@ public static final String PROPERTY_LENGTHS = "_:PROPERTY_LENGTHS".intern(); /** + * Name of the field that marks nodes that require reindexing because the + * text extraction process timed out. See also {@link IndexingQueue}. + */ + public static final String REINDEXING_REQUIRED = "_:REINDEXING_REQUIRED".intern(); + + /** * Returns a named length for use as a term in the index. The named length * is of the form: propertyName + '[' + * {@link LongField#longToString(long)}. Index: src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueue.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueue.java (revision 696662) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueue.java (working copy) @@ -26,6 +26,8 @@ import org.apache.jackrabbit.core.NodeId; import org.apache.jackrabbit.uuid.UUID; import org.apache.lucene.document.Document; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -71,28 +73,42 @@ * Initializes the indexing queue. * * @param index the multi index this indexing queue belongs to. + * @throws IOException if an error occurs while reading from the index. */ - void initialize(MultiIndex index) { + void initialize(MultiIndex index) throws IOException { if (initialized) { throw new IllegalStateException("already initialized"); } + // check index for nodes that need to be reindexed + CachingMultiIndexReader reader = index.getIndexReader(); + try { + TermDocs tDocs = reader.termDocs( + new Term(FieldNames.REINDEXING_REQUIRED, "")); + try { + while (tDocs.next()) { + queueStore.addUUID(reader.document(tDocs.doc(), + FieldSelectors.UUID).get(FieldNames.UUID)); + } + } finally { + tDocs.close(); + } + } finally { + reader.release(); + } String[] uuids = queueStore.getPending(); for (int i = 0; i < uuids.length; i++) { try { UUID uuid = UUID.fromString(uuids[i]); Document doc = index.createDocument(new NodeId(uuid)); pendingDocuments.put(uuids[i], doc); + log.debug("added node {}. New size of indexing queue: {}", + uuid, new Integer(pendingDocuments.size())); } catch (IllegalArgumentException e) { log.warn("Invalid UUID in indexing queue store: " + uuids[i]); } catch (RepositoryException e) { // node does not exist anymore log.debug("Node with uuid {} does not exist anymore", uuids[i]); - try { - queueStore.removeUUID(uuids[i]); - } catch (IOException ex) { - log.warn("Unable to remove node {} from indexing queue", - uuids[i], ex); - } + queueStore.removeUUID(uuids[i]); } } initialized = true; @@ -128,10 +144,8 @@ * @return the document for the given uuid or null * if this queue does not contain a document with the given * uuid. - * @throws IOException if an error occurs removing the document from the - * queue. */ - public synchronized Document removeDocument(String uuid) throws IOException { + public synchronized Document removeDocument(String uuid) { checkInitialized(); Document doc = (Document) pendingDocuments.remove(uuid); if (doc != null) { @@ -149,10 +163,8 @@ * @return an existing document in the queue with the same uuid as the one * in doc or null if there was no such * document. - * @throws IOException an error occurs while adding the document to this - * queue. */ - public synchronized Document addDocument(Document doc) throws IOException { + public synchronized Document addDocument(Document doc) { checkInitialized(); String uuid = doc.get(FieldNames.UUID); Document existing = (Document) pendingDocuments.put(uuid, doc); @@ -168,10 +180,8 @@ /** * Closes this indexing queue and disposes all pending documents. - * - * @throws IOException if an error occurs while closing this queue. */ - public synchronized void close() throws IOException { + public synchronized void close() { checkInitialized(); // go through pending documents and close readers Iterator it = pendingDocuments.values().iterator(); @@ -184,17 +194,6 @@ } /** - * Commits any pending changes to this queue store to disk. - * - * @throws IOException if an error occurs while writing pending changes to - * disk. - */ - public synchronized void commit() throws IOException { - checkInitialized(); - queueStore.commit(); - } - - /** * Checks if this indexing queue is initialized and otherwise throws a * {@link IllegalStateException}. */ @@ -203,4 +202,15 @@ throw new IllegalStateException("not initialized"); } } + + //----------------------------< testing only >------------------------------ + + /** + * This method is for testing only! + * + * @return the number of the currently pending documents. + */ + synchronized int getNumPendingDocuments() { + return pendingDocuments.size(); + } } Index: src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueStore.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueStore.java (revision 696662) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueStore.java (working copy) @@ -18,7 +18,6 @@ import org.apache.jackrabbit.core.fs.FileSystem; import org.apache.jackrabbit.core.fs.FileSystemException; -import org.apache.jackrabbit.core.fs.RandomAccessOutputStream; import org.slf4j.LoggerFactory; import org.slf4j.Logger; @@ -26,15 +25,15 @@ import java.io.InputStream; import java.io.BufferedReader; import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.io.BufferedOutputStream; import java.util.Set; import java.util.HashSet; /** - * IndexingQueueStore implements the persistent store to keep - * track of pending document in an indexing queue. + * IndexingQueueStore implements a store that keeps the uuids of + * nodes that are pending in the indexing queue. Until Jackrabbit 1.4 this store + * was also persisted to a {@link FileSystem}. Starting with 1.5 the pending + * nodes are marked directly in the index with a special field. + * See {@link FieldNames#REINDEXING_REQUIRED}. */ class IndexingQueueStore { @@ -64,7 +63,7 @@ private final Set pending = new HashSet(); /** - * The file system where to write the pending document UUIDs. + * The file system from where to read pending document UUIDs. */ private final FileSystem fs; @@ -74,11 +73,6 @@ private final String fileName; /** - * Non-null if we are currently writing to the file. - */ - private Writer out; - - /** * Creates a new IndexingQueueStore using the given file * system. * @@ -105,10 +99,8 @@ * Adds a uuid to the store. * * @param uuid the uuid to add. - * @throws IOException if an error occurs while writing. */ - public void addUUID(String uuid) throws IOException { - writeEntry(ADD, uuid, getLog()); + public void addUUID(String uuid) { pending.add(uuid); } @@ -116,46 +108,26 @@ * Removes a uuid from the store. * * @param uuid the uuid to add. - * @throws IOException if an error occurs while writing. */ - public void removeUUID(String uuid) throws IOException { - writeEntry(REMOVE, uuid, getLog()); + public void removeUUID(String uuid) { pending.remove(uuid); } /** - * Commits the pending changes to the file. - * - * @throws IOException if an error occurs while writing. + * Closes this queue store. */ - public void commit() throws IOException { - if (out != null) { - out.flush(); - if (pending.size() == 0) { - out.close(); - out = null; - // truncate log - try { - fs.getOutputStream(fileName).close(); - } catch (FileSystemException e) { - // ignore + public void close() { + if (pending.isEmpty()) { + try { + if (fs.exists(fileName)) { + fs.deleteFile(fileName); } + } catch (FileSystemException e) { + log.warn("unable to delete " + fileName); } } } - /** - * Flushes and closes this queue store. - * - * @throws IOException if an error occurs while writing. - */ - public void close() throws IOException { - commit(); - if (out != null) { - out.close(); - } - } - //----------------------------< internal >---------------------------------- /** @@ -198,50 +170,4 @@ } } } - - /** - * Writes an entry to the log file. - * - * @param op the operation. Either {@link #ADD} or {@link #REMOVE}. - * @param uuid the uuid of the added or removed node. - * @param writer the writer where the entry is written to. - * @throws IOException if an error occurs when writing the entry. - */ - private static void writeEntry(String op, String uuid, Writer writer) throws IOException { - StringBuffer buf = new StringBuffer(op); - buf.append(' ').append(uuid).append('\n'); - writer.write(buf.toString()); - } - - /** - * Returns the writer to the log file. - * - * @return the writer to the log file. - * @throws IOException if an error occurs while opening the log file. - */ - private Writer getLog() throws IOException { - if (out == null) { - // open file - try { - long len = 0; - if (fs.exists(fileName)) { - len = fs.length(fileName); - } - RandomAccessOutputStream raos - = fs.getRandomAccessOutputStream(fileName); - raos.seek(len); - // use buffering - out = new OutputStreamWriter( - new BufferedOutputStream(raos, 1024), - ENCODING); - } catch (FileSystemException e) { - if (out != null) { - out.close(); - out = null; - } - throw Util.createIOException(e); - } - } - return out; - } } Index: src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java (revision 696662) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java (working copy) @@ -768,11 +768,7 @@ } // finally close indexing queue - try { - indexingQueue.close(); - } catch (IOException e) { - log.error("Exception while closing search index.", e); - } + indexingQueue.close(); } } @@ -981,8 +977,6 @@ // after a crash. if (a.getType() == Action.TYPE_COMMIT || a.getType() == Action.TYPE_ADD_INDEX) { redoLog.flush(); - // also flush indexing queue - indexingQueue.commit(); } return a; } @@ -1198,11 +1192,7 @@ // remove documents from the queue Iterator it = finished.keySet().iterator(); while (it.hasNext()) { - try { - indexingQueue.removeDocument(it.next().toString()); - } catch (IOException e) { - log.error("Failed to remove node from indexing queue", e); - } + indexingQueue.removeDocument(it.next().toString()); } try { Index: src/test/java/org/apache/jackrabbit/core/query/AbstractIndexingTest.java =================================================================== --- src/test/java/org/apache/jackrabbit/core/query/AbstractIndexingTest.java (revision 696662) +++ src/test/java/org/apache/jackrabbit/core/query/AbstractIndexingTest.java (working copy) @@ -43,4 +43,11 @@ testRootNode = null; super.tearDown(); } + + /** + * @return the query handler inside the {@link #qm query manager}. + */ + protected QueryHandler getQueryHandler() { + return ((QueryManagerImpl) qm).getQueryHandler(); + } } Index: src/test/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueTest.java =================================================================== --- src/test/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueTest.java (revision 0) +++ src/test/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueTest.java (revision 0) @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.query.lucene; + +import org.apache.jackrabbit.extractor.TextExtractor; +import org.apache.jackrabbit.core.query.AbstractIndexingTest; + +import javax.jcr.Node; +import javax.jcr.NodeIterator; +import javax.jcr.query.Query; +import java.io.Reader; +import java.io.InputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.ByteArrayInputStream; +import java.util.Calendar; + +/** + * IndexingQueueTest checks if the indexing queue properly indexes + * nodes in a background thread when text extraction takes more than 100 ms. + */ +public class IndexingQueueTest extends AbstractIndexingTest { + + private static final String CONTENT_TYPE = "application/indexing-queue-test"; + + private static final String ENCODING = "UTF-8"; + + public void testQueue() throws Exception { + SearchIndex index = (SearchIndex) getQueryHandler(); + IndexingQueue queue = index.getIndex().getIndexingQueue(); + + assertEquals(0, queue.getNumPendingDocuments()); + + String text = "the quick brown fox jumps over the lazy dog."; + InputStream in = new ByteArrayInputStream(text.getBytes(ENCODING)); + Node resource = testRootNode.addNode(nodeName1, "nt:resource"); + resource.setProperty("jcr:data", in); + resource.setProperty("jcr:lastModified", Calendar.getInstance()); + resource.setProperty("jcr:mimeType", CONTENT_TYPE); + resource.setProperty("jcr:encoding", ENCODING); + session.save(); + + assertEquals(1, queue.getNumPendingDocuments()); + + Query q = qm.createQuery(testPath + "/*[jcr:contains(., 'fox')]", Query.XPATH); + NodeIterator nodes = q.execute().getNodes(); + assertFalse(nodes.hasNext()); + + while (queue.getNumPendingDocuments() > 0) { + Thread.sleep(50); + } + + q = qm.createQuery(testPath + "/*[jcr:contains(., 'fox')]", Query.XPATH); + nodes = q.execute().getNodes(); + assertTrue(nodes.hasNext()); + } + + public static final class Extractor implements TextExtractor { + + public String[] getContentTypes() { + return new String[]{CONTENT_TYPE}; + } + + public Reader extractText(InputStream stream, String type, String encoding) + throws IOException { + try { + Thread.sleep(200); + } catch (InterruptedException e) { + throw new IOException(); + } + return new InputStreamReader(stream, encoding); + } + } +} Property changes on: src\test\java\org\apache\jackrabbit\core\query\lucene\IndexingQueueTest.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/test/java/org/apache/jackrabbit/core/query/lucene/TestAll.java =================================================================== --- src/test/java/org/apache/jackrabbit/core/query/lucene/TestAll.java (revision 696662) +++ src/test/java/org/apache/jackrabbit/core/query/lucene/TestAll.java (working copy) @@ -14,10 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.jackrabbit.core.query; +package org.apache.jackrabbit.core.query.lucene; -import org.apache.jackrabbit.core.query.lucene.hits.ArrayHitsTest; - import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; @@ -37,29 +35,8 @@ public static Test suite() { TestSuite suite = new TestSuite("Search tests"); - suite.addTestSuite(SimpleQueryTest.class); - suite.addTestSuite(FulltextQueryTest.class); - suite.addTestSuite(SelectClauseTest.class); - suite.addTestSuite(SQLTest.class); - suite.addTestSuite(OrderByTest.class); - suite.addTestSuite(XPathAxisTest.class); - suite.addTestSuite(SkipDeletedNodesTest.class); - suite.addTestSuite(SkipDeniedNodesTest.class); - suite.addTestSuite(MixinTest.class); - suite.addTestSuite(DerefTest.class); - suite.addTestSuite(VersionStoreQueryTest.class); - suite.addTestSuite(UpperLowerCaseQueryTest.class); - suite.addTestSuite(ChildAxisQueryTest.class); - suite.addTestSuite(QueryResultTest.class); - suite.addTestSuite(FnNameQueryTest.class); - suite.addTestSuite(PathQueryNodeTest.class); - suite.addTestSuite(SynonymProviderTest.class); - suite.addTestSuite(ArrayHitsTest.class); - suite.addTestSuite(ExcerptTest.class); - suite.addTestSuite(IndexingAggregateTest.class); - suite.addTestSuite(IndexFormatVersionTest.class); - suite.addTestSuite(IndexingRuleTest.class); + suite.addTestSuite(IndexingQueueTest.class); return suite; } -} +} \ No newline at end of file Property changes on: src\test\java\org\apache\jackrabbit\core\query\lucene\TestAll.java ___________________________________________________________________ Added: svn:mergeinfo Index: src/test/repository/workspaces/indexing-test/workspace.xml =================================================================== --- src/test/repository/workspaces/indexing-test/workspace.xml (revision 696662) +++ src/test/repository/workspaces/indexing-test/workspace.xml (working copy) @@ -39,6 +39,8 @@ + +