Index: src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMigration.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMigration.java (revision 1023844)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMigration.java (working copy)
@@ -16,23 +16,33 @@
  */
 package org.apache.jackrabbit.core.query.lucene;
 
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Queue;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.jackrabbit.core.query.lucene.directory.DirectoryManager;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FilterIndexReader;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.TermPositions;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermPositions;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.Field;
-import org.apache.jackrabbit.core.query.lucene.directory.DirectoryManager;
-import org.slf4j.LoggerFactory;
 import org.slf4j.Logger;
-
-import java.io.IOException;
+import org.slf4j.LoggerFactory;
 
 /**
  * IndexMigration implements a utility that migrates a Jackrabbit
@@ -55,10 +65,12 @@
      *
      * @param index the index to check and migration if needed.
      * @param directoryManager the directory manager.
+     * @param oldSeparatorChar the old separator char that needs to be replaced.
      * @throws IOException if an error occurs while migrating the index.
      */
     public static void migrate(PersistentIndex index,
-                               DirectoryManager directoryManager)
+                               DirectoryManager directoryManager,
+                               char oldSeparatorChar)
             throws IOException {
         Directory indexDir = index.getDirectory();
         log.debug("Checking {} ...", indexDir);
@@ -76,7 +88,7 @@
             TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, ""));
             try {
                 Term t = terms.term();
-                if (t.text().indexOf('\uFFFF') == -1) {
+                if (t.text().indexOf(oldSeparatorChar) == -1) {
                     log.debug("Index already migrated");
                     return;
                 }
@@ -102,7 +114,8 @@
                     IndexWriter.MaxFieldLength.UNLIMITED);
             try {
                 IndexReader r = new MigrationIndexReader(
-                        IndexReader.open(index.getDirectory()));
+                        IndexReader.open(index.getDirectory()),
+                        oldSeparatorChar);
                 try {
                     writer.addIndexes(new IndexReader[]{r});
                     writer.close();
@@ -131,8 +144,11 @@
      */
     private static class MigrationIndexReader extends FilterIndexReader {
 
-        public MigrationIndexReader(IndexReader in) {
+        private final char oldSepChar;
+
+        public MigrationIndexReader(IndexReader in, char oldSepChar) {
             super(in);
+            this.oldSepChar = oldSepChar;
         }
 
         public Document document(int n, FieldSelector fieldSelector)
@@ -141,28 +157,93 @@
             Fieldable[] fields = doc.getFieldables(FieldNames.PROPERTIES);
             if (fields != null) {
                 doc.removeFields(FieldNames.PROPERTIES);
-                for (int i = 0; i < fields.length; i++) {
-                    String value = fields[i].stringValue();
-                    value = value.replace('\uFFFF', '[');
-                    doc.add(new Field(FieldNames.PROPERTIES, value,
-                            Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
+                for (int i = 0; i < fields.length; i++) {
+                    Fieldable field = fields[i];
+                    String value = field.stringValue();
+                    value = value.replace(oldSepChar, '[');
+                    doc.add(new Field(FieldNames.PROPERTIES, value, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                 }
             }
             return doc;
         }
 
         public TermEnum terms() throws IOException {
-            return new MigrationTermEnum(in.terms());
+            List enums = new ArrayList();
+            List fieldNames = new ArrayList();
+
+            for (Iterator iterator = in.getFieldNames(FieldOption.ALL).iterator(); iterator.hasNext();) {
+                fieldNames.add(iterator.next());
+            }
+            Collections.sort(fieldNames);
+            for (Iterator iterator = fieldNames.iterator(); iterator.hasNext();) {
+                String fieldName = (String) iterator.next();
+                if (fieldName.equals(FieldNames.PROPERTIES)) {
+                    addPropertyTerms(enums);
+                } else {
+                    enums.add(new RangeScan(in, new Term(fieldName, ""), new Term(fieldName, "\uFFFF")));
+                }
+            }
+            return new MigrationTermEnum(new ChainedTermEnum(enums), oldSepChar);
         }
 
         public TermPositions termPositions() throws IOException {
-            return new MigrationTermPositions(in.termPositions());
+            return new MigrationTermPositions(in.termPositions(), oldSepChar);
+        }
+
+        /**
+         * Collects one range scan per property name found in the terms of the
+         * {@link FieldNames#PROPERTIES} field and appends them to
+         * <code>enums</code>, ordered by the name as it reads once the old
+         * separator char is replaced with '['.
+         *
+         * @param enums the list the term enums are appended to.
+         * @throws IOException if an error occurs while reading the terms.
+         */
+        private void addPropertyTerms(List enums) throws IOException {
+            SortedMap termEnums = new TreeMap(new Comparator() {
+                public int compare(Object o1, Object o2) {
+                    String s1 = (String) o1;
+                    String s2 = (String) o2;
+                    s1 = s1.replace(oldSepChar, '[');
+                    s2 = s2.replace(oldSepChar, '[');
+                    return s1.compareTo(s2);
+                }
+            });
+            // scan through terms and find embedded field names
+            TermEnum terms = new RangeScan(in,
+                    new Term(FieldNames.PROPERTIES, ""),
+                    new Term(FieldNames.PROPERTIES, "\uFFFF"));
+            try {
+                String previous = null;
+                // NOTE(review): ChainedTermEnum treats its enums as possibly
+                // positioned at their first term already; only advance here
+                // when that is not the case, so the first term is never skipped
+                boolean hasTerm = terms.term() != null || terms.next();
+                while (hasTerm) {
+                    Term t = terms.term();
+                    String name = t.text().substring(0, t.text().indexOf(oldSepChar) + 1);
+                    if (!name.equals(previous)) {
+                        termEnums.put(name, new RangeScan(in,
+                                new Term(FieldNames.PROPERTIES, name),
+                                new Term(FieldNames.PROPERTIES, name + "\uFFFF")));
+                    }
+                    previous = name;
+                    hasTerm = terms.next();
+                }
+            } finally {
+                // the scan is only used to discover the names, release it
+                terms.close();
+            }
+            enums.addAll(termEnums.values());
         }
 
         private static class MigrationTermEnum extends FilterTermEnum {
 
-            public MigrationTermEnum(TermEnum in) {
+            private final char oldSepChar;
+
+            public MigrationTermEnum(TermEnum in, char oldSepChar) {
                 super(in);
+                this.oldSepChar = oldSepChar;
             }
 
             public Term term() {
@@ -172,7 +253,7 @@
                 }
                 if (t.field().equals(FieldNames.PROPERTIES)) {
                     String text = t.text();
-                    return t.createTerm(text.replace('\uFFFF', '['));
+                    return t.createTerm(text.replace(oldSepChar, '['));
                 } else {
                     return t;
                 }
@@ -185,14 +266,22 @@
 
         private static class MigrationTermPositions extends FilterTermPositions {
 
-            public MigrationTermPositions(TermPositions in) {
+            private final char oldSepChar;
+
+            public MigrationTermPositions(TermPositions in, char oldSepChar) {
                 super(in);
+                this.oldSepChar = oldSepChar;
             }
 
             public void seek(Term term) throws IOException {
                 if (term.field().equals(FieldNames.PROPERTIES)) {
                     char[] text = term.text().toCharArray();
-                    text[term.text().indexOf('[')] = '\uFFFF';
+                    // map the new separator back to the old one; leave the
+                    // term untouched when it does not contain a separator
+                    int sep = term.text().indexOf('[');
+                    if (sep != -1) {
+                        text[sep] = oldSepChar;
+                    }
                     super.seek(term.createTerm(new String(text)));
                 } else {
                     super.seek(term);
@@ -208,4 +297,67 @@
             }
         }
     }
-}
+
+    /**
+     * A term enum that chains the passed term enums: terms are returned enum
+     * by enum, in the iteration order of the passed collection. An exhausted
+     * enum is closed and dropped from the chain. An enum the chain moves on
+     * to may already be positioned at its first term.
+     */
+    static final class ChainedTermEnum extends TermEnum {
+
+        private final Queue queue = new LinkedList();
+
+        public ChainedTermEnum(Collection enums) {
+            super();
+            queue.addAll(enums);
+        }
+
+        public boolean next() throws IOException {
+            boolean newEnum = false;
+            for (;;) {
+                TermEnum terms = (TermEnum) queue.peek();
+                if (terms == null) {
+                    // no more enums
+                    break;
+                }
+                if (newEnum && terms.term() != null) {
+                    // need to check if enum is already positioned
+                    // at first term
+                    return true;
+                }
+                if (terms.next()) {
+                    return true;
+                } else {
+                    queue.remove();
+                    terms.close();
+                    newEnum = true;
+                }
+            }
+            return false;
+        }
+
+        public Term term() {
+            TermEnum terms = (TermEnum) queue.peek();
+            if (terms != null) {
+                return terms.term();
+            }
+            return null;
+        }
+
+        public int docFreq() {
+            TermEnum terms = (TermEnum) queue.peek();
+            if (terms != null) {
+                return terms.docFreq();
+            }
+            return 0;
+        }
+
+        public void close() throws IOException {
+            // close remaining
+            while (!queue.isEmpty()) {
+                ((TermEnum) queue.remove()).close();
+            }
+        }
+    }
+}
\ No newline at end of file
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java (revision 1023844)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java (working copy)
@@ -63,7 +63,7 @@
                 cache, indexingQueue);
         this.name = name;
         if (isExisting()) {
-            IndexMigration.migrate(this, directoryManager);
+            IndexMigration.migrate(this, directoryManager, '\uFFFF');
         }
     }
 
Index: src/test/java/org/apache/jackrabbit/core/query/lucene/ChainedTermEnumTest.java
===================================================================
--- src/test/java/org/apache/jackrabbit/core/query/lucene/ChainedTermEnumTest.java (revision 0)
+++ src/test/java/org/apache/jackrabbit/core/query/lucene/ChainedTermEnumTest.java (revision 0)
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * ChainedTermEnumTest implements a test for JCR-2410.
+ */
+public class ChainedTermEnumTest extends TestCase {
+
+    public void testEnum() throws Exception {
+        Collection enums = new ArrayList();
+        enums.add(createTermEnum("a", 2));
+        enums.add(createTermEnum("b", 1));
+        enums.add(createTermEnum("c", 0));
+        enums.add(createTermEnum("d", 2));
+        TermEnum terms = new IndexMigration.ChainedTermEnum(enums);
+        List expected = new ArrayList();
+        expected.addAll(Arrays.asList(new String[]{"a0", "a1", "b0", "d0", "d1"}));
+        List result = new ArrayList();
+        do {
+            Term t = terms.term();
+            if (t != null) {
+                result.add(t.text());
+            }
+        } while (terms.next());
+        assertEquals(expected, result);
+    }
+
+    protected TermEnum createTermEnum(String prefix, int numTerms)
+            throws IOException {
+        Directory dir = new RAMDirectory();
+        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(),
+                true, IndexWriter.MaxFieldLength.UNLIMITED);
+        for (int i = 0; i < numTerms; i++) {
+            Document doc = new Document();
+            doc.add(new Field("field", prefix + i,
+                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
+            writer.addDocument(doc);
+        }
+        writer.close();
+        IndexReader reader = IndexReader.open(dir);
+        TermEnum terms = reader.terms();
+        if (terms.term() == null) {
+            // position at first term
+            terms.next();
+        }
+        return terms;
+    }
+}
\ No newline at end of file
Index: src/test/java/org/apache/jackrabbit/core/query/lucene/IndexMigrationTest.java
===================================================================
--- src/test/java/org/apache/jackrabbit/core/query/lucene/IndexMigrationTest.java (revision 0)
+++ src/test/java/org/apache/jackrabbit/core/query/lucene/IndexMigrationTest.java (revision 0)
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
+import junit.framework.TestCase;
+
+import org.apache.jackrabbit.core.query.lucene.directory.DirectoryManager;
+import org.apache.jackrabbit.core.query.lucene.directory.RAMDirectoryManager;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * IndexMigrationTest contains a test case for JCR-2393.
+ */
+public class IndexMigrationTest extends TestCase {
+
+    /**
+     * Cannot use \uFFFF because of LUCENE-1221.
+     */
+    private static final char SEP_CHAR = '\uFFFE';
+
+    /**
+     * Migrates an index with property names that are prefixes of each other.
+     * There are no assertions, the test passes when the migration completes
+     * without throwing an exception.
+     */
+    public void testMigration() throws Exception {
+        List docs = new ArrayList();
+        docs.add(createDocument("ab", "a"));
+        docs.add(createDocument("a", "b"));
+        docs.add(createDocument("abcd", "c"));
+        docs.add(createDocument("abc", "d"));
+
+        DirectoryManager dirMgr = new RAMDirectoryManager();
+
+        PersistentIndex idx = new PersistentIndex("index",
+                new StandardAnalyzer(), Similarity.getDefault(),
+                new DocNumberCache(100),
+                new IndexingQueue(new IndexingQueueStore(new RAMDirectory())),
+                dirMgr);
+        idx.addDocuments((Document[]) docs.toArray(new Document[docs.size()]));
+        idx.commit();
+
+        IndexMigration.migrate(idx, dirMgr, SEP_CHAR);
+    }
+
+    protected static String createNamedValue14(String name, String value) {
+        return name + SEP_CHAR + value;
+    }
+
+    protected static Document createDocument(String name, String value) {
+        Document doc = new Document();
+        doc.add(new Field(FieldNames.UUID, UUID.randomUUID().toString(), Field.Store.YES, Field.Index.NO));
+        doc.add(new Field(FieldNames.PROPERTIES, createNamedValue14(name, value), Field.Store.NO, Field.Index.NOT_ANALYZED));
+        doc.add(new Field(FieldNames.FULLTEXT_PREFIX + ":" + name, value, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+        return doc;
+    }
+}
Index: src/test/java/org/apache/jackrabbit/core/query/lucene/TestAll.java
===================================================================
--- src/test/java/org/apache/jackrabbit/core/query/lucene/TestAll.java (revision 1023844)
+++ src/test/java/org/apache/jackrabbit/core/query/lucene/TestAll.java (working copy)
@@ -37,6 +37,8 @@
 
         suite.addTestSuite(IndexingQueueTest.class);
         suite.addTestSuite(IndexingAggregateTest.class);
+        suite.addTestSuite(IndexMigrationTest.class);
+        suite.addTestSuite(ChainedTermEnumTest.class);
 
         return suite;
     }