Index: src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java	(revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java	(working copy)
@@ -233,11 +233,12 @@
         final SortedMap termMap = new TreeMap();
         Reader r = new StringReader(text);
         TokenStream ts = index.getTextAnalyzer().tokenStream("", r);
-        Token t;
+        Token t = new Token();
         try {
-            while ((t = ts.next()) != null) {
+            while ((t = ts.next(t)) != null) {
+                String termText = t.term();
                 TermVectorOffsetInfo[] info =
-                        (TermVectorOffsetInfo[]) termMap.get(t.termText());
+                        (TermVectorOffsetInfo[]) termMap.get(termText);
                 if (info == null) {
                     info = new TermVectorOffsetInfo[1];
                 } else {
@@ -247,7 +248,7 @@
                 }
                 info[info.length - 1] = new TermVectorOffsetInfo(
                         t.startOffset(), t.endOffset());
-                termMap.put(t.termText(), info);
+                termMap.put(termText, info);
             }
         } catch (IOException e) {
             // should never happen, we are reading from a string
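
For background on the AbstractExcerpt change: Lucene 2.3/2.4 introduced a reusable-token variant of the TokenStream API, TokenStream.next(Token), which fills a caller-supplied Token instead of allocating a new one per term. Callers must continue with the returned instance and copy out any state they need before the next call, which is why the loop above extracts the term text eagerly. A minimal sketch of the consuming pattern; the analyzer, field name, and input text are illustrative, not taken from the patch:

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;

    public class ReusableTokenExample {
        public static void main(String[] args) throws IOException {
            TokenStream ts = new StandardAnalyzer().tokenStream(
                    "content", new StringReader("the quick brown fox"));
            // a single Token instance is recycled across the whole stream;
            // always continue with the instance returned by next(Token)
            Token t = new Token();
            while ((t = ts.next(t)) != null) {
                System.out.println(t.term()
                        + " [" + t.startOffset() + "," + t.endOffset() + "]");
            }
            ts.close();
        }
    }
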
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java	(revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java	(working copy)
@@ -20,8 +20,6 @@
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.SerialMergeScheduler;
-import org.apache.lucene.index.LogDocMergePolicy;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -80,15 +78,6 @@
     /** Compound file flag */
     private boolean useCompoundFile = true;
 
-    /** minMergeDocs config parameter */
-    private int minMergeDocs = SearchIndex.DEFAULT_MIN_MERGE_DOCS;
-
-    /** maxMergeDocs config parameter */
-    private int maxMergeDocs = SearchIndex.DEFAULT_MAX_MERGE_DOCS;
-
-    /** mergeFactor config parameter */
-    private int mergeFactor = SearchIndex.DEFAULT_MERGE_FACTOR;
-
     /** maxFieldLength config parameter */
     private int maxFieldLength = SearchIndex.DEFAULT_MAX_FIELD_LENGTH;
 
@@ -145,7 +134,8 @@
         this.isExisting = IndexReader.indexExists(directory);
 
         if (!isExisting) {
-            indexWriter = new IndexWriter(directory, analyzer);
+            indexWriter = new IndexWriter(directory, analyzer,
+                    IndexWriter.MaxFieldLength.LIMITED);
             // immediately close, now that index has been created
             indexWriter.close();
             indexWriter = null;
@@ -302,7 +292,7 @@
         }
         if (sharedReader == null) {
             // create new shared reader
-            IndexReader reader = IndexReader.open(getDirectory());
+            IndexReader reader = IndexReader.open(getDirectory(), true);
             reader.setTermInfosIndexDivisor(termInfosIndexDivisor);
             CachingIndexReader cr = new CachingIndexReader(
                     reader, cache, initCache);
@@ -339,18 +329,11 @@
             indexReader = null;
         }
         if (indexWriter == null) {
-            indexWriter = new IndexWriter(getDirectory(), analyzer);
+            indexWriter = new IndexWriter(getDirectory(), analyzer,
+                    new IndexWriter.MaxFieldLength(maxFieldLength));
             indexWriter.setSimilarity(similarity);
-            // since lucene 2.0 setMaxBuffereDocs is equivalent to previous minMergeDocs attribute
-            indexWriter.setMaxBufferedDocs(minMergeDocs);
-            indexWriter.setMaxMergeDocs(maxMergeDocs);
-            indexWriter.setMergeFactor(mergeFactor);
-            indexWriter.setMaxFieldLength(maxFieldLength);
             indexWriter.setUseCompoundFile(useCompoundFile);
             indexWriter.setInfoStream(STREAM_LOGGER);
-            indexWriter.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
-            indexWriter.setMergeScheduler(new SerialMergeScheduler());
-            indexWriter.setMergePolicy(new LogDocMergePolicy());
         }
         return indexWriter;
     }
@@ -372,12 +355,12 @@
      */
     protected synchronized void commit(boolean optimize) throws IOException {
         if (indexReader != null) {
+            log.debug("committing IndexReader.");
             indexReader.flush();
         }
         if (indexWriter != null) {
             log.debug("committing IndexWriter.");
-            indexWriter.close();
-            indexWriter = null;
+            indexWriter.commit();
         }
         // optimize if requested
         if (optimize) {
@@ -484,7 +467,7 @@
             Document copy = new Document();
             // mark the document that reindexing is required
             copy.add(new Field(FieldNames.REINDEXING_REQUIRED, "",
-                    Field.Store.NO, Field.Index.NO_NORMS));
+                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
             Iterator fields = doc.getFields().iterator();
             while (fields.hasNext()) {
                 Fieldable f = (Fieldable) fields.next();
@@ -532,37 +515,6 @@
     }
 
     /**
-     * The lucene index writer property: minMergeDocs
-     */
-    void setMinMergeDocs(int minMergeDocs) {
-        this.minMergeDocs = minMergeDocs;
-        if (indexWriter != null) {
-            // since lucene 2.0 setMaxBuffereDocs is equivalent to previous minMergeDocs attribute
-            indexWriter.setMaxBufferedDocs(minMergeDocs);
-        }
-    }
-
-    /**
-     * The lucene index writer property: maxMergeDocs
-     */
-    void setMaxMergeDocs(int maxMergeDocs) {
-        this.maxMergeDocs = maxMergeDocs;
-        if (indexWriter != null) {
-            indexWriter.setMaxMergeDocs(maxMergeDocs);
-        }
-    }
-
-    /**
-     * The lucene index writer property: mergeFactor
-     */
-    void setMergeFactor(int mergeFactor) {
-        this.mergeFactor = mergeFactor;
-        if (indexWriter != null) {
-            indexWriter.setMergeFactor(mergeFactor);
-        }
-    }
-
-    /**
      * The lucene index writer property: maxFieldLength
      */
     void setMaxFieldLength(int maxFieldLength) {
@@ -600,9 +552,9 @@
         if (!f.isIndexed()) {
             return Field.Index.NO;
         } else if (f.isTokenized()) {
-            return Field.Index.TOKENIZED;
+            return Field.Index.ANALYZED;
         } else {
-            return Field.Index.UN_TOKENIZED;
+            return Field.Index.NOT_ANALYZED;
         }
     }
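
Two Lucene 2.4 API points drive the AbstractIndex changes above: the IndexWriter constructors now take an explicit MaxFieldLength (replacing setMaxFieldLength() on an already-open writer), and IndexWriter.commit() publishes pending changes without the close-and-reopen cycle the old commit(boolean) code used; the merge-tuning setters were dropped along with the fields that fed them. A rough sketch of the new writer lifecycle, with RAMDirectory and StandardAnalyzer standing in for Jackrabbit's directory and analyzer:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class WriterLifecycleExample {
        public static void main(String[] args) throws Exception {
            Directory dir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(),
                    new IndexWriter.MaxFieldLength(10000));
            Document doc = new Document();
            doc.add(new Field("id", "1",
                    Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
            writer.addDocument(doc);
            // make the pending changes visible to newly opened readers;
            // the writer stays open for further work
            writer.commit();
            writer.close();
        }
    }
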
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMigration.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMigration.java	(revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMigration.java	(working copy)
@@ -101,7 +101,8 @@
 
         Directory migrationDir = directoryManager.getDirectory(migrationName);
         try {
-            IndexWriter writer = new IndexWriter(migrationDir, new JackrabbitAnalyzer());
+            IndexWriter writer = new IndexWriter(migrationDir, new JackrabbitAnalyzer(),
+                    IndexWriter.MaxFieldLength.UNLIMITED);
             try {
                 IndexReader r = new MigrationIndexReader(
                         IndexReader.open(index.getDirectory()));
@@ -147,7 +148,7 @@
                     String value = fields[i].stringValue();
                     value = value.replace('\uFFFF', '[');
                     doc.add(new Field(FieldNames.PROPERTIES, value,
-                            Field.Store.YES, Field.Index.NO_NORMS));
+                            Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                 }
             }
             return doc;
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitIndexSearcher.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitIndexSearcher.java	(revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitIndexSearcher.java	(working copy)
@@ -80,7 +80,11 @@
             hits = ((JackrabbitQuery) query).execute(this, session, sort);
         }
         if (hits == null) {
-            hits = new LuceneQueryHits(search(query, sort), reader);
+            if (sort == null) {
+                hits = new LuceneQueryHits(reader, this, query);
+            } else {
+                hits = new SortedLuceneQueryHits(reader, this, query, sort);
+            }
         }
         return hits;
     }
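
The Field.Index changes here and in the files below follow the Lucene 2.4 constant renames; the indexing semantics are unchanged: TOKENIZED became ANALYZED, UN_TOKENIZED became NOT_ANALYZED, and NO_NORMS became NOT_ANALYZED_NO_NORMS. One spot in this patch goes beyond a pure rename: addMVPName() in NodeIndexer.java moves from UN_TOKENIZED to NOT_ANALYZED_NO_NORMS, which additionally disables norms for that field. A one-line illustration with placeholder field values:

    import org.apache.lucene.document.Field;

    public class FieldIndexRenames {
        public static void main(String[] args) {
            // pre-2.4 equivalent:
            // new Field("title", "hello", Field.Store.YES, Field.Index.TOKENIZED)
            Field f = new Field("title", "hello",
                    Field.Store.YES, Field.Index.ANALYZED);
            System.out.println(f);
        }
    }
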
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java	(revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java	(working copy)
@@ -73,7 +73,7 @@
                                   boolean withOffsets) {
         super(name,
                 store ? Field.Store.YES : Field.Store.NO,
-                Field.Index.TOKENIZED,
+                Field.Index.ANALYZED,
                 withOffsets ? Field.TermVector.WITH_OFFSETS : Field.TermVector.NO);
         this.reader = reader;
     }
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryHits.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryHits.java	(revision 714004)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryHits.java	(working copy)
@@ -16,84 +16,75 @@
  */
 package org.apache.jackrabbit.core.query.lucene;
 
-import org.apache.lucene.search.Hits;
+import java.io.IOException;
+
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Scorer;
 import org.apache.lucene.index.IndexReader;
 import org.apache.jackrabbit.core.NodeId;
+import org.apache.jackrabbit.uuid.UUID;
 
-import java.io.IOException;
-
 /**
- * Wraps the lucene Hits object and adds a close method that allows
- * to release resources after a query has been executed and the results have
- * been read completely.
+ * Wraps a lucene query result and adds a close method that allows to release
+ * resources after a query has been executed and the results have been read
+ * completely.
  */
-public class LuceneQueryHits extends AbstractQueryHits {
+public class LuceneQueryHits implements QueryHits {
 
     /**
-     * The lucene hits we wrap.
-     */
-    private final Hits hits;
-
-    /**
      * The IndexReader in use by the lucene hits.
      */
     private final IndexReader reader;
 
     /**
-     * The index of the current hit. Initially invalid.
+     * The scorer for the query.
      */
-    private int hitIndex = -1;
+    private final Scorer scorer;
 
-    /**
-     * Creates a new QueryHits instance wrapping hits.
-     * @param hits the lucene hits.
-     * @param reader the IndexReader in use by hits.
-     */
-    public LuceneQueryHits(Hits hits, IndexReader reader) {
-        this.hits = hits;
+    public LuceneQueryHits(IndexReader reader,
+                           IndexSearcher searcher,
+                           Query query)
+            throws IOException {
         this.reader = reader;
+        this.scorer = query.weight(searcher).scorer(reader);
     }
 
     /**
      * {@inheritDoc}
     */
-    public final int getSize() {
-        return hits.length();
+    public ScoreNode nextScoreNode() throws IOException {
+        if (!scorer.next()) {
+            return null;
+        }
+        String uuid = reader.document(scorer.doc()).get(FieldNames.UUID);
+        NodeId id = new NodeId(UUID.fromString(uuid));
+        return new ScoreNode(id, scorer.score());
     }
 
     /**
      * {@inheritDoc}
     */
-    public final ScoreNode nextScoreNode() throws IOException {
-        if (++hitIndex >= hits.length()) {
-            return null;
-        }
-        String uuid = reader.document(id(hitIndex), FieldSelectors.UUID).get(FieldNames.UUID);
-        return new ScoreNode(NodeId.valueOf(uuid), hits.score(hitIndex));
+    public void close() throws IOException {
+        // make sure scorer frees resources
+        scorer.skipTo(Integer.MAX_VALUE);
     }
 
     /**
-     * Skips n hits.
-     *
-     * @param n the number of hits to skip.
-     * @throws IOException if an error occurs while skipping.
+     * @return always -1.
     */
-    public void skip(int n) throws IOException {
-        hitIndex += n;
+    public int getSize() {
+        return -1;
    }
 
-    //-------------------------------< internal >-------------------------------
-
     /**
-     * Returns the document number for the nth document
-     * in this QueryHits.
-     *
-     * @param n index.
-     * @return the document number for the nth
-     *         document.
-     * @throws IOException if an error occurs.
+     * {@inheritDoc}
     */
-    private final int id(int n) throws IOException {
-        return hits.id(n);
+    public void skip(int n) throws IOException {
+        while (n-- > 0) {
+            if (nextScoreNode() == null) {
+                return;
+            }
+        }
     }
 }
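
The rewritten LuceneQueryHits streams matches straight from a Scorer instead of going through the Hits class, which Lucene 2.4 deprecated (Hits silently re-executes the query as iteration moves past its internal page and normalizes scores to at most 1.0). The underlying pattern, sketched against plain Lucene 2.4 types; the method and names are illustrative:

    import java.io.IOException;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.Scorer;

    public class ScorerIterationExample {
        // streams every match of the query in document-id order
        public static void printMatches(IndexReader reader, Query query)
                throws IOException {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                Scorer scorer = query.weight(searcher).scorer(reader);
                while (scorer.next()) {
                    System.out.println(
                            "doc=" + scorer.doc() + " score=" + scorer.score());
                }
            } finally {
                searcher.close();
            }
        }
    }
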
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/MoreLikeThis.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/MoreLikeThis.java	(revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/MoreLikeThis.java	(working copy)
@@ -25,10 +25,9 @@
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Hits;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
@@ -41,7 +40,6 @@
 import java.io.IOException;
 import java.io.Reader;
 import java.io.File;
-import java.io.PrintStream;
 import java.io.StringReader;
 import java.io.FileReader;
 import java.io.InputStreamReader;
@@ -86,9 +84,6 @@
  * <h3>Initial Usage</h3>
  *
  * This class has lots of options to try to make it efficient and flexible.
- * See the body of {@link #main main()} below in the source for real code, or
- * if you want pseudo code, the simpliest possible usage is as follows. The bold
- * fragment is specific to this class.
  *
  * <pre>
@@ -144,9 +139,6 @@
  *  - optimise: when no termvector support available - used maxNumTermsParsed to limit amount of tokenization
  * </pre>
  *
- * @author David Spencer
- * @author Bruce Ritchie
- * @author Mark Harwood
  */
 public final class MoreLikeThis {
@@ -174,7 +166,7 @@
      * @see #getMinDocFreq
      * @see #setMinDocFreq
      */
-    public static final int DEFALT_MIN_DOC_FREQ = 5;
+    public static final int DEFAULT_MIN_DOC_FREQ = 5;
 
     /**
      * Boost terms in query based on score.
@@ -239,7 +231,7 @@
     /**
      * Ignore words which do not occur in at least this many docs.
     */
-    private int minDocFreq = DEFALT_MIN_DOC_FREQ;
+    private int minDocFreq = DEFAULT_MIN_DOC_FREQ;
 
     /**
      * Should we apply a boost to the Query based on the scores?
@@ -274,7 +266,7 @@
     /**
      * For idf() calculations.
     */
-    private Similarity similarity = new DefaultSimilarity();
+    private Similarity similarity;// = new DefaultSimilarity();
 
     /**
      * IndexReader to use
@@ -285,10 +277,24 @@
      * Constructor requiring an IndexReader.
     */
     public MoreLikeThis(IndexReader ir) {
-        this.ir = ir;
+        this(ir, new DefaultSimilarity());
     }
 
-    /**
+    public MoreLikeThis(IndexReader ir, Similarity sim){
+        this.ir = ir;
+        this.similarity = sim;
+    }
+
+
+    public Similarity getSimilarity() {
+        return similarity;
+    }
+
+    public void setSimilarity(Similarity similarity) {
+        this.similarity = similarity;
+    }
+
+    /**
      * Returns an analyzer that will be used to parse source doc with. The default analyzer
      * is the {@link #DEFAULT_ANALYZER}.
      *
@@ -330,7 +336,7 @@
 
     /**
      * Returns the frequency at which words will be ignored which do not occur in at least this
-     * many docs. The default frequency is {@link #DEFALT_MIN_DOC_FREQ}.
+     * many docs. The default frequency is {@link #DEFAULT_MIN_DOC_FREQ}.
      *
      * @return the frequency at which words will be ignored which do not occur in at least this
      *         many docs.
@@ -595,12 +601,11 @@
         int numDocs = ir.numDocs();
         FreqQ res = new FreqQ(words.size()); // will order words by score
 
-        Iterator it = words.entrySet().iterator();
+        Iterator it = words.keySet().iterator();
         while (it.hasNext()) { // for every word
-            Map.Entry entry = (Map.Entry) it.next();
-            String word = (String) entry.getKey();
+            String word = (String) it.next();
 
-            int tf = ((Int) entry.getValue()).x; // term freq in the source doc
+            int tf = ((Int) words.get(word)).x; // term freq in the source doc
             if (minTermFreq > 0 && tf < minTermFreq) {
                 continue; // filter out words that don't occur enough times in the source
             }
@@ -645,7 +650,7 @@
         sb.append("\t" + "maxQueryTerms : " + maxQueryTerms + "\n");
         sb.append("\t" + "minWordLen : " + minWordLen + "\n");
         sb.append("\t" + "maxWordLen : " + maxWordLen + "\n");
-        sb.append("\t" + "fieldNames : \"");
+        sb.append("\t" + "fieldNames : ");
         String delim = "";
         for (int i = 0; i < fieldNames.length; i++) {
             String fieldName = fieldNames[i];
@@ -660,72 +665,11 @@
     }
 
     /**
-     * Test driver.
-     * Pass in "-i INDEX" and then either "-fn FILE" or "-url URL".
-     */
-    public static void main(String[] a) throws Throwable {
-        String indexName = "localhost_index";
-        String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
-        URL url = null;
-        for (int i = 0; i < a.length; i++) {
-            if (a[i].equals("-i")) {
-                indexName = a[++i];
-            }
-            else if (a[i].equals("-f")) {
-                fn = a[++i];
-            }
-            else if (a[i].equals("-url")) {
-                url = new URL(a[++i]);
-            }
-        }
-
-        PrintStream o = System.out;
-        IndexReader r = IndexReader.open(indexName);
-        o.println("Open index " + indexName + " which has " + r.numDocs() + " docs");
-
-        MoreLikeThis mlt = new MoreLikeThis(r);
-
-        o.println("Query generation parameters:");
-        o.println(mlt.describeParams());
-        o.println();
-
-        Query query = null;
-        if (url != null) {
-            o.println("Parsing URL: " + url);
-            query = mlt.like(url);
-        }
-        else if (fn != null) {
-            o.println("Parsing file: " + fn);
-            query = mlt.like(new File(fn));
-        }
-
-        o.println("q: " + query);
-        o.println();
-        IndexSearcher searcher = new IndexSearcher(indexName);
-
-        Hits hits = searcher.search(query);
-        int len = hits.length();
-        o.println("found: " + len + " documents matching");
-        o.println();
-        for (int i = 0; i < Math.min(25, len); i++) {
-            Document d = hits.doc(i);
-            String summary = d.get( "summary");
-            o.println("score : " + hits.score(i));
-            o.println("url : " + d.get("url"));
-            o.println("\ttitle : " + d.get("title"));
-            if (summary != null) {
-                o.println("\tsummary: " + d.get("summary"));
-            }
-            o.println();
-        }
-    }
-
-    /**
      * Find words for a more-like-this query former.
     *
     * @param docNum the id of the lucene document from which to find terms
     */
-    private PriorityQueue retrieveTerms(int docNum) throws IOException {
+    public PriorityQueue retrieveTerms(int docNum) throws IOException {
         Map termFreqMap = new HashMap();
         for (int i = 0; i < fieldNames.length; i++) {
             String fieldName = fieldNames[i];
@@ -786,10 +730,11 @@
     private void addTermFrequencies(Reader r, Map termFreqMap, String fieldName)
             throws IOException {
         TokenStream ts = analyzer.tokenStream(fieldName, r);
-        org.apache.lucene.analysis.Token token;
         int tokenCount = 0;
-        while ((token = ts.next()) != null) { // for every token
-            String word = token.termText();
+        // for every token
+        final Token reusableToken = new Token();
+        for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) {
+            String word = nextToken.term();
             tokenCount++;
             if (tokenCount > maxNumTokensParsed) {
                 break;
@@ -802,8 +747,7 @@
             Int cnt = (Int) termFreqMap.get(word);
             if (cnt == null) {
                 termFreqMap.put(word, new Int());
-            }
-            else {
+            } else {
                 cnt.x++;
             }
         }
@@ -847,7 +791,7 @@
      * For an easier method to call see {@link #retrieveInterestingTerms retrieveInterestingTerms()}.
     *
     * @param r the reader that has the content of the document
-     * @return the most intresting words in the document ordered by score, with the highest scoring, or best entry, first
+     * @return the most interesting words in the document ordered by score, with the highest scoring, or best entry, first
     *
     * @see #retrieveInterestingTerms
     */
@@ -861,6 +805,23 @@
     }
 
     /**
+     * @see #retrieveInterestingTerms(java.io.Reader)
+     */
+    public String[] retrieveInterestingTerms(int docNum) throws IOException {
+        ArrayList al = new ArrayList(maxQueryTerms);
+        PriorityQueue pq = retrieveTerms(docNum);
+        Object cur;
+        int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
+        // we just want to return the top words
+        while (((cur = pq.pop()) != null) && lim-- > 0) {
+            Object[] ar = (Object[]) cur;
+            al.add(ar[0]); // the 1st entry is the interesting word
+        }
+        String[] res = new String[al.size()];
+        return (String[]) al.toArray(res);
+    }
+
+    /**
      * Convenience routine to make it easy to return the most interesting words in a document.
     * More advanced users will call {@link #retrieveTerms(java.io.Reader) retrieveTerms()} directly.
     * @param r the source document
@@ -869,18 +830,18 @@
     * @see #retrieveTerms(java.io.Reader)
     * @see #setMaxQueryTerms
     */
-    public String[] retrieveInterestingTerms( Reader r) throws IOException {
-        ArrayList al = new ArrayList( maxQueryTerms);
-        PriorityQueue pq = retrieveTerms( r);
-        int lim = maxQueryTerms;
-        // have to be careful, retrieveTerms returns all words
-        // but that's probably not useful to our caller...
+    public String[] retrieveInterestingTerms(Reader r) throws IOException {
+        ArrayList al = new ArrayList(maxQueryTerms);
+        PriorityQueue pq = retrieveTerms(r);
+        Object cur;
+        int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
         // we just want to return the top words
-        for (Object cur = pq.pop(); cur != null && lim-- > 0; cur = pq.pop()) {
+        while (((cur = pq.pop()) != null) && lim-- > 0) {
             Object[] ar = (Object[]) cur;
             al.add(ar[0]); // the 1st entry is the interesting word
         }
-        return (String[]) al.toArray(new String[al.size()]);
+        String[] res = new String[al.size()];
+        return (String[]) al.toArray(res);
     }
 
     /**
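
Beyond the token-loop and cleanup changes, MoreLikeThis now lets callers inject the Similarity used for its idf() calculations through a new constructor (plus getter/setter), and retrieveTerms(int)/retrieveInterestingTerms(int) extract terms for a document already in the index instead of only from an external Reader. A usage sketch, assuming it lives in the same package as MoreLikeThis; the index path, field name, and document number are placeholders, and setFieldNames() is the class's pre-existing setter:

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.DefaultSimilarity;

    public class MoreLikeThisUsage {
        public static void main(String[] args) throws Exception {
            IndexReader reader = IndexReader.open("/path/to/index");
            try {
                MoreLikeThis mlt = new MoreLikeThis(reader, new DefaultSimilarity());
                mlt.setFieldNames(new String[]{"contents"});
                // interesting terms of the document with lucene doc number 0
                String[] terms = mlt.retrieveInterestingTerms(0);
                for (int i = 0; i < terms.length; i++) {
                    System.out.println(terms[i]);
                }
            } finally {
                reader.close();
            }
        }
    }
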
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java	(revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java	(working copy)
@@ -264,9 +264,6 @@
             PersistentIndex index = new PersistentIndex(name,
                     handler.getTextAnalyzer(), handler.getSimilarity(),
                     cache, indexingQueue, directoryManager);
-            index.setMaxMergeDocs(handler.getMaxMergeDocs());
-            index.setMergeFactor(handler.getMergeFactor());
-            index.setMinMergeDocs(handler.getMinMergeDocs());
             index.setMaxFieldLength(handler.getMaxFieldLength());
             index.setUseCompoundFile(handler.getUseCompoundFile());
             index.setTermInfosIndexDivisor(handler.getTermInfosIndexDivisor());
@@ -570,9 +567,6 @@
         PersistentIndex index = new PersistentIndex(indexName,
                 handler.getTextAnalyzer(), handler.getSimilarity(),
                 cache, indexingQueue, directoryManager);
-        index.setMaxMergeDocs(handler.getMaxMergeDocs());
-        index.setMergeFactor(handler.getMergeFactor());
-        index.setMinMergeDocs(handler.getMinMergeDocs());
         index.setMaxFieldLength(handler.getMaxFieldLength());
         index.setUseCompoundFile(handler.getUseCompoundFile());
         index.setTermInfosIndexDivisor(handler.getTermInfosIndexDivisor());
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java	(revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java	(working copy)
@@ -185,17 +185,17 @@
         // UUID
         doc.add(new Field(
                 FieldNames.UUID, node.getNodeId().getUUID().toString(),
-                Field.Store.YES, Field.Index.NO_NORMS, Field.TermVector.NO));
+                Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
         try {
             // parent UUID
             if (node.getParentId() == null) {
                 // root node
-                doc.add(new Field(FieldNames.PARENT, "", Field.Store.YES, Field.Index.NO_NORMS, Field.TermVector.NO));
+                doc.add(new Field(FieldNames.PARENT, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
                 addNodeName(doc, "", "");
             } else {
                 doc.add(new Field(
                         FieldNames.PARENT, node.getParentId().toString(),
-                        Field.Store.YES, Field.Index.NO_NORMS, Field.TermVector.NO));
+                        Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
                 NodeState parent = (NodeState) stateProvider.getItemState(node.getParentId());
                 ChildNodeEntry child = parent.getChildNodeEntry(node.getNodeId());
                 if (child == null) {
@@ -276,7 +276,7 @@
     private void addMVPName(Document doc, Name name) {
         try {
             String propName = resolver.getJCRName(name);
-            doc.add(new Field(FieldNames.MVP, propName, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
+            doc.add(new Field(FieldNames.MVP, propName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
         } catch (NamespaceException e) {
             // will never happen, prefixes are created dynamically
         }
@@ -377,7 +377,7 @@
         } catch (NamespaceException e) {
             // will never happen
         }
-        doc.add(new Field(FieldNames.PROPERTIES_SET, fieldName, Field.Store.NO, Field.Index.NO_NORMS));
+        doc.add(new Field(FieldNames.PROPERTIES_SET, fieldName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
     }
 
     /**
@@ -485,7 +485,7 @@
         } else {
             return new Field(FieldNames.PROPERTIES,
                     FieldNames.createNamedValue(fieldName, internalValue),
-                    Field.Store.NO, Field.Index.NO_NORMS,
+                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
                     Field.TermVector.NO);
         }
     }
@@ -669,7 +669,7 @@
                     + FieldNames.FULLTEXT_PREFIX + fieldName.substring(idx + 1);
             Field f = new Field(fieldName, stringValue,
                     Field.Store.NO,
-                    Field.Index.TOKENIZED,
+                    Field.Index.ANALYZED,
                     Field.TermVector.NO);
             f.setBoost(boost);
             doc.add(f);
@@ -746,10 +746,10 @@
                 stored = Field.Store.YES;
             }
             return new Field(FieldNames.FULLTEXT, value, stored,
-                    Field.Index.TOKENIZED, tv);
+                    Field.Index.ANALYZED, tv);
         } else {
             return new Field(FieldNames.FULLTEXT, value,
-                    Field.Store.NO, Field.Index.TOKENIZED, tv);
+                    Field.Store.NO, Field.Index.ANALYZED, tv);
         }
     }
 
@@ -863,7 +863,7 @@
         }
         doc.add(new Field(FieldNames.PROPERTY_LENGTHS,
                 FieldNames.createNamedLength(propertyName, length),
-                Field.Store.NO, Field.Index.NO_NORMS));
+                Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
     }
 
     /**
@@ -878,11 +878,11 @@
                              String namespaceURI,
                              String localName) throws NamespaceException {
         String name = mappings.getPrefix(namespaceURI) + ":" + localName;
-        doc.add(new Field(FieldNames.LABEL, name, Field.Store.NO, Field.Index.NO_NORMS));
+        doc.add(new Field(FieldNames.LABEL, name, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
         // as of version 3, also index combination of namespace URI and local name
         if (indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion()) {
-            doc.add(new Field(FieldNames.NAMESPACE_URI, namespaceURI, Field.Store.NO, Field.Index.NO_NORMS));
-            doc.add(new Field(FieldNames.LOCAL_NAME, localName, Field.Store.NO, Field.Index.NO_NORMS));
+            doc.add(new Field(FieldNames.NAMESPACE_URI, namespaceURI, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
+            doc.add(new Field(FieldNames.LOCAL_NAME, localName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
         }
     }
 }
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java	(revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java	(working copy)
@@ -79,24 +79,6 @@
     }
 
     /**
-     * Merges another index into this persistent index. Before index
-     * is merged, {@link AbstractIndex#commit()} is called on that
-     * index.
-     *
-     * @param index the other index to merge.
-     * @throws IOException if an error occurs while merging.
-     */
-    void mergeIndex(AbstractIndex index) throws IOException {
-        // commit changes to directory on other index.
-        index.commit();
-        // merge index
-        getIndexWriter().addIndexes(new Directory[]{
-            index.getDirectory()
-        });
-        invalidateSharedReader();
-    }
-
-    /**
      * Merges the provided indexes into this index. After this completes, the
      * index is optimized.
      *
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/RangeQuery.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/RangeQuery.java	(revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/RangeQuery.java	(working copy)
@@ -22,13 +22,11 @@
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreRangeQuery;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.index.TermDocs;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.BitSet;
@@ -40,19 +38,11 @@
 import java.util.Set;
 
 /**
- * Implements a variant of the lucene class {@link org.apache.lucene.search.RangeQuery}.
- * This class does not rewrite to basic {@link org.apache.lucene.search.TermQuery}
- * but will calculate the matching documents itself. That way a
- * TooManyClauses can be avoided.
+ * Implements a lucene range query.
 */
 public class RangeQuery extends Query implements Transformable {
 
     /**
-     * Logger instance for this class.
-     */
-    private static final Logger log = LoggerFactory.getLogger(RangeQuery.class);
-
-    /**
      * The lower term. May be null if upperTerm is not
     * null.
     */
     private final Term lowerTerm;
 
@@ -76,12 +66,6 @@
     private int transform = TRANSFORM_NONE;
 
     /**
-     * The rewritten range query or null if the range spans more
-     * than {@link org.apache.lucene.search.BooleanQuery#maxClauseCount} terms.
-     */
-    private Query stdRangeQuery;
-
-    /**
      * Creates a new RangeQuery. The lower or the upper term may be
     * null, but not both!
     *
@@ -130,9 +114,8 @@
     }
 
     /**
-     * Tries to rewrite this query into a standard lucene RangeQuery.
-     * This rewrite might fail with a TooManyClauses exception. If that
-     * happens, we use our own implementation.
+     * Rewrites this query into a {@link ConstantScoreRangeQuery} if
+     * {@link #transform} is {@link #TRANSFORM_NONE}.
     *
     * @param reader the index reader.
     * @return the rewritten query or this query if rewriting is not possible.
     */
     public Query rewrite(IndexReader reader) throws IOException {
         if (transform == TRANSFORM_NONE) {
-            Query stdRangeQueryImpl
-                    = new org.apache.lucene.search.RangeQuery(lowerTerm, upperTerm, inclusive);
-            try {
-                stdRangeQuery = stdRangeQueryImpl.rewrite(reader);
-                return stdRangeQuery;
-            } catch (BooleanQuery.TooManyClauses e) {
-                log.debug("Too many terms to enumerate, using custom RangeQuery");
-                // failed, use own implementation
-                return this;
-            }
+            return new ConstantScoreRangeQuery(lowerTerm.field(),
+                    lowerTerm.text(), upperTerm.text(), inclusive,
+                    inclusive).rewrite(reader);
         } else {
             // always use our implementation when we need to transform the
             // term enum
@@ -194,9 +170,7 @@
     * {@inheritDoc}
     */
     public void extractTerms(Set terms) {
-        if (stdRangeQuery != null) {
-            stdRangeQuery.extractTerms(terms);
-        }
+        // cannot extract terms
     }
 
     /**
@@ -503,7 +477,7 @@
     * @param other the other String.
     * @param offset start comparing the two strings at offset.
     * @return see {@link String#compareTo(Object)}. But also respects {@link
-     *         #transform}.
+     *         RangeQuery#transform}.
     */
    private int termCompare(String text, String other, int offset) {
        OffsetCharSequence seq1 = new OffsetCharSequence(offset, text, transform);
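
ConstantScoreRangeQuery, used above in place of the old rewrite-to-BooleanQuery strategy, matches the term range through a filter bitset rather than expanding into one TermQuery clause per term, so BooleanQuery.TooManyClauses can no longer occur for wide ranges; the trade-off is that every match receives the same constant score. A minimal sketch with placeholder field and bounds:

    import org.apache.lucene.search.ConstantScoreRangeQuery;
    import org.apache.lucene.search.Query;

    public class RangeExample {
        public static void main(String[] args) {
            // all docs whose "name" term sorts between "a" and "m", both
            // bounds inclusive; never rewrites into per-term boolean clauses
            Query q = new ConstantScoreRangeQuery("name", "a", "m", true, true);
            System.out.println(q);
        }
    }
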
*/ private int termCompare(String text, String other, int offset) { OffsetCharSequence seq1 = new OffsetCharSequence(offset, text, transform); Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (revision 723728) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (working copy) @@ -1190,7 +1190,7 @@ doc.add(new Field(FieldNames.AGGREGATED_NODE_UUID, aggregates[j].getNodeId().getUUID().toString(), Field.Store.NO, - Field.Index.NO_NORMS)); + Field.Index.NOT_ANALYZED_NO_NORMS)); } } // only use first aggregate definition that matches Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SingletonTokenStream.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/SingletonTokenStream.java (revision 723728) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/SingletonTokenStream.java (working copy) @@ -16,6 +16,8 @@ */ package org.apache.jackrabbit.core.query.lucene; +import java.io.IOException; + import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.Payload; @@ -28,11 +30,16 @@ public final class SingletonTokenStream extends TokenStream { /** - * The single token to return. + * The string value of the token. */ - private Token t; + private String value; /** + * The payload of the token. + */ + private final Payload payload; + + /** * Creates a new SingleTokenStream with the given value and a property * type. * @@ -40,19 +47,23 @@ * @param type the JCR property type. */ public SingletonTokenStream(String value, int type) { - super(); - t = new Token(value, 0, value.length()); - t.setPayload(new Payload(new PropertyMetaData(type).toByteArray())); + this.value = value; + this.payload = new Payload(new PropertyMetaData(type).toByteArray()); } /** * {@inheritDoc} */ - public Token next() { - try { - return t; - } finally { - t = null; + public Token next(Token reusableToken) throws IOException { + if (value == null) { + return null; } + reusableToken.clear(); + reusableToken.setTermBuffer(value); + reusableToken.setPayload(payload); + reusableToken.setStartOffset(0); + reusableToken.setEndOffset(value.length()); + value = null; + return reusableToken; } } Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SortedLuceneQueryHits.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/SortedLuceneQueryHits.java (revision 0) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/SortedLuceneQueryHits.java (revision 0) @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SortedLuceneQueryHits.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/SortedLuceneQueryHits.java	(revision 0)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/SortedLuceneQueryHits.java	(revision 0)
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopFieldDocCollector;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.index.IndexReader;
+import org.apache.jackrabbit.core.NodeId;
+import org.apache.jackrabbit.uuid.UUID;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * Wraps a lucene query result and adds a close method that allows to release
+ * resources after a query has been executed and the results have been read
+ * completely.
+ */
+public final class SortedLuceneQueryHits extends AbstractQueryHits {
+
+    /**
+     * The IndexReader in use by the lucene hits.
+     */
+    private final IndexReader reader;
+
+    /**
+     * The index searcher.
+     */
+    private final JackrabbitIndexSearcher searcher;
+
+    /**
+     * The query to execute.
+     */
+    private final Query query;
+
+    /**
+     * The sort criteria.
+     */
+    private final Sort sort;
+
+    /**
+     * The index of the current hit. Initially invalid.
+     */
+    private int hitIndex = -1;
+
+    /**
+     * The score nodes.
+     */
+    private final List scoreNodes = new ArrayList();
+
+    /**
+     * The total number of hits.
+     */
+    private int size;
+
+    /**
+     * Number of hits to retrieve.
+     */
+    private int numHits = 50;
+
+    /**
+     * Creates a new QueryHits instance wrapping hits.
+     *
+     * @param reader the IndexReader in use.
+     * @param searcher the index searcher.
+     * @param query the query to execute.
+     * @param sort the sort criteria.
+     * @throws IOException if an error occurs while reading from the index.
+     */
+    public SortedLuceneQueryHits(IndexReader reader,
+                                 JackrabbitIndexSearcher searcher,
+                                 Query query,
+                                 Sort sort) throws IOException {
+        this.reader = reader;
+        this.searcher = searcher;
+        this.query = query;
+        this.sort = sort;
+        getHits();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public int getSize() {
+        return size;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public ScoreNode nextScoreNode() throws IOException {
+        if (++hitIndex >= size) {
+            // no more score nodes
+            return null;
+        } else if (hitIndex >= scoreNodes.size()) {
+            // refill
+            getHits();
+        }
+        return (ScoreNode) scoreNodes.get(hitIndex);
+    }
+
+    /**
+     * Skips n hits.
+     *
+     * @param n the number of hits to skip.
+     * @throws IOException if an error occurs while skipping.
+     */
+    public void skip(int n) throws IOException {
+        hitIndex += n;
+    }
+
+    //-------------------------------< internal >-------------------------------
+
+    private int getHits() throws IOException {
+        // double hits
+        numHits *= 2;
+        TopFieldDocCollector collector = new TopFieldDocCollector(reader, sort, numHits);
+        searcher.search(query, collector);
+        this.size = collector.getTotalHits();
+        ScoreDoc[] docs = collector.topDocs().scoreDocs;
+        int num = 0;
+        for (int i = scoreNodes.size(); i < docs.length; i++) {
+            String uuid = reader.document(docs[i].doc).get(FieldNames.UUID);
+            NodeId id = new NodeId(UUID.fromString(uuid));
+            scoreNodes.add(new ScoreNode(id, docs[i].score));
+            num++;
+        }
+        return num;
+    }
+}

Property changes on: src\main\java\org\apache\jackrabbit\core\query\lucene\SortedLuceneQueryHits.java
___________________________________________________________________
Added: svn:eol-style
   + native
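
The new SortedLuceneQueryHits collects sorted results with TopFieldDocCollector and, since the requested sort order rules out lazy scorer iteration, simply re-runs the search with a doubled numHits whenever the caller iterates past what has been collected so far. The collector pattern at its core, sketched with an illustrative sort field:

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.search.TopFieldDocCollector;

    public class SortedCollectorExample {
        public static void printTop(IndexReader reader, Query query, int n)
                throws Exception {
            Sort sort = new Sort(new SortField("title", SortField.STRING));
            TopFieldDocCollector collector = new TopFieldDocCollector(reader, sort, n);
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                searcher.search(query, collector);
                System.out.println("total hits: " + collector.getTotalHits());
                ScoreDoc[] docs = collector.topDocs().scoreDocs;
                for (int i = 0; i < docs.length; i++) {
                    System.out.println("doc=" + docs[i].doc + " score=" + docs[i].score);
                }
            } finally {
                searcher.close();
            }
        }
    }
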