Index: src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java (working copy)
@@ -233,11 +233,12 @@
final SortedMap termMap = new TreeMap();
Reader r = new StringReader(text);
TokenStream ts = index.getTextAnalyzer().tokenStream("", r);
- Token t;
+ Token t = new Token();
try {
- while ((t = ts.next()) != null) {
+ while ((t = ts.next(t)) != null) {
+ String termText = t.term();
TermVectorOffsetInfo[] info =
- (TermVectorOffsetInfo[]) termMap.get(t.termText());
+ (TermVectorOffsetInfo[]) termMap.get(termText);
if (info == null) {
info = new TermVectorOffsetInfo[1];
} else {
@@ -247,7 +248,7 @@
}
info[info.length - 1] = new TermVectorOffsetInfo(
t.startOffset(), t.endOffset());
- termMap.put(t.termText(), info);
+ termMap.put(termText, info);
}
} catch (IOException e) {
// should never happen, we are reading from a string
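
Lucene 2.4 deprecates the no-argument TokenStream.next() in favour of the reusable-token variant adopted above; next(Token) may return a different instance than the one passed in, so the result must be re-assigned. A minimal standalone sketch of the pattern, with an analyzer chosen for illustration only:

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;

    public class TokenReuseDemo {
        public static void main(String[] args) throws IOException {
            TokenStream ts = new StandardAnalyzer()
                    .tokenStream("", new StringReader("quick brown fox"));
            Token t = new Token(); // allocated once, reused for every term
            while ((t = ts.next(t)) != null) {
                // keep the returned instance: it may differ from the argument
                System.out.println(t.term() + " [" + t.startOffset()
                        + "," + t.endOffset() + ")");
            }
            ts.close();
        }
    }
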
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (working copy)
@@ -20,8 +20,6 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.SerialMergeScheduler;
-import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -80,15 +78,6 @@
/** Compound file flag */
private boolean useCompoundFile = true;
- /** minMergeDocs config parameter */
- private int minMergeDocs = SearchIndex.DEFAULT_MIN_MERGE_DOCS;
-
- /** maxMergeDocs config parameter */
- private int maxMergeDocs = SearchIndex.DEFAULT_MAX_MERGE_DOCS;
-
- /** mergeFactor config parameter */
- private int mergeFactor = SearchIndex.DEFAULT_MERGE_FACTOR;
-
/** maxFieldLength config parameter */
private int maxFieldLength = SearchIndex.DEFAULT_MAX_FIELD_LENGTH;
@@ -145,7 +134,8 @@
this.isExisting = IndexReader.indexExists(directory);
if (!isExisting) {
- indexWriter = new IndexWriter(directory, analyzer);
+ indexWriter = new IndexWriter(directory, analyzer,
+ IndexWriter.MaxFieldLength.LIMITED);
// immediately close, now that index has been created
indexWriter.close();
indexWriter = null;
@@ -302,7 +292,7 @@
}
if (sharedReader == null) {
// create new shared reader
- IndexReader reader = IndexReader.open(getDirectory());
+ IndexReader reader = IndexReader.open(getDirectory(), true);
reader.setTermInfosIndexDivisor(termInfosIndexDivisor);
CachingIndexReader cr = new CachingIndexReader(
reader, cache, initCache);
@@ -339,18 +329,11 @@
indexReader = null;
}
if (indexWriter == null) {
- indexWriter = new IndexWriter(getDirectory(), analyzer);
+ indexWriter = new IndexWriter(getDirectory(), analyzer,
+ new IndexWriter.MaxFieldLength(maxFieldLength));
indexWriter.setSimilarity(similarity);
- // since lucene 2.0 setMaxBuffereDocs is equivalent to previous minMergeDocs attribute
- indexWriter.setMaxBufferedDocs(minMergeDocs);
- indexWriter.setMaxMergeDocs(maxMergeDocs);
- indexWriter.setMergeFactor(mergeFactor);
- indexWriter.setMaxFieldLength(maxFieldLength);
indexWriter.setUseCompoundFile(useCompoundFile);
indexWriter.setInfoStream(STREAM_LOGGER);
- indexWriter.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
- indexWriter.setMergeScheduler(new SerialMergeScheduler());
- indexWriter.setMergePolicy(new LogDocMergePolicy());
}
return indexWriter;
}
@@ -372,12 +355,12 @@
*/
protected synchronized void commit(boolean optimize) throws IOException {
if (indexReader != null) {
+ log.debug("committing IndexReader.");
indexReader.flush();
}
if (indexWriter != null) {
log.debug("committing IndexWriter.");
- indexWriter.close();
- indexWriter = null;
+ indexWriter.commit();
}
// optimize if requested
if (optimize) {
@@ -484,7 +467,7 @@
Document copy = new Document();
// mark the document that reindexing is required
copy.add(new Field(FieldNames.REINDEXING_REQUIRED, "",
- Field.Store.NO, Field.Index.NO_NORMS));
+ Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
Iterator fields = doc.getFields().iterator();
while (fields.hasNext()) {
Fieldable f = (Fieldable) fields.next();
@@ -532,37 +515,6 @@
}
/**
- * The lucene index writer property: minMergeDocs
- */
- void setMinMergeDocs(int minMergeDocs) {
- this.minMergeDocs = minMergeDocs;
- if (indexWriter != null) {
- // since lucene 2.0 setMaxBuffereDocs is equivalent to previous minMergeDocs attribute
- indexWriter.setMaxBufferedDocs(minMergeDocs);
- }
- }
-
- /**
- * The lucene index writer property: maxMergeDocs
- */
- void setMaxMergeDocs(int maxMergeDocs) {
- this.maxMergeDocs = maxMergeDocs;
- if (indexWriter != null) {
- indexWriter.setMaxMergeDocs(maxMergeDocs);
- }
- }
-
- /**
- * The lucene index writer property: mergeFactor
- */
- void setMergeFactor(int mergeFactor) {
- this.mergeFactor = mergeFactor;
- if (indexWriter != null) {
- indexWriter.setMergeFactor(mergeFactor);
- }
- }
-
- /**
* The lucene index writer property: maxFieldLength
*/
void setMaxFieldLength(int maxFieldLength) {
@@ -600,9 +552,9 @@
if (!f.isIndexed()) {
return Field.Index.NO;
} else if (f.isTokenized()) {
- return Field.Index.TOKENIZED;
+ return Field.Index.ANALYZED;
} else {
- return Field.Index.UN_TOKENIZED;
+ return Field.Index.NOT_ANALYZED;
}
}
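
The writer changes above track the Lucene 2.4 API: the constructor now takes an explicit IndexWriter.MaxFieldLength, merge tuning (minMergeDocs, maxMergeDocs, mergeFactor) is no longer configured here, and commit() replaces the close-and-null cycle so the writer survives a commit. A minimal sketch of the new lifecycle, with the directory and analyzer as stand-ins:

    import org.apache.lucene.analysis.SimpleAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class WriterLifecycleDemo {
        public static void main(String[] args) throws Exception {
            Directory dir = new RAMDirectory(); // stand-in for the index directory
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(),
                    new IndexWriter.MaxFieldLength(10000)); // explicit field-length cap
            Document doc = new Document();
            doc.add(new Field("id", "1", Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS));
            writer.addDocument(doc);
            writer.commit(); // 2.4: flush and sync without close()/reopen
            writer.close();
        }
    }
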
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMigration.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMigration.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/IndexMigration.java (working copy)
@@ -101,7 +101,8 @@
Directory migrationDir = directoryManager.getDirectory(migrationName);
try {
- IndexWriter writer = new IndexWriter(migrationDir, new JackrabbitAnalyzer());
+ IndexWriter writer = new IndexWriter(migrationDir, new JackrabbitAnalyzer(),
+ IndexWriter.MaxFieldLength.UNLIMITED);
try {
IndexReader r = new MigrationIndexReader(
IndexReader.open(index.getDirectory()));
@@ -147,7 +148,7 @@
String value = fields[i].stringValue();
value = value.replace('\uFFFF', '[');
doc.add(new Field(FieldNames.PROPERTIES, value,
- Field.Store.YES, Field.Index.NO_NORMS));
+ Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
}
}
return doc;
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitIndexSearcher.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitIndexSearcher.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitIndexSearcher.java (working copy)
@@ -80,7 +80,11 @@
hits = ((JackrabbitQuery) query).execute(this, session, sort);
}
if (hits == null) {
- hits = new LuceneQueryHits(search(query, sort), reader);
+ if (sort == null) {
+ hits = new LuceneQueryHits(reader, this, query);
+ } else {
+ hits = new SortedLuceneQueryHits(reader, this, query, sort);
+ }
}
return hits;
}
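
Unsorted queries now go through the scorer-based LuceneQueryHits and sorted ones through the new SortedLuceneQueryHits; both are consumed identically through the QueryHits interface. A hedged caller-side sketch (hypothetical helper, assumed to live in the same package as the patched classes):

    package org.apache.jackrabbit.core.query.lucene;

    import java.io.IOException;

    // Hypothetical helper, not part of the patch: drains any QueryHits
    // implementation and counts the hits it produced.
    public class QueryHitsDrainer {
        public static int drain(QueryHits hits) throws IOException {
            int count = 0;
            try {
                for (ScoreNode sn = hits.nextScoreNode(); sn != null;
                        sn = hits.nextScoreNode()) {
                    count++; // each ScoreNode carries a node id and its score
                }
            } finally {
                hits.close(); // releases the scorer or collector resources
            }
            return count;
        }
    }
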
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java (working copy)
@@ -73,7 +73,7 @@
boolean withOffsets) {
super(name,
store ? Field.Store.YES : Field.Store.NO,
- Field.Index.TOKENIZED,
+ Field.Index.ANALYZED,
withOffsets ? Field.TermVector.WITH_OFFSETS : Field.TermVector.NO);
this.reader = reader;
}
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryHits.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryHits.java (revision 714004)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryHits.java (working copy)
@@ -16,84 +16,75 @@
*/
package org.apache.jackrabbit.core.query.lucene;
-import org.apache.lucene.search.Hits;
+import java.io.IOException;
+
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Scorer;
import org.apache.lucene.index.IndexReader;
import org.apache.jackrabbit.core.NodeId;
+import org.apache.jackrabbit.uuid.UUID;
-import java.io.IOException;
-
/**
- * Wraps the lucene Hits object and adds a close method that allows
- * to release resources after a query has been executed and the results have
- * been read completely.
+ * Wraps a lucene query result and adds a close method that allows
+ * resources to be released after a query has been executed and the results
+ * have been read completely.
*/
-public class LuceneQueryHits extends AbstractQueryHits {
+public class LuceneQueryHits implements QueryHits {
/**
- * The lucene hits we wrap.
- */
- private final Hits hits;
-
- /**
* The IndexReader in use by the lucene hits.
*/
private final IndexReader reader;
/**
- * The index of the current hit. Initially invalid.
+ * The scorer for the query.
*/
- private int hitIndex = -1;
+ private final Scorer scorer;
- /**
- * Creates a new QueryHits instance wrapping hits.
- * @param hits the lucene hits.
- * @param reader the IndexReader in use by hits.
- */
- public LuceneQueryHits(Hits hits, IndexReader reader) {
- this.hits = hits;
+ public LuceneQueryHits(IndexReader reader,
+ IndexSearcher searcher,
+ Query query)
+ throws IOException {
this.reader = reader;
+ this.scorer = query.weight(searcher).scorer(reader);
}
/**
* {@inheritDoc}
*/
- public final int getSize() {
- return hits.length();
+ public ScoreNode nextScoreNode() throws IOException {
+ if (!scorer.next()) {
+ return null;
+ }
+ String uuid = reader.document(scorer.doc()).get(FieldNames.UUID);
+ NodeId id = new NodeId(UUID.fromString(uuid));
+ return new ScoreNode(id, scorer.score());
}
/**
* {@inheritDoc}
*/
- public final ScoreNode nextScoreNode() throws IOException {
- if (++hitIndex >= hits.length()) {
- return null;
- }
- String uuid = reader.document(id(hitIndex), FieldSelectors.UUID).get(FieldNames.UUID);
- return new ScoreNode(NodeId.valueOf(uuid), hits.score(hitIndex));
+ public void close() throws IOException {
+ // make sure scorer frees resources
+ scorer.skipTo(Integer.MAX_VALUE);
}
/**
- * Skips n hits.
- *
- * @param n the number of hits to skip.
- * @throws IOException if an error occurs while skipping.
+ * @return always -1, because the total size is not known up front.
*/
- public void skip(int n) throws IOException {
- hitIndex += n;
+ public int getSize() {
+ return -1;
}
- //-------------------------------< internal >-------------------------------
-
/**
- * Returns the document number for the nth document
- * in this QueryHits.
- *
- * @param n index.
- * @return the document number for the nth
- * document.
- * @throws IOException if an error occurs.
+ * {@inheritDoc}
*/
- private final int id(int n) throws IOException {
- return hits.id(n);
+ public void skip(int n) throws IOException {
+ while (n-- > 0) {
+ if (nextScoreNode() == null) {
+ return;
+ }
+ }
}
}
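
The class now iterates matches directly instead of going through the deprecated Hits API: it asks the query for a Weight, obtains a Scorer over the reader, and steps with next()/doc()/score(). A self-contained sketch of that Lucene 2.4 pattern against a throwaway in-memory index:

    import org.apache.lucene.analysis.SimpleAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.Scorer;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.store.RAMDirectory;

    public class ScorerDemo {
        public static void main(String[] args) throws Exception {
            RAMDirectory dir = new RAMDirectory();
            IndexWriter w = new IndexWriter(dir, new SimpleAnalyzer(),
                    IndexWriter.MaxFieldLength.LIMITED);
            Document doc = new Document();
            doc.add(new Field("text", "hello world", Field.Store.YES,
                    Field.Index.ANALYZED));
            w.addDocument(doc);
            w.close();

            IndexReader reader = IndexReader.open(dir, true);
            IndexSearcher searcher = new IndexSearcher(reader);
            Query q = new TermQuery(new Term("text", "hello"));
            // same pattern as LuceneQueryHits: weight -> scorer -> next()
            Scorer scorer = q.weight(searcher).scorer(reader);
            while (scorer.next()) {
                System.out.println("doc=" + scorer.doc()
                        + " score=" + scorer.score());
            }
            reader.close();
        }
    }
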
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/MoreLikeThis.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/MoreLikeThis.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/MoreLikeThis.java (working copy)
@@ -25,10 +25,9 @@
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Hits;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
@@ -41,7 +40,6 @@
import java.io.IOException;
import java.io.Reader;
import java.io.File;
-import java.io.PrintStream;
import java.io.StringReader;
import java.io.FileReader;
import java.io.InputStreamReader;
@@ -86,9 +84,6 @@
*
*
@@ -144,9 +139,6 @@
* - optimise: when no termvector support available - used maxNumTermsParsed to limit amount of tokenization
*
*
- * @author David Spencer
- * @author Bruce Ritchie
- * @author Mark Harwood
*/
public final class MoreLikeThis {
@@ -174,7 +166,7 @@
* @see #getMinDocFreq
* @see #setMinDocFreq
*/
- public static final int DEFALT_MIN_DOC_FREQ = 5;
+ public static final int DEFAULT_MIN_DOC_FREQ = 5;
/**
* Boost terms in query based on score.
@@ -239,7 +231,7 @@
/**
* Ignore words which do not occur in at least this many docs.
*/
- private int minDocFreq = DEFALT_MIN_DOC_FREQ;
+ private int minDocFreq = DEFAULT_MIN_DOC_FREQ;
/**
* Should we apply a boost to the Query based on the scores?
@@ -274,7 +266,7 @@
/**
* For idf() calculations.
*/
- private Similarity similarity = new DefaultSimilarity();
+ private Similarity similarity;
/**
* IndexReader to use
@@ -285,10 +277,24 @@
* Constructor requiring an IndexReader.
*/
public MoreLikeThis(IndexReader ir) {
- this.ir = ir;
+ this(ir, new DefaultSimilarity());
}
- /**
+ public MoreLikeThis(IndexReader ir, Similarity sim) {
+ this.ir = ir;
+ this.similarity = sim;
+ }
+
+
+ public Similarity getSimilarity() {
+ return similarity;
+ }
+
+ public void setSimilarity(Similarity similarity) {
+ this.similarity = similarity;
+ }
+
+ /**
* Returns an analyzer that will be used to parse source doc with. The default analyzer
* is the {@link #DEFAULT_ANALYZER}.
*
@@ -330,7 +336,7 @@
/**
* Returns the frequency at which words will be ignored which do not occur in at least this
- * many docs. The default frequency is {@link #DEFALT_MIN_DOC_FREQ}.
+ * many docs. The default frequency is {@link #DEFAULT_MIN_DOC_FREQ}.
*
* @return the frequency at which words will be ignored which do not occur in at least this
* many docs.
@@ -595,12 +601,11 @@
int numDocs = ir.numDocs();
FreqQ res = new FreqQ(words.size()); // will order words by score
- Iterator it = words.entrySet().iterator();
+ Iterator it = words.keySet().iterator();
while (it.hasNext()) { // for every word
- Map.Entry entry = (Map.Entry) it.next();
- String word = (String) entry.getKey();
+ String word = (String) it.next();
- int tf = ((Int) entry.getValue()).x; // term freq in the source doc
+ int tf = ((Int) words.get(word)).x; // term freq in the source doc
if (minTermFreq > 0 && tf < minTermFreq) {
continue; // filter out words that don't occur enough times in the source
}
@@ -645,7 +650,7 @@
sb.append("\t" + "maxQueryTerms : " + maxQueryTerms + "\n");
sb.append("\t" + "minWordLen : " + minWordLen + "\n");
sb.append("\t" + "maxWordLen : " + maxWordLen + "\n");
- sb.append("\t" + "fieldNames : \"");
+ sb.append("\t" + "fieldNames : ");
String delim = "";
for (int i = 0; i < fieldNames.length; i++) {
String fieldName = fieldNames[i];
@@ -660,72 +665,11 @@
}
/**
- * Test driver.
- * Pass in "-i INDEX" and then either "-fn FILE" or "-url URL".
- */
- public static void main(String[] a) throws Throwable {
- String indexName = "localhost_index";
- String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
- URL url = null;
- for (int i = 0; i < a.length; i++) {
- if (a[i].equals("-i")) {
- indexName = a[++i];
- }
- else if (a[i].equals("-f")) {
- fn = a[++i];
- }
- else if (a[i].equals("-url")) {
- url = new URL(a[++i]);
- }
- }
-
- PrintStream o = System.out;
- IndexReader r = IndexReader.open(indexName);
- o.println("Open index " + indexName + " which has " + r.numDocs() + " docs");
-
- MoreLikeThis mlt = new MoreLikeThis(r);
-
- o.println("Query generation parameters:");
- o.println(mlt.describeParams());
- o.println();
-
- Query query = null;
- if (url != null) {
- o.println("Parsing URL: " + url);
- query = mlt.like(url);
- }
- else if (fn != null) {
- o.println("Parsing file: " + fn);
- query = mlt.like(new File(fn));
- }
-
- o.println("q: " + query);
- o.println();
- IndexSearcher searcher = new IndexSearcher(indexName);
-
- Hits hits = searcher.search(query);
- int len = hits.length();
- o.println("found: " + len + " documents matching");
- o.println();
- for (int i = 0; i < Math.min(25, len); i++) {
- Document d = hits.doc(i);
- String summary = d.get( "summary");
- o.println("score : " + hits.score(i));
- o.println("url : " + d.get("url"));
- o.println("\ttitle : " + d.get("title"));
- if (summary != null) {
- o.println("\tsummary: " + d.get("summary"));
- }
- o.println();
- }
- }
-
- /**
* Find words for a more-like-this query former.
*
* @param docNum the id of the lucene document from which to find terms
*/
- private PriorityQueue retrieveTerms(int docNum) throws IOException {
+ public PriorityQueue retrieveTerms(int docNum) throws IOException {
Map termFreqMap = new HashMap();
for (int i = 0; i < fieldNames.length; i++) {
String fieldName = fieldNames[i];
@@ -786,10 +730,11 @@
private void addTermFrequencies(Reader r, Map termFreqMap, String fieldName)
throws IOException {
TokenStream ts = analyzer.tokenStream(fieldName, r);
- org.apache.lucene.analysis.Token token;
int tokenCount = 0;
- while ((token = ts.next()) != null) { // for every token
- String word = token.termText();
+ // for every token
+ final Token reusableToken = new Token();
+ for (Token nextToken = ts.next(reusableToken); nextToken != null; nextToken = ts.next(reusableToken)) {
+ String word = nextToken.term();
tokenCount++;
if (tokenCount > maxNumTokensParsed) {
break;
@@ -802,8 +747,7 @@
Int cnt = (Int) termFreqMap.get(word);
if (cnt == null) {
termFreqMap.put(word, new Int());
- }
- else {
+ } else {
cnt.x++;
}
}
@@ -847,7 +791,7 @@
* For an easier method to call see {@link #retrieveInterestingTerms retrieveInterestingTerms()}.
*
* @param r the reader that has the content of the document
- * @return the most intresting words in the document ordered by score, with the highest scoring, or best entry, first
+ * @return the most interesting words in the document ordered by score, with the highest scoring, or best entry, first
*
* @see #retrieveInterestingTerms
*/
@@ -861,6 +805,23 @@
}
/**
+ * @see #retrieveInterestingTerms(java.io.Reader)
+ */
+ public String[] retrieveInterestingTerms(int docNum) throws IOException {
+ ArrayList al = new ArrayList(maxQueryTerms);
+ PriorityQueue pq = retrieveTerms(docNum);
+ Object cur;
+ int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
+ // we just want to return the top words
+ while (((cur = pq.pop()) != null) && lim-- > 0) {
+ Object[] ar = (Object[]) cur;
+ al.add(ar[0]); // the 1st entry is the interesting word
+ }
+ String[] res = new String[al.size()];
+ return (String[]) al.toArray(res);
+ }
+
+ /**
* Convenience routine to make it easy to return the most interesting words in a document.
* More advanced users will call {@link #retrieveTerms(java.io.Reader) retrieveTerms()} directly.
* @param r the source document
@@ -869,18 +830,18 @@
* @see #retrieveTerms(java.io.Reader)
* @see #setMaxQueryTerms
*/
- public String[] retrieveInterestingTerms( Reader r) throws IOException {
- ArrayList al = new ArrayList( maxQueryTerms);
- PriorityQueue pq = retrieveTerms( r);
- int lim = maxQueryTerms;
- // have to be careful, retrieveTerms returns all words
- // but that's probably not useful to our caller...
+ public String[] retrieveInterestingTerms(Reader r) throws IOException {
+ ArrayList al = new ArrayList(maxQueryTerms);
+ PriorityQueue pq = retrieveTerms(r);
+ Object cur;
+ int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
// we just want to return the top words
- for (Object cur = pq.pop(); cur != null && lim-- > 0; cur = pq.pop()) {
+ while (((cur = pq.pop()) != null) && lim-- > 0) {
Object[] ar = (Object[]) cur;
al.add(ar[0]); // the 1st entry is the interesting word
}
- return (String[]) al.toArray(new String[al.size()]);
+ String[] res = new String[al.size()];
+ return (String[]) al.toArray(res);
}
/**
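
With the main() test driver removed, this class is exercised purely through its constructors and the retrieveTerms/retrieveInterestingTerms methods, and the Similarity used for idf() can now be injected instead of being hard-wired. A minimal usage sketch, assuming the same package and an index with a "text" field (field name and thresholds are illustrative):

    package org.apache.jackrabbit.core.query.lucene;

    import java.io.StringReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.DefaultSimilarity;
    import org.apache.lucene.store.Directory;

    public class MoreLikeThisDemo {
        public static String[] interestingTerms(Directory dir, String text)
                throws Exception {
            IndexReader reader = IndexReader.open(dir, true);
            try {
                // the Similarity for idf() can now be supplied explicitly
                MoreLikeThis mlt = new MoreLikeThis(reader, new DefaultSimilarity());
                mlt.setFieldNames(new String[]{"text"});
                mlt.setMinTermFreq(1); // relaxed so tiny test corpora still match
                mlt.setMinDocFreq(1);
                return mlt.retrieveInterestingTerms(new StringReader(text));
            } finally {
                reader.close();
            }
        }
    }
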
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java (working copy)
@@ -264,9 +264,6 @@
PersistentIndex index = new PersistentIndex(name,
handler.getTextAnalyzer(), handler.getSimilarity(),
cache, indexingQueue, directoryManager);
- index.setMaxMergeDocs(handler.getMaxMergeDocs());
- index.setMergeFactor(handler.getMergeFactor());
- index.setMinMergeDocs(handler.getMinMergeDocs());
index.setMaxFieldLength(handler.getMaxFieldLength());
index.setUseCompoundFile(handler.getUseCompoundFile());
index.setTermInfosIndexDivisor(handler.getTermInfosIndexDivisor());
@@ -570,9 +567,6 @@
PersistentIndex index = new PersistentIndex(indexName,
handler.getTextAnalyzer(), handler.getSimilarity(),
cache, indexingQueue, directoryManager);
- index.setMaxMergeDocs(handler.getMaxMergeDocs());
- index.setMergeFactor(handler.getMergeFactor());
- index.setMinMergeDocs(handler.getMinMergeDocs());
index.setMaxFieldLength(handler.getMaxFieldLength());
index.setUseCompoundFile(handler.getUseCompoundFile());
index.setTermInfosIndexDivisor(handler.getTermInfosIndexDivisor());
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java (working copy)
@@ -185,17 +185,17 @@
// UUID
doc.add(new Field(
FieldNames.UUID, node.getNodeId().getUUID().toString(),
- Field.Store.YES, Field.Index.NO_NORMS, Field.TermVector.NO));
+ Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
try {
// parent UUID
if (node.getParentId() == null) {
// root node
- doc.add(new Field(FieldNames.PARENT, "", Field.Store.YES, Field.Index.NO_NORMS, Field.TermVector.NO));
+ doc.add(new Field(FieldNames.PARENT, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
addNodeName(doc, "", "");
} else {
doc.add(new Field(
FieldNames.PARENT, node.getParentId().toString(),
- Field.Store.YES, Field.Index.NO_NORMS, Field.TermVector.NO));
+ Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
NodeState parent = (NodeState) stateProvider.getItemState(node.getParentId());
ChildNodeEntry child = parent.getChildNodeEntry(node.getNodeId());
if (child == null) {
@@ -276,7 +276,7 @@
private void addMVPName(Document doc, Name name) {
try {
String propName = resolver.getJCRName(name);
- doc.add(new Field(FieldNames.MVP, propName, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
+ doc.add(new Field(FieldNames.MVP, propName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
} catch (NamespaceException e) {
// will never happen, prefixes are created dynamically
}
@@ -377,7 +377,7 @@
} catch (NamespaceException e) {
// will never happen
}
- doc.add(new Field(FieldNames.PROPERTIES_SET, fieldName, Field.Store.NO, Field.Index.NO_NORMS));
+ doc.add(new Field(FieldNames.PROPERTIES_SET, fieldName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
}
/**
@@ -485,7 +485,7 @@
} else {
return new Field(FieldNames.PROPERTIES,
FieldNames.createNamedValue(fieldName, internalValue),
- Field.Store.NO, Field.Index.NO_NORMS,
+ Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
Field.TermVector.NO);
}
}
@@ -669,7 +669,7 @@
+ FieldNames.FULLTEXT_PREFIX + fieldName.substring(idx + 1);
Field f = new Field(fieldName, stringValue,
Field.Store.NO,
- Field.Index.TOKENIZED,
+ Field.Index.ANALYZED,
Field.TermVector.NO);
f.setBoost(boost);
doc.add(f);
@@ -746,10 +746,10 @@
stored = Field.Store.YES;
}
return new Field(FieldNames.FULLTEXT, value, stored,
- Field.Index.TOKENIZED, tv);
+ Field.Index.ANALYZED, tv);
} else {
return new Field(FieldNames.FULLTEXT, value,
- Field.Store.NO, Field.Index.TOKENIZED, tv);
+ Field.Store.NO, Field.Index.ANALYZED, tv);
}
}
@@ -863,7 +863,7 @@
}
doc.add(new Field(FieldNames.PROPERTY_LENGTHS,
FieldNames.createNamedLength(propertyName, length),
- Field.Store.NO, Field.Index.NO_NORMS));
+ Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
}
/**
@@ -878,11 +878,11 @@
String namespaceURI,
String localName) throws NamespaceException {
String name = mappings.getPrefix(namespaceURI) + ":" + localName;
- doc.add(new Field(FieldNames.LABEL, name, Field.Store.NO, Field.Index.NO_NORMS));
+ doc.add(new Field(FieldNames.LABEL, name, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
// as of version 3, also index combination of namespace URI and local name
if (indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion()) {
- doc.add(new Field(FieldNames.NAMESPACE_URI, namespaceURI, Field.Store.NO, Field.Index.NO_NORMS));
- doc.add(new Field(FieldNames.LOCAL_NAME, localName, Field.Store.NO, Field.Index.NO_NORMS));
+ doc.add(new Field(FieldNames.NAMESPACE_URI, namespaceURI, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
+ doc.add(new Field(FieldNames.LOCAL_NAME, localName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
}
}
}
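
The NodeIndexer changes are the mechanical Lucene 2.4 renames: TOKENIZED becomes ANALYZED, UN_TOKENIZED becomes NOT_ANALYZED, and NO_NORMS becomes NOT_ANALYZED_NO_NORMS, with one substantive exception: the MVP field moves from UN_TOKENIZED to NOT_ANALYZED_NO_NORMS and so additionally stops storing norms. A small sketch pinning down the mapping (field names and values are made up):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;

    public class FieldIndexRenames {
        public static Document example() {
            Document doc = new Document();
            // TOKENIZED -> ANALYZED: value is run through the analyzer
            doc.add(new Field("fulltext", "some text",
                    Field.Store.NO, Field.Index.ANALYZED));
            // UN_TOKENIZED -> NOT_ANALYZED: single term, norms kept
            doc.add(new Field("label", "jcr:content",
                    Field.Store.NO, Field.Index.NOT_ANALYZED));
            // NO_NORMS -> NOT_ANALYZED_NO_NORMS: single term, no norms stored
            doc.add(new Field("uuid", "deadbeef-cafe-babe-0000-000000000000",
                    Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
            return doc;
        }
    }
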
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/PersistentIndex.java (working copy)
@@ -79,24 +79,6 @@
}
/**
- * Merges another index into this persistent index. Before index
- * is merged, {@link AbstractIndex#commit()} is called on that
- * index.
- *
- * @param index the other index to merge.
- * @throws IOException if an error occurs while merging.
- */
- void mergeIndex(AbstractIndex index) throws IOException {
- // commit changes to directory on other index.
- index.commit();
- // merge index
- getIndexWriter().addIndexes(new Directory[]{
- index.getDirectory()
- });
- invalidateSharedReader();
- }
-
- /**
* Merges the provided indexes into this index. After this completes, the
* index is optimized.
*
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/RangeQuery.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/RangeQuery.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/RangeQuery.java (working copy)
@@ -22,13 +22,11 @@
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermDocs;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.BitSet;
@@ -40,19 +38,11 @@
import java.util.Set;
/**
- * Implements a variant of the lucene class {@link org.apache.lucene.search.RangeQuery}.
- * This class does not rewrite to basic {@link org.apache.lucene.search.TermQuery}
- * but will calculate the matching documents itself. That way a
- * TooManyClauses can be avoided.
+ * Implements a lucene range query.
*/
public class RangeQuery extends Query implements Transformable {
/**
- * Logger instance for this class.
- */
- private static final Logger log = LoggerFactory.getLogger(RangeQuery.class);
-
- /**
* The lower term. May be null if upperTerm is not
* null.
*/
@@ -76,12 +66,6 @@
private int transform = TRANSFORM_NONE;
/**
- * The rewritten range query or null if the range spans more
- * than {@link org.apache.lucene.search.BooleanQuery#maxClauseCount} terms.
- */
- private Query stdRangeQuery;
-
- /**
* Creates a new RangeQuery. The lower or the upper term may be
* null, but not both!
*
@@ -130,9 +114,8 @@
}
/**
- * Tries to rewrite this query into a standard lucene RangeQuery.
- * This rewrite might fail with a TooManyClauses exception. If that
- * happens, we use our own implementation.
+ * Rewrites this query into a {@link ConstantScoreRangeQuery} if
+ * {@link #transform} is {@link #TRANSFORM_NONE}.
*
* @param reader the index reader.
* @return the rewritten query or this query if rewriting is not possible.
@@ -140,16 +123,9 @@
*/
public Query rewrite(IndexReader reader) throws IOException {
if (transform == TRANSFORM_NONE) {
- Query stdRangeQueryImpl
- = new org.apache.lucene.search.RangeQuery(lowerTerm, upperTerm, inclusive);
- try {
- stdRangeQuery = stdRangeQueryImpl.rewrite(reader);
- return stdRangeQuery;
- } catch (BooleanQuery.TooManyClauses e) {
- log.debug("Too many terms to enumerate, using custom RangeQuery");
- // failed, use own implementation
- return this;
- }
+ return new ConstantScoreRangeQuery(lowerTerm.field(),
+ lowerTerm.text(), upperTerm.text(), inclusive,
+ inclusive).rewrite(reader);
} else {
// always use our implementation when we need to transform the
// term enum
@@ -194,9 +170,7 @@
* {@inheritDoc}
*/
public void extractTerms(Set terms) {
- if (stdRangeQuery != null) {
- stdRangeQuery.extractTerms(terms);
- }
+ // cannot extract terms
}
/**
@@ -503,7 +477,7 @@
* @param other the other String.
* @param offset start comparing the two strings at offset.
* @return see {@link String#compareTo(Object)}. But also respects {@link
- * #transform}.
+ * RangeQuery#transform}.
*/
private int termCompare(String text, String other, int offset) {
OffsetCharSequence seq1 = new OffsetCharSequence(offset, text, transform);
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (working copy)
@@ -1190,7 +1190,7 @@
doc.add(new Field(FieldNames.AGGREGATED_NODE_UUID,
aggregates[j].getNodeId().getUUID().toString(),
Field.Store.NO,
- Field.Index.NO_NORMS));
+ Field.Index.NOT_ANALYZED_NO_NORMS));
}
}
// only use first aggregate definition that matches
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SingletonTokenStream.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/SingletonTokenStream.java (revision 723728)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/SingletonTokenStream.java (working copy)
@@ -16,6 +16,8 @@
*/
package org.apache.jackrabbit.core.query.lucene;
+import java.io.IOException;
+
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Payload;
@@ -28,11 +30,16 @@
public final class SingletonTokenStream extends TokenStream {
/**
- * The single token to return.
+ * The string value of the token.
*/
- private Token t;
+ private String value;
/**
+ * The payload of the token.
+ */
+ private final Payload payload;
+
+ /**
* Creates a new SingleTokenStream with the given value and a property
* type.
*
@@ -40,19 +47,23 @@
* @param type the JCR property type.
*/
public SingletonTokenStream(String value, int type) {
- super();
- t = new Token(value, 0, value.length());
- t.setPayload(new Payload(new PropertyMetaData(type).toByteArray()));
+ this.value = value;
+ this.payload = new Payload(new PropertyMetaData(type).toByteArray());
}
/**
* {@inheritDoc}
*/
- public Token next() {
- try {
- return t;
- } finally {
- t = null;
+ public Token next(Token reusableToken) throws IOException {
+ if (value == null) {
+ return null;
}
+ reusableToken.clear();
+ reusableToken.setTermBuffer(value);
+ reusableToken.setPayload(payload);
+ reusableToken.setStartOffset(0);
+ reusableToken.setEndOffset(value.length());
+ value = null;
+ return reusableToken;
}
}
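
The stream now materializes its single token into the caller's reusable instance on demand instead of pre-building a Token. A short consumption sketch, assuming the same package; the property-type code passed to the constructor is an arbitrary stand-in:

    package org.apache.jackrabbit.core.query.lucene;

    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.index.Payload;

    public class SingletonStreamDemo {
        public static void main(String[] args) throws Exception {
            // 1 is an arbitrary stand-in for a JCR property type code
            SingletonTokenStream ts = new SingletonTokenStream("hello", 1);
            Token t = ts.next(new Token());
            System.out.println(t.term());             // prints: hello
            Payload p = t.getPayload();               // the encoded property type
            System.out.println("payload bytes: " + p.length());
            System.out.println(ts.next(new Token())); // prints: null (exhausted)
        }
    }
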
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SortedLuceneQueryHits.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/SortedLuceneQueryHits.java (revision 0)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/SortedLuceneQueryHits.java (revision 0)
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopFieldDocCollector;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.index.IndexReader;
+import org.apache.jackrabbit.core.NodeId;
+import org.apache.jackrabbit.uuid.UUID;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * Wraps a lucene query result and adds a close method that allows
+ * resources to be released after a query has been executed and the results
+ * have been read completely.
+ */
+public final class SortedLuceneQueryHits extends AbstractQueryHits {
+
+ /**
+ * The IndexReader in use by the lucene hits.
+ */
+ private final IndexReader reader;
+
+ /**
+ * The index searcher.
+ */
+ private final JackrabbitIndexSearcher searcher;
+
+ /**
+ * The query to execute.
+ */
+ private final Query query;
+
+ /**
+ * The sort criteria.
+ */
+ private final Sort sort;
+
+ /**
+ * The index of the current hit. Initially invalid.
+ */
+ private int hitIndex = -1;
+
+ /**
+ * The score nodes.
+ */
+ private final List scoreNodes = new ArrayList();
+
+ /**
+ * The total number of hits.
+ */
+ private int size;
+
+ /**
+ * Number of hits to retrieve.
+ */
+ private int numHits = 50;
+
+ /**
+ * Creates a new QueryHits instance for the given query and sort.
+ *
+ * @param reader the IndexReader in use.
+ * @param searcher the index searcher.
+ * @param query the query to execute.
+ * @param sort the sort criteria.
+ * @throws IOException if an error occurs while reading from the index.
+ */
+ public SortedLuceneQueryHits(IndexReader reader,
+ JackrabbitIndexSearcher searcher,
+ Query query,
+ Sort sort) throws IOException {
+ this.reader = reader;
+ this.searcher = searcher;
+ this.query = query;
+ this.sort = sort;
+ getHits();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public int getSize() {
+ return size;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public ScoreNode nextScoreNode() throws IOException {
+ if (++hitIndex >= size) {
+ return null; // no more score nodes
+ }
+ while (hitIndex >= scoreNodes.size()) {
+ // refill until the requested hit is available; a single refill
+ // may not suffice after a large skip()
+ getHits();
+ }
+ return (ScoreNode) scoreNodes.get(hitIndex);
+
+ /**
+ * Skips n hits.
+ *
+ * @param n the number of hits to skip.
+ * @throws IOException if an error occurs while skipping.
+ */
+ public void skip(int n) throws IOException {
+ hitIndex += n;
+ }
+
+ //-------------------------------< internal >-------------------------------
+
+ private int getHits() throws IOException {
+ // double the number of hits to collect on each refill
+ numHits *= 2;
+ TopFieldDocCollector collector = new TopFieldDocCollector(reader, sort, numHits);
+ searcher.search(query, collector);
+ this.size = collector.getTotalHits();
+ ScoreDoc[] docs = collector.topDocs().scoreDocs;
+ int num = 0;
+ for (int i = scoreNodes.size(); i < docs.length; i++) {
+ String uuid = reader.document(docs[i].doc).get(FieldNames.UUID);
+ NodeId id = new NodeId(UUID.fromString(uuid));
+ scoreNodes.add(new ScoreNode(id, docs[i].score));
+ num++;
+ }
+ return num;
+ }
+}
Property changes on: src\main\java\org\apache\jackrabbit\core\query\lucene\SortedLuceneQueryHits.java
___________________________________________________________________
Added: svn:eol-style
+ native
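
For reference, SortedLuceneQueryHits pages its results through TopFieldDocCollector, doubling numHits on each refill so that queries reading only the first few sorted hits stay cheap. A self-contained sketch of the underlying Lucene 2.4 collector pattern (index contents and sort field are made up):

    import org.apache.lucene.analysis.SimpleAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.search.TopFieldDocCollector;
    import org.apache.lucene.store.RAMDirectory;

    public class SortedCollectorDemo {
        public static void main(String[] args) throws Exception {
            RAMDirectory dir = new RAMDirectory();
            IndexWriter w = new IndexWriter(dir, new SimpleAnalyzer(),
                    IndexWriter.MaxFieldLength.LIMITED);
            String[] names = {"b", "a", "c"};
            for (int i = 0; i < names.length; i++) {
                Document doc = new Document();
                doc.add(new Field("name", names[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                w.addDocument(doc);
            }
            w.close();

            IndexReader reader = IndexReader.open(dir, true);
            IndexSearcher searcher = new IndexSearcher(reader);
            Sort sort = new Sort(new SortField("name", SortField.STRING));
            // collect only the top 2 sorted hits; the total is still reported
            TopFieldDocCollector collector =
                    new TopFieldDocCollector(reader, sort, 2);
            searcher.search(new MatchAllDocsQuery(), collector);
            System.out.println("total=" + collector.getTotalHits());
            ScoreDoc[] docs = collector.topDocs().scoreDocs;
            for (int i = 0; i < docs.length; i++) {
                System.out.println(reader.document(docs[i].doc).get("name"));
            }
            reader.close();
        }
    }
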