Index: src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java (revision 754888) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java (working copy) @@ -17,12 +17,16 @@ package org.apache.jackrabbit.core.query.lucene; import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.document.Field; import org.apache.lucene.index.FilterIndexReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.CorruptIndexException; import org.apache.jackrabbit.uuid.UUID; +import org.apache.commons.collections.map.LRUMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,6 +35,7 @@ import java.util.Map; import java.util.HashMap; import java.util.Iterator; +import java.util.Collections; import java.text.NumberFormat; import EDU.oswego.cs.dl.util.concurrent.Executor; @@ -88,6 +93,16 @@ private final DocNumberCache cache; /** + * Maps document number to node UUID. + */ + private final Map docNumber2uuid; + + /** + * A cache of TermDocs that are regularly read from the index. + */ + private final TermDocsCache termDocsCache; + + /** * Creates a new CachingIndexReader based on * delegatee * @@ -116,6 +131,10 @@ // ignore } } + // limit cache to 1% of maxDoc(), but at least 10. 
+ this.docNumber2uuid = Collections.synchronizedMap(new LRUMap( + Math.max(10, delegatee.maxDoc() / 100))); + this.termDocsCache = new TermDocsCache(delegatee, FieldNames.PROPERTIES); } /** @@ -193,13 +212,44 @@ //--------------------< FilterIndexReader overwrites >---------------------- /** + * Uses the {@link #docNumber2uuid} cache for document lookups that are only + * interested in the {@link FieldSelectors#UUID}. + * + * @param n the document number. + * @param fieldSelector the field selector. + * @return the document. + * @throws CorruptIndexException if the index is corrupt. + * @throws IOException if an error occurs while reading from the index. + */ + public Document document(int n, FieldSelector fieldSelector) + throws CorruptIndexException, IOException { + if (fieldSelector == FieldSelectors.UUID) { + Integer docNum = new Integer(n); + Document doc; + UUID uuid = (UUID) docNumber2uuid.get(docNum); + if (uuid == null) { + doc = super.document(n, fieldSelector); + uuid = UUID.fromString(doc.get(FieldNames.UUID)); + docNumber2uuid.put(docNum, uuid); + } else { + doc = new Document(); + doc.add(new Field(FieldNames.UUID, uuid.toString(), + Field.Store.YES, Field.Index.NO_NORMS)); + } + return doc; + } else { + return super.document(n, fieldSelector); + } + } + + /** * If the field of term is {@link FieldNames#UUID} this * CachingIndexReader returns a TermDocs instance * with a cached document id. If term has any other field * the call is delegated to the base IndexReader.
* If term is for a {@link FieldNames#UUID} field and this * CachingIndexReader does not have such a document, - * {@link #EMPTY} is returned. + * {@link EmptyTermDocs#INSTANCE} is returned. * * @param term the term to start the TermDocs enumeration. * @return a TermDocs instance. @@ -232,14 +282,14 @@ // and return return new SingleTermDocs(docs.doc()); } else { - return EMPTY; + return EmptyTermDocs.INSTANCE; } } finally { docs.close(); } } } - return super.termDocs(term); + return termDocsCache.termDocs(term); } /** @@ -476,39 +526,4 @@ this.uuid = uuid; } } - - /** - * Implements an empty TermDocs. - */ - static final TermDocs EMPTY = new TermDocs() { - - public void seek(Term term) { - } - - public void seek(TermEnum termEnum) { - } - - public int doc() { - return -1; - } - - public int freq() { - return -1; - } - - public boolean next() { - return false; - } - - public int read(int[] docs, int[] freqs) { - return 0; - } - - public boolean skipTo(int target) { - return false; - } - - public void close() { - } - }; } Index: src/main/java/org/apache/jackrabbit/core/query/lucene/EmptyTermDocs.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/EmptyTermDocs.java (revision 0) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/EmptyTermDocs.java (revision 0) @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.query.lucene; + +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermEnum; + +/** + * EmptyTermDocs implements a TermDocs, which is empty. + */ +class EmptyTermDocs implements TermDocs { + + /** + * Single instance of this class. + */ + public static final TermDocs INSTANCE = new EmptyTermDocs(); + + private EmptyTermDocs() { + } + + public void seek(Term term) { + } + + public void seek(TermEnum termEnum) { + } + + public int doc() { + return -1; + } + + public int freq() { + return -1; + } + + public boolean next() { + return false; + } + + public int read(int[] docs, int[] freqs) { + return 0; + } + + public boolean skipTo(int target) { + return false; + } + + public void close() { + } +} Property changes on: src\main\java\org\apache\jackrabbit\core\query\lucene\EmptyTermDocs.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitTermQuery.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitTermQuery.java (revision 0) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitTermQuery.java (revision 0) @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.query.lucene; + +import java.io.IOException; + +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader; + +/** + * JackrabbitTermQuery implements a {@link TermQuery} where score + * values are retrieved on a per index segment basis using {@link MultiScorer}. + */ +public class JackrabbitTermQuery extends TermQuery { + + private static final long serialVersionUID = 4244799812287335957L; + + public JackrabbitTermQuery(Term t) { + super(t); + } + + protected Weight createWeight(Searcher searcher) throws IOException { + return new JackrabbitTermWeight(searcher, super.createWeight(searcher)); + } + + /** + * The weight implementation. + */ + protected class JackrabbitTermWeight extends AbstractWeight { + + private static final long serialVersionUID = -2070964510010945854L; + + /** + * The default lucene TermQuery weight. 
+ */ + private final Weight weight; + + public JackrabbitTermWeight(Searcher searcher, Weight weight) { + super(searcher); + this.weight = weight; + } + + /** + * {@inheritDoc} + */ + protected Scorer createScorer(IndexReader reader) throws IOException { + return weight.scorer(reader); + } + + /** + * {@inheritDoc} + */ + public Query getQuery() { + return JackrabbitTermQuery.this; + } + + /** + * {@inheritDoc} + */ + public float getValue() { + return weight.getValue(); + } + + /** + * {@inheritDoc} + */ + public float sumOfSquaredWeights() throws IOException { + return weight.sumOfSquaredWeights(); + } + + /** + * {@inheritDoc} + */ + public void normalize(float norm) { + weight.normalize(norm); + } + + /** + * {@inheritDoc} + */ + public Explanation explain(IndexReader reader, int doc) throws + IOException { + return weight.explain(reader, doc); + } + } +} Property changes on: src\main\java\org\apache\jackrabbit\core\query\lucene\JackrabbitTermQuery.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java (revision 754888) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java (working copy) @@ -385,7 +385,7 @@ switch (operator) { case OPERATOR_EQUAL_TO: - return new TermQuery(new Term(FieldNames.PROPERTY_LENGTHS, namedLength)); + return new JackrabbitTermQuery(new Term(FieldNames.PROPERTY_LENGTHS, namedLength)); case OPERATOR_GREATER_THAN: Term lower = new Term(FieldNames.PROPERTY_LENGTHS, namedLength); Term upper = new Term(FieldNames.PROPERTY_LENGTHS, @@ -412,7 +412,7 @@ Query all = Util.createMatchAllQuery(propName, version); BooleanQuery b = new BooleanQuery(); b.add(all, BooleanClause.Occur.SHOULD); - b.add(new TermQuery(new 
Term(FieldNames.PROPERTY_LENGTHS, namedLength)), + b.add(new JackrabbitTermQuery(new Term(FieldNames.PROPERTY_LENGTHS, namedLength)), BooleanClause.Occur.MUST_NOT); return b; default: @@ -450,7 +450,7 @@ switch (operator) { case OPERATOR_EQUAL_TO: - return new TermQuery(new Term(FieldNames.LOCAL_NAME, value)); + return new JackrabbitTermQuery(new Term(FieldNames.LOCAL_NAME, value)); case OPERATOR_GREATER_THAN: return new LocalNameRangeQuery(value, null, false); case OPERATOR_GREATER_THAN_OR_EQUAL_TO: @@ -469,7 +469,7 @@ MatchAllDocsQuery all = new MatchAllDocsQuery(); BooleanQuery b = new BooleanQuery(); b.add(all, BooleanClause.Occur.SHOULD); - b.add(new TermQuery(new Term(FieldNames.LOCAL_NAME, value)), + b.add(new JackrabbitTermQuery(new Term(FieldNames.LOCAL_NAME, value)), BooleanClause.Occur.MUST_NOT); return b; default: @@ -572,7 +572,7 @@ String text = FieldNames.createNamedValue(propName, stringValue); switch (operator) { case OPERATOR_EQUAL_TO: - return new TermQuery(new Term(FieldNames.PROPERTIES, text)); + return new JackrabbitTermQuery(new Term(FieldNames.PROPERTIES, text)); case OPERATOR_GREATER_THAN: Term lower = new Term(FieldNames.PROPERTIES, text); Term upper = new Term(FieldNames.PROPERTIES, @@ -604,7 +604,7 @@ Query all = Util.createMatchAllQuery(propName, version); BooleanQuery b = new BooleanQuery(); b.add(all, BooleanClause.Occur.SHOULD); - b.add(new TermQuery(new Term(FieldNames.PROPERTIES, text)), + b.add(new JackrabbitTermQuery(new Term(FieldNames.PROPERTIES, text)), BooleanClause.Occur.MUST_NOT); return b; default: @@ -707,11 +707,11 @@ } Query q; if (terms.size() == 1) { - q = new TermQuery((Term) terms.get(0)); + q = new JackrabbitTermQuery((Term) terms.get(0)); } else { BooleanQuery b = new BooleanQuery(); for (Iterator it = terms.iterator(); it.hasNext();) { - b.add(new TermQuery((Term) it.next()), BooleanClause.Occur.SHOULD); + b.add(new JackrabbitTermQuery((Term) it.next()), BooleanClause.Occur.SHOULD); } q = b; } @@ -801,7 +801,7 @@ 
} } else if (operand instanceof CaseTermQuery) { CaseTermQuery ctq = (CaseTermQuery) operand; - return transformTermQuery(new TermQuery(ctq.getTerm()), toUpperCase); + return transformTermQuery(new JackrabbitTermQuery(ctq.getTerm()), toUpperCase); } else if (operand instanceof MatchAllQuery) { return operand; } else if (operand instanceof BooleanQuery) { Index: src/main/java/org/apache/jackrabbit/core/query/lucene/LocalNameQuery.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/LocalNameQuery.java (revision 754888) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/LocalNameQuery.java (working copy) @@ -17,7 +17,6 @@ package org.apache.jackrabbit.core.query.lucene; import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -56,7 +55,7 @@ */ public Query rewrite(IndexReader reader) throws IOException { if (version.getVersion() >= IndexFormatVersion.V3.getVersion()) { - return new TermQuery(new Term(FieldNames.LOCAL_NAME, localName)); + return new JackrabbitTermQuery(new Term(FieldNames.LOCAL_NAME, localName)); } else { throw new IOException("LocalNameQuery requires IndexFormatVersion V3"); } Index: src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java (revision 754888) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java (working copy) @@ -69,7 +69,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.ParseException; @@ 
-296,7 +295,7 @@ } catch (NamespaceException e) { // will never happen, prefixes are created when unknown } - return new TermQuery(new Term(FieldNames.PROPERTIES, FieldNames.createNamedValue(field, value))); + return new JackrabbitTermQuery(new Term(FieldNames.PROPERTIES, FieldNames.createNamedValue(field, value))); } public Object visit(NodeTypeQueryNode node, Object data) { @@ -353,11 +352,11 @@ // exception occured return new BooleanQuery(); } else if (terms.size() == 1) { - return new TermQuery((Term) terms.get(0)); + return new JackrabbitTermQuery((Term) terms.get(0)); } else { BooleanQuery b = new BooleanQuery(); for (Iterator it = terms.iterator(); it.hasNext();) { - b.add(new TermQuery((Term) it.next()), Occur.SHOULD); + b.add(new JackrabbitTermQuery((Term) it.next()), Occur.SHOULD); } return b; } @@ -430,15 +429,15 @@ Name nameTest = steps[0].getNameTest(); if (nameTest == null) { // this is equivalent to the root node - context = new TermQuery(new Term(FieldNames.PARENT, "")); + context = new JackrabbitTermQuery(new Term(FieldNames.PARENT, "")); } else if (nameTest.getLocalName().length() == 0) { // root node - context = new TermQuery(new Term(FieldNames.PARENT, "")); + context = new JackrabbitTermQuery(new Term(FieldNames.PARENT, "")); } else { // then this is a node != the root node // will never match anything! 
BooleanQuery and = new BooleanQuery(); - and.add(new TermQuery(new Term(FieldNames.PARENT, "")), Occur.MUST); + and.add(new JackrabbitTermQuery(new Term(FieldNames.PARENT, "")), Occur.MUST); and.add(new NameQuery(nameTest, indexFormatVersion, nsMappings), Occur.MUST); context = and; } @@ -448,7 +447,7 @@ } else { // path is 1) relative or 2) descendant-or-self // use root node as context - context = new TermQuery(new Term(FieldNames.PARENT, "")); + context = new JackrabbitTermQuery(new Term(FieldNames.PARENT, "")); } } else { exceptions.add(new InvalidQueryException("Number of location steps must be > 0")); @@ -715,7 +714,7 @@ } else if (transform[0] == TransformConstants.TRANSFORM_LOWER_CASE) { q = new CaseTermQuery.Lower(t); } else { - q = new TermQuery(t); + q = new JackrabbitTermQuery(t); } or.add(q, Occur.SHOULD); } @@ -798,12 +797,12 @@ } else if (transform[0] == TransformConstants.TRANSFORM_LOWER_CASE) { q = new CaseTermQuery.Lower(t); } else { - q = new TermQuery(t); + q = new JackrabbitTermQuery(t); } notQuery.add(q, Occur.MUST_NOT); } // and exclude all nodes where 'field' is multi valued - notQuery.add(new TermQuery(new Term(FieldNames.MVP, field)), Occur.MUST_NOT); + notQuery.add(new JackrabbitTermQuery(new Term(FieldNames.MVP, field)), Occur.MUST_NOT); query = notQuery; break; case QueryConstants.OPERATION_NE_GENERAL: // != @@ -818,7 +817,7 @@ for (int i = 0; i < stringValues.length; i++) { // exclude the nodes that have the term and are single valued Term t = new Term(FieldNames.PROPERTIES, FieldNames.createNamedValue(field, stringValues[i])); - Query svp = new NotQuery(new TermQuery(new Term(FieldNames.MVP, field))); + Query svp = new NotQuery(new JackrabbitTermQuery(new Term(FieldNames.MVP, field))); BooleanQuery and = new BooleanQuery(); Query q; if (transform[0] == TransformConstants.TRANSFORM_UPPER_CASE) { @@ -826,7 +825,7 @@ } else if (transform[0] == TransformConstants.TRANSFORM_LOWER_CASE) { q = new CaseTermQuery.Lower(t); } else { - q = new 
TermQuery(t); + q = new JackrabbitTermQuery(t); } and.add(q, Occur.MUST); and.add(svp, Occur.MUST); @@ -920,7 +919,7 @@ */ private Query createSingleValueConstraint(Query q, String propName) { // get nodes with multi-values in propName - Query mvp = new TermQuery(new Term(FieldNames.MVP, propName)); + Query mvp = new JackrabbitTermQuery(new Term(FieldNames.MVP, propName)); // now negate, that gives the nodes that have propName as single // values but also all others Query svp = new NotQuery(mvp); Index: src/main/java/org/apache/jackrabbit/core/query/lucene/MoreLikeThis.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/MoreLikeThis.java (revision 754888) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/MoreLikeThis.java (working copy) @@ -558,7 +558,7 @@ while (((cur = q.pop()) != null)) { Object[] ar = (Object[]) cur; - TermQuery tq = new TermQuery(new Term((String) ar[1], (String) ar[0])); + TermQuery tq = new JackrabbitTermQuery(new Term((String) ar[1], (String) ar[0])); if (boost) { if (qterms == 0) { Index: src/main/java/org/apache/jackrabbit/core/query/lucene/NameQuery.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/NameQuery.java (revision 754888) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/NameQuery.java (working copy) @@ -18,7 +18,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -77,16 +76,16 @@ if (version.getVersion() >= IndexFormatVersion.V3.getVersion()) { // use LOCAL_NAME and NAMESPACE_URI field BooleanQuery name = new BooleanQuery(); - name.add(new TermQuery(new Term(FieldNames.NAMESPACE_URI, nodeName.getNamespaceURI())), + name.add(new 
JackrabbitTermQuery(new Term(FieldNames.NAMESPACE_URI, nodeName.getNamespaceURI())), BooleanClause.Occur.MUST); - name.add(new TermQuery(new Term(FieldNames.LOCAL_NAME, + name.add(new JackrabbitTermQuery(new Term(FieldNames.LOCAL_NAME, nodeName.getLocalName())), BooleanClause.Occur.MUST); return name; } else { // use LABEL field try { - return new TermQuery(new Term(FieldNames.LABEL, + return new JackrabbitTermQuery(new Term(FieldNames.LABEL, nsMappings.translateName(nodeName))); } catch (IllegalNameException e) { throw Util.createIOException(e); Index: src/main/java/org/apache/jackrabbit/core/query/lucene/NameRangeQuery.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/NameRangeQuery.java (revision 754888) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/NameRangeQuery.java (working copy) @@ -18,7 +18,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; @@ -97,7 +96,7 @@ RangeQuery localNames = new RangeQuery(getLowerLocalNameTerm(), getUpperLocalNameTerm(), inclusive); BooleanQuery query = new BooleanQuery(); - query.add(new TermQuery(new Term(FieldNames.NAMESPACE_URI, + query.add(new JackrabbitTermQuery(new Term(FieldNames.NAMESPACE_URI, getNamespaceURI())), BooleanClause.Occur.MUST); query.add(localNames, BooleanClause.Occur.MUST); return query.rewrite(reader); Index: src/main/java/org/apache/jackrabbit/core/query/lucene/ReadOnlyIndexReader.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/ReadOnlyIndexReader.java (revision 754888) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/ReadOnlyIndexReader.java (working copy) @@ -184,7 +184,7 @@ * marked as deleted.
* If term is for a {@link FieldNames#UUID} field and this * ReadOnlyIndexReader does not have such a document, - * {@link CachingIndexReader#EMPTY} is returned. + * {@link EmptyTermDocs#INSTANCE} is returned. * * @param term the term to enumerate the docs for. * @return TermDocs for term. @@ -193,7 +193,7 @@ public TermDocs termDocs(Term term) throws IOException { // do not wrap for empty TermDocs TermDocs td = reader.termDocs(term); - if (td != CachingIndexReader.EMPTY) { + if (td != EmptyTermDocs.INSTANCE) { td = new FilteredTermDocs(td); } return td; Index: src/main/java/org/apache/jackrabbit/core/query/lucene/SharedIndexReader.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/SharedIndexReader.java (revision 754888) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/SharedIndexReader.java (working copy) @@ -66,7 +66,7 @@ * Simply passes the call to the wrapped reader as is.
* If term is for a {@link FieldNames#UUID} field and this * SharedIndexReader does not have such a document, - * {@link CachingIndexReader#EMPTY} is returned. + * {@link EmptyTermDocs#INSTANCE} is returned. * * @param term the term to enumerate the docs for. * @return TermDocs for term. Index: src/main/java/org/apache/jackrabbit/core/query/lucene/TermDocsCache.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/TermDocsCache.java (revision 0) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/TermDocsCache.java (revision 0) @@ -0,0 +1,281 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.core.query.lucene; + +import java.io.IOException; +import java.util.Map; +import java.util.Collections; +import java.util.BitSet; +import java.util.Arrays; +import java.util.Iterator; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermEnum; +import org.apache.commons.collections.map.LRUMap; +import org.apache.commons.collections.map.LinkedMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * TermDocsCache implements a cache for frequently read + * {@link TermDocs}. + */ +public class TermDocsCache { + + /** + * The logger instance for this class. + */ + private static final Logger log = LoggerFactory.getLogger(TermDocsCache.class); + + /** + * The default cache size. + */ + private static final int CACHE_SIZE = 10; + + /** + * The underlying index reader. + */ + private final IndexReader reader; + + /** + * Only TermDocs for the given field are cached. + */ + private final String field; + + /** + * Map of {@link Term#text()} that are unknown to the underlying index. + */ + private final Map unknownValues = Collections.synchronizedMap(new LRUMap(100)); + + /** + * The cache of the {@link #CACHE_SIZE} most frequently requested TermDocs. + * Maps term text String to {@link CacheEntry}. + */ + private final LinkedMap cache = new LinkedMap(); + + /** + * Creates a new cache for the given reader and + * field. + * + * @param reader the index reader. + * @param field the field name of the terms to potentially cache. + */ + public TermDocsCache(IndexReader reader, String field) { + this.reader = reader; + this.field = field; + } + + /** + * Returns the {@link TermDocs} for the given term. + * + * @param t the term. + * @return the term docs for the given term. + * @throws IOException if an error occurs while reading from the index. 
+ */ + public TermDocs termDocs(final Term t) throws IOException { + if (!t.field().equals(field)) { // by value: the configured field is stored as passed in and need not be interned + return reader.termDocs(t); + } + + String text = t.text(); + if (unknownValues.get(text) != null) { + log.debug("EmptyTermDocs({},{})", field, text); + return EmptyTermDocs.INSTANCE; + } + + // maintain cache + CacheEntry entry; + synchronized (cache) { + entry = (CacheEntry) cache.get(text); + if (entry == null) { + // check space + if (cache.size() >= CACHE_SIZE) { + // prune half of them and adjust the rest + CacheEntry[] entries = (CacheEntry[]) cache.values().toArray( + new CacheEntry[cache.size()]); + Arrays.sort(entries); + int threshold = entries[CACHE_SIZE / 2].numAccessed; + for (Iterator it = cache.entrySet().iterator(); it.hasNext(); ) { + Map.Entry e = (Map.Entry) it.next(); + if (((CacheEntry) e.getValue()).numAccessed <= threshold) { + // prune + it.remove(); + } else { + // adjust + CacheEntry ce = (CacheEntry) e.getValue(); + ce.numAccessed = (int) Math.sqrt(ce.numAccessed); + } + } + } + entry = new CacheEntry(); + cache.put(text, entry); + } else { + entry.numAccessed++; + } + } + + // this is a threshold to prevent caching of TermDocs + // that are read only irregularly. 
+ if (entry.numAccessed < 10) { + if (log.isDebugEnabled()) { + log.debug("#{} TermDocs({},{})", + new Object[]{new Integer(entry.numAccessed), + field, text}); + } + return reader.termDocs(t); + } + + if (entry.bits == null) { + // collect bits + BitSet bits = null; + TermDocs tDocs = reader.termDocs(t); + try { + while (tDocs.next()) { + if (bits == null) { + bits = new BitSet(reader.maxDoc()); + } + bits.set(tDocs.doc()); + } + } finally { + tDocs.close(); + } + if (bits != null) { + entry.bits = bits; + } + } + + if (entry.bits == null) { + // none collected + unknownValues.put(text, text); + return EmptyTermDocs.INSTANCE; + } else { + if (log.isDebugEnabled()) { + log.debug("CachedTermDocs({},{},{}/{})", new Object[]{ + field, text, new Integer(entry.bits.cardinality()), + new Integer(reader.maxDoc())}); + } + return new CachedTermDocs(entry.bits); + } + } + + /** + * Implements a {@link TermDocs} based on a {@link BitSet}. + */ + private static final class CachedTermDocs implements TermDocs { + + /** + * The cached docs for this term. + */ + private final BitSet docs; + + /** + * The current document number within {@link #docs}; -1 before the first document is read and once the docs are exhausted. + */ + private int position = -1; + + /** + * true if there are potentially more docs. + */ + private boolean moreDocs = true; + + public CachedTermDocs(BitSet docs) { + this.docs = docs; + } + + /** + * @throws UnsupportedOperationException always. + */ + public void seek(Term term) throws IOException { + throw new UnsupportedOperationException(); + } + + /** + * @throws UnsupportedOperationException always. 
+ */ + public void seek(TermEnum termEnum) throws IOException { + throw new UnsupportedOperationException(); + } + + /** + * {@inheritDoc} + */ + public int doc() { + return position; + } + + /** + * {@inheritDoc} + */ + public int freq() { + return 1; + } + + /** + * {@inheritDoc} + */ + public boolean next() throws IOException { + if (moreDocs) { + position = docs.nextSetBit(position + 1); + moreDocs = position != -1; + } + return moreDocs; + } + + /** + * {@inheritDoc} + */ + public int read(int[] docs, int[] freqs) throws IOException { + int count; + for (count = 0; count < docs.length && next(); count++) { + docs[count] = doc(); + freqs[count] = 1; + } + return count; + } + + /** + * {@inheritDoc} Always moves beyond the current document, even when target is not greater than the current document number, and never moves backwards; a negative target behaves like next(). + */ + public boolean skipTo(int target) throws IOException { + if (moreDocs) { + position = docs.nextSetBit(Math.max(target, position + 1)); + moreDocs = position != -1; + } + return moreDocs; + } + + /** + * {@inheritDoc} + */ + public void close() throws IOException { + } + } + + private static final class CacheEntry implements Comparable { + + private volatile int numAccessed = 1; + + private volatile BitSet bits; + + public int compareTo(Object o) { + CacheEntry other = (CacheEntry) o; + return (numAccessed < other.numAccessed ? -1 : (numAccessed == other.numAccessed ? 
0 : 1)); + } + } +} Property changes on: src\main\java\org\apache\jackrabbit\core\query\lucene\TermDocsCache.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java (revision 754888) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java (working copy) @@ -19,7 +19,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; import org.slf4j.LoggerFactory; @@ -93,7 +92,7 @@ public static Query createMatchAllQuery(String name, IndexFormatVersion version) { if (version.getVersion() >= IndexFormatVersion.V2.getVersion()) { // new index format style - return new TermQuery(new Term(FieldNames.PROPERTIES_SET, name)); + return new JackrabbitTermQuery(new Term(FieldNames.PROPERTIES_SET, name)); } else { return new MatchAllQuery(name); }