Index: solr/common-build.xml =================================================================== --- solr/common-build.xml (revision 1159014) +++ solr/common-build.xml (working copy) @@ -84,7 +84,6 @@ - @@ -171,7 +170,7 @@ + jar-misc, jar-spatial, jar-grouping, jar-queries, jar-queryparser"> @@ -192,7 +191,6 @@ - Index: modules/queries/src/test/org/apache/lucene/queries/FuzzyLikeThisQueryTest.java =================================================================== --- modules/queries/src/test/org/apache/lucene/queries/FuzzyLikeThisQueryTest.java (revision 1159014) +++ modules/queries/src/test/org/apache/lucene/queries/FuzzyLikeThisQueryTest.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search; +package org.apache.lucene.queries; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -24,11 +24,16 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import java.io.IOException; import java.util.HashSet; +import java.util.Set; public class FuzzyLikeThisQueryTest extends LuceneTestCase { private Directory directory; @@ -69,13 +74,12 @@ writer.addDocument(doc); } - //Tests that idf ranking is not favouring rare mis-spellings over a strong edit-distance match public void testClosestEditDistanceMatchComesFirst() throws Throwable { FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer); flt.addTerms("smith", "name", 0.3f, 1); Query q = flt.rewrite(searcher.getIndexReader()); - HashSet queryTerms = new HashSet(); + Set queryTerms = new HashSet(); q.extractTerms(queryTerms); assertTrue("Should have variant smythe", queryTerms.contains(new Term("name", "smythe"))); assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith"))); @@ -92,7 +96,7 @@ FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer); flt.addTerms("jonathin smoth", "name", 0.3f, 1); Query q = flt.rewrite(searcher.getIndexReader()); - HashSet queryTerms = new HashSet(); + Set queryTerms = new HashSet(); q.extractTerms(queryTerms); assertTrue("Should have variant jonathan", queryTerms.contains(new Term("name", "jonathan"))); assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith"))); @@ -108,7 +112,7 @@ FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer); flt.addTerms("fernando smith", "name", 0.3f, 1); Query q = flt.rewrite(searcher.getIndexReader()); - HashSet queryTerms = new HashSet(); + Set queryTerms = new HashSet(); q.extractTerms(queryTerms); assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith"))); TopDocs topDocs = searcher.search(flt, 1); @@ -124,7 +128,6 @@ fltq1.addTerms("javi", "subject", 0.5f, 2); FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer); fltq2.addTerms("javi", "subject", 0.5f, 2); - assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1, - fltq2); + assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1, fltq2); } } Index: modules/queries/src/java/org/apache/lucene/queries/FuzzyLikeThisQuery.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/FuzzyLikeThisQuery.java (revision 1159014) +++ modules/queries/src/java/org/apache/lucene/queries/FuzzyLikeThisQuery.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search; +package org.apache.lucene.queries; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -19,10 +19,7 @@ import java.io.IOException; import java.io.StringReader; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; +import java.util.*; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; @@ -30,6 +27,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; +import org.apache.lucene.search.*; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; @@ -38,342 +36,323 @@ * Fuzzifies ALL terms provided as strings and then picks the best n differentiating terms. * In effect this mixes the behaviour of FuzzyQuery and MoreLikeThis but with special consideration * of fuzzy scoring factors. - * This generally produces good results for queries where users may provide details in a number of + * This generally produces good results for queries where users may provide details in a number of * fields and have no knowledge of boolean query syntax and also want a degree of fuzzy matching and * a fast query. - * + *

* For each source term the fuzzy variants are held in a BooleanQuery with no coord factor (because * we are not looking for matches on multiple variants in any one doc). Additionally, a specialized - * TermQuery is used for variants and does not use that variant term's IDF because this would favour rarer - * terms eg misspellings. Instead, all variants use the same IDF ranking (the one for the source query + * TermQuery is used for variants and does not use that variant term's IDF because this would favour rarer + * terms eg misspellings. Instead, all variants use the same IDF ranking (the one for the source query * term) and this is factored into the variant's boost. If the source query term does not exist in the * index the average IDF of the variants is used. */ -public class FuzzyLikeThisQuery extends Query -{ - // TODO: generalize this query (at least it should not reuse this static sim! - // a better way might be to convert this into multitermquery rewrite methods. - // the rewrite method can 'average' the TermContext's term statistics (docfreq,totalTermFreq) - // provided to TermQuery, so that the general idea is agnostic to any scoring system... - static TFIDFSimilarity sim=new DefaultSimilarity(); - Query rewrittenQuery=null; - ArrayList fieldVals=new ArrayList(); - Analyzer analyzer; - - ScoreTermQueue q; - int MAX_VARIANTS_PER_TERM=50; - boolean ignoreTF=false; - private int maxNumTerms; +public class FuzzyLikeThisQuery extends Query { - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((analyzer == null) ? 0 : analyzer.hashCode()); - result = prime * result - + ((fieldVals == null) ? 0 : fieldVals.hashCode()); - result = prime * result + (ignoreTF ? 1231 : 1237); - result = prime * result + maxNumTerms; - return result; + private static final int MAX_VARIANTS_PER_TERM = 50; + + // TODO: generalize this query (at least it should not reuse this static sim! + // a better way might be to convert this into multitermquery rewrite methods. + // the rewrite method can 'average' the TermContext's term statistics (docfreq,totalTermFreq) + // provided to TermQuery, so that the general idea is agnostic to any scoring system... + private static final TFIDFSimilarity sim = new DefaultSimilarity(); + + private Query rewrittenQuery; + private final List fieldVals = new ArrayList(); + private final Analyzer analyzer; + private final ScoreTermQueue q; + private boolean ignoreTF; + private final int maxNumTerms; + + /** + * @param maxNumTerms The total number of terms clauses that will appear once rewritten as a BooleanQuery + * @param analyzer + */ + public FuzzyLikeThisQuery(int maxNumTerms, Analyzer analyzer) { + q = new ScoreTermQueue(maxNumTerms); + this.analyzer = analyzer; + this.maxNumTerms = maxNumTerms; + } + + @Override + public int hashCode() { + int prime = 31; + int result = 1; + result = prime * result + ((analyzer == null) ? 0 : analyzer.hashCode()); + result = prime * result + ((fieldVals == null) ? 0 : fieldVals.hashCode()); + result = prime * result + (ignoreTF ? 1231 : 1237); + result = prime * result + maxNumTerms; + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } else if (obj == null) { + return false; + } else if (getClass() != obj.getClass()) { + return false; } - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) + FuzzyLikeThisQuery other = (FuzzyLikeThisQuery) obj; + if (analyzer == null) { + if (other.analyzer != null) { return false; - if (getClass() != obj.getClass()) + } + } else if (!analyzer.equals(other.analyzer)) { + return false; + } + + if (fieldVals == null) { + if (other.fieldVals != null) { return false; - FuzzyLikeThisQuery other = (FuzzyLikeThisQuery) obj; - if (analyzer == null) { - if (other.analyzer != null) - return false; - } else if (!analyzer.equals(other.analyzer)) - return false; - if (fieldVals == null) { - if (other.fieldVals != null) - return false; - } else if (!fieldVals.equals(other.fieldVals)) - return false; - if (ignoreTF != other.ignoreTF) - return false; - if (maxNumTerms != other.maxNumTerms) - return false; - return true; + } + } else if (!fieldVals.equals(other.fieldVals)) { + return false; } + return ignoreTF == other.ignoreTF && maxNumTerms == other.maxNumTerms; + } - /** - * - * @param maxNumTerms The total number of terms clauses that will appear once rewritten as a BooleanQuery - * @param analyzer - */ - public FuzzyLikeThisQuery(int maxNumTerms, Analyzer analyzer) - { - q=new ScoreTermQueue(maxNumTerms); - this.analyzer=analyzer; - this.maxNumTerms = maxNumTerms; + class FieldVals { + private final String queryString; + private final String fieldName; + private final float minSimilarity; + private final int prefixLength; + + public FieldVals(String name, float similarity, int length, String queryString) { + this.fieldName = name; + this.minSimilarity = similarity; + this.prefixLength = length; + this.queryString = queryString; } - class FieldVals - { - String queryString; - String fieldName; - float minSimilarity; - int prefixLength; - public FieldVals(String name, float similarity, int length, String queryString) - { - fieldName = name; - minSimilarity = similarity; - prefixLength = length; - this.queryString = queryString; - } - @Override public int hashCode() { final int prime = 31; int result = 1; - result = prime * result - + ((fieldName == null) ? 0 : fieldName.hashCode()); + result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode()); result = prime * result + Float.floatToIntBits(minSimilarity); result = prime * result + prefixLength; - result = prime * result - + ((queryString == null) ? 0 : queryString.hashCode()); + result = prime * result + ((queryString == null) ? 0 : queryString.hashCode()); return result; } @Override public boolean equals(Object obj) { - if (this == obj) + if (this == obj) { return true; - if (obj == null) + } else if (obj == null) { return false; - if (getClass() != obj.getClass()) + } else if (getClass() != obj.getClass()) { return false; + } + FieldVals other = (FieldVals) obj; if (fieldName == null) { - if (other.fieldName != null) + if (other.fieldName != null) { return false; - } else if (!fieldName.equals(other.fieldName)) + } + } else if (!fieldName.equals(other.fieldName)) { return false; + } + if (Float.floatToIntBits(minSimilarity) != Float - .floatToIntBits(other.minSimilarity)) + .floatToIntBits(other.minSimilarity)) { return false; - if (prefixLength != other.prefixLength) + } + if (prefixLength != other.prefixLength) { return false; + } if (queryString == null) { - if (other.queryString != null) + if (other.queryString != null) { return false; - } else if (!queryString.equals(other.queryString)) + } + } else if (!queryString.equals(other.queryString)) { return false; + } + return true; } - + } - + /** + * Adds user input for "fuzzification" + * + * @param queryString The string which will be parsed by the analyzer and for which fuzzy variants will be parsed + * @param fieldName + * @param minSimilarity The minimum similarity of the term variants (see FuzzyTermsEnum) + * @param prefixLength Length of required common prefix on variant terms (see FuzzyTermsEnum) + */ + public void addTerms(String queryString, String fieldName, float minSimilarity, int prefixLength) { + fieldVals.add(new FieldVals(fieldName, minSimilarity, prefixLength, queryString)); + } + + private void addTerms(IndexReader reader, FieldVals f) throws IOException { + if (f.queryString == null) { + return; } - - /** - * Adds user input for "fuzzification" - * @param queryString The string which will be parsed by the analyzer and for which fuzzy variants will be parsed - * @param fieldName - * @param minSimilarity The minimum similarity of the term variants (see FuzzyTermsEnum) - * @param prefixLength Length of required common prefix on variant terms (see FuzzyTermsEnum) - */ - public void addTerms(String queryString, String fieldName,float minSimilarity, int prefixLength) - { - fieldVals.add(new FieldVals(fieldName,minSimilarity,prefixLength,queryString)); - } - - - private void addTerms(IndexReader reader,FieldVals f) throws IOException - { - if(f.queryString==null) return; - TokenStream ts=analyzer.reusableTokenStream(f.fieldName,new StringReader(f.queryString)); - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - - int corpusNumDocs=reader.numDocs(); - HashSet processedTerms=new HashSet(); - ts.reset(); - while (ts.incrementToken()) - { - String term = termAtt.toString(); - if(!processedTerms.contains(term)) - { - processedTerms.add(term); - ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term - float minScore=0; - Term startTerm=new Term(f.fieldName, term); - AttributeSource atts = new AttributeSource(); - MaxNonCompetitiveBoostAttribute maxBoostAtt = - atts.addAttribute(MaxNonCompetitiveBoostAttribute.class); - FuzzyTermsEnum fe = new FuzzyTermsEnum(MultiFields.getTerms(reader, startTerm.field()).iterator(), atts, startTerm, f.minSimilarity, f.prefixLength); - //store the df so all variants use same idf - int df = reader.docFreq(startTerm); - int numVariants=0; - int totalVariantDocFreqs=0; - BytesRef possibleMatch; - BoostAttribute boostAtt = - fe.attributes().addAttribute(BoostAttribute.class); - while ((possibleMatch = fe.next()) != null) { - numVariants++; - totalVariantDocFreqs+=fe.docFreq(); - float score=boostAtt.getBoost(); - if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){ - ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), new BytesRef(possibleMatch)),score,startTerm); - variantsQ.insertWithOverflow(st); - minScore = variantsQ.top().score; // maintain minScore - } - maxBoostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY); - } - if(numVariants>0) - { - int avgDf=totalVariantDocFreqs/numVariants; - if(df==0)//no direct match we can use as df for all variants - { - df=avgDf; //use avg df of all variants - } - - // take the top variants (scored by edit distance) and reset the score - // to include an IDF factor then add to the global queue for ranking - // overall top query terms - int size = variantsQ.size(); - for(int i = 0; i < size; i++) - { - ScoreTerm st = variantsQ.pop(); - st.score=(st.score*st.score)*sim.idf(df,corpusNumDocs); - q.insertWithOverflow(st); - } - } - } + TokenStream ts = analyzer.reusableTokenStream(f.fieldName, new StringReader(f.queryString)); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + + int corpusNumDocs = reader.numDocs(); + Set processedTerms = new HashSet(); + ts.reset(); + while (ts.incrementToken()) { + String term = termAtt.toString(); + if (!processedTerms.contains(term)) { + processedTerms.add(term); + ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term + float minScore = 0; + Term startTerm = new Term(f.fieldName, term); + AttributeSource atts = new AttributeSource(); + MaxNonCompetitiveBoostAttribute maxBoostAtt = + atts.addAttribute(MaxNonCompetitiveBoostAttribute.class); + FuzzyTermsEnum fe = new FuzzyTermsEnum(MultiFields.getTerms(reader, startTerm.field()).iterator(), atts, startTerm, f.minSimilarity, f.prefixLength); + //store the df so all variants use same idf + int df = reader.docFreq(startTerm); + int numVariants = 0; + int totalVariantDocFreqs = 0; + BytesRef possibleMatch; + BoostAttribute boostAtt = fe.attributes().addAttribute(BoostAttribute.class); + while ((possibleMatch = fe.next()) != null) { + numVariants++; + totalVariantDocFreqs += fe.docFreq(); + float score = boostAtt.getBoost(); + if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore) { + ScoreTerm st = new ScoreTerm(new Term(startTerm.field(), new BytesRef(possibleMatch)), score, startTerm); + variantsQ.insertWithOverflow(st); + minScore = variantsQ.top().score; // maintain minScore + } + maxBoostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY); } - ts.end(); - ts.close(); - } - - @Override - public Query rewrite(IndexReader reader) throws IOException - { - if(rewrittenQuery!=null) - { - return rewrittenQuery; - } - //load up the list of possible terms - for (Iterator iter = fieldVals.iterator(); iter.hasNext();) - { - FieldVals f = iter.next(); - addTerms(reader,f); - } - //clear the list of fields - fieldVals.clear(); - - BooleanQuery bq=new BooleanQuery(); - - - //create BooleanQueries to hold the variants for each token/field pair and ensure it - // has no coord factor - //Step 1: sort the termqueries by term/field - HashMap> variantQueries=new HashMap>(); - int size = q.size(); - for(int i = 0; i < size; i++) - { - ScoreTerm st = q.pop(); - ArrayList l= variantQueries.get(st.fuzziedSourceTerm); - if(l==null) + + if (numVariants > 0) { + int avgDf = totalVariantDocFreqs / numVariants; + if (df == 0)//no direct match we can use as df for all variants { - l=new ArrayList(); - variantQueries.put(st.fuzziedSourceTerm,l); + df = avgDf; //use avg df of all variants } - l.add(st); + + // take the top variants (scored by edit distance) and reset the score + // to include an IDF factor then add to the global queue for ranking + // overall top query terms + int size = variantsQ.size(); + for (int i = 0; i < size; i++) { + ScoreTerm st = variantsQ.pop(); + st.score = (st.score * st.score) * sim.idf(df, corpusNumDocs); + q.insertWithOverflow(st); + } } - //Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries - for (Iterator> iter = variantQueries.values().iterator(); iter.hasNext();) - { - ArrayList variants = iter.next(); - if(variants.size()==1) - { - //optimize where only one selected variant - ScoreTerm st= variants.get(0); - Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1); - tq.setBoost(st.score); // set the boost to a mix of IDF and score - bq.add(tq, BooleanClause.Occur.SHOULD); - } - else - { - BooleanQuery termVariants=new BooleanQuery(true); //disable coord and IDF for these term variants - for (Iterator iterator2 = variants.iterator(); iterator2 - .hasNext();) - { - ScoreTerm st = iterator2.next(); - // found a match - Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1); - tq.setBoost(st.score); // set the boost using the ScoreTerm's score - termVariants.add(tq, BooleanClause.Occur.SHOULD); // add to query - } - bq.add(termVariants, BooleanClause.Occur.SHOULD); // add to query - } - } - //TODO possible alternative step 3 - organize above booleans into a new layer of field-based - // booleans with a minimum-should-match of NumFields-1? - bq.setBoost(getBoost()); - this.rewrittenQuery=bq; - return bq; + } } - - //Holds info for a fuzzy term variant - initially score is set to edit distance (for ranking best - // term variants) then is reset with IDF for use in ranking against all other - // terms/fields - private static class ScoreTerm{ - public Term term; - public float score; - Term fuzziedSourceTerm; - - public ScoreTerm(Term term, float score, Term fuzziedSourceTerm){ - this.term = term; - this.score = score; - this.fuzziedSourceTerm=fuzziedSourceTerm; - } + ts.end(); + ts.close(); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + if (rewrittenQuery != null) { + return rewrittenQuery; + } + //load up the list of possible terms + for (FieldVals f : fieldVals) { + addTerms(reader, f); + } + //clear the list of fields + fieldVals.clear(); + + BooleanQuery bq = new BooleanQuery(); + + //create BooleanQueries to hold the variants for each token/field pair and ensure it + // has no coord factor + //Step 1: sort the termqueries by term/field + Map> variantQueries = new HashMap>(); + int size = q.size(); + for (int i = 0; i < size; i++) { + ScoreTerm st = q.pop(); + List l = variantQueries.get(st.fuzziedSourceTerm); + if (l == null) { + l = new ArrayList(); + variantQueries.put(st.fuzziedSourceTerm, l); } - - private static class ScoreTermQueue extends PriorityQueue { - public ScoreTermQueue(int size){ - super(size); + l.add(st); + } + //Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries + for (List variants : variantQueries.values()) { + if (variants.size() == 1) { + //optimize where only one selected variant + ScoreTerm st = variants.get(0); + Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1); + tq.setBoost(st.score); // set the boost to a mix of IDF and score + bq.add(tq, BooleanClause.Occur.SHOULD); + } else { + BooleanQuery termVariants = new BooleanQuery(true); //disable coord and IDF for these term variants + for (ScoreTerm st : variants) { + // found a match + Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1); + tq.setBoost(st.score); // set the boost using the ScoreTerm's score + termVariants.add(tq, BooleanClause.Occur.SHOULD); // add to query } - - /* (non-Javadoc) - * @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object) - */ - @Override - protected boolean lessThan(ScoreTerm termA, ScoreTerm termB) { - if (termA.score== termB.score) - return termA.term.compareTo(termB.term) > 0; - else - return termA.score < termB.score; - } - - } - + bq.add(termVariants, BooleanClause.Occur.SHOULD); // add to query + } + } + //TODO possible alternative step 3 - organize above booleans into a new layer of field-based + // booleans with a minimum-should-match of NumFields-1? + bq.setBoost(getBoost()); + this.rewrittenQuery = bq; + return bq; + } + + //Holds info for a fuzzy term variant - initially score is set to edit distance (for ranking best + // term variants) then is reset with IDF for use in ranking against all other + // terms/fields + private static class ScoreTerm { + public Term term; + public float score; + Term fuzziedSourceTerm; + + public ScoreTerm(Term term, float score, Term fuzziedSourceTerm) { + this.term = term; + this.score = score; + this.fuzziedSourceTerm = fuzziedSourceTerm; + } + } + + private static class ScoreTermQueue extends PriorityQueue { + public ScoreTermQueue(int size) { + super(size); + } + /* (non-Javadoc) - * @see org.apache.lucene.search.Query#toString(java.lang.String) - */ + * @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object) + */ @Override - public String toString(String field) - { - return null; + protected boolean lessThan(ScoreTerm termA, ScoreTerm termB) { + if (termA.score == termB.score) + return termA.term.compareTo(termB.term) > 0; + else + return termA.score < termB.score; } + } - public boolean isIgnoreTF() - { - return ignoreTF; - } + /* (non-Javadoc) + * @see org.apache.lucene.search.Query#toString(java.lang.String) + */ + @Override + public String toString(String field) { + return null; + } + public boolean isIgnoreTF() { + return ignoreTF; + } - public void setIgnoreTF(boolean ignoreTF) - { - this.ignoreTF = ignoreTF; - } - + public void setIgnoreTF(boolean ignoreTF) { + this.ignoreTF = ignoreTF; + } + } Index: lucene/contrib/contrib-build.xml =================================================================== --- lucene/contrib/contrib-build.xml (revision 1159014) +++ lucene/contrib/contrib-build.xml (working copy) @@ -240,17 +240,6 @@ - - - - - - - - - - - Index: lucene/contrib/queries/lib/jakarta-regexp-1.4.jar (deleted) =================================================================== Index: lucene/contrib/queries/lib/jakarta-regexp-LICENSE-ASL.txt (deleted) =================================================================== Index: lucene/contrib/queries/lib/jakarta-regexp-NOTICE.txt (deleted) =================================================================== Index: lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java (deleted) =================================================================== Index: lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java (deleted) =================================================================== Index: lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestJakartaRegexpCapabilities.java (deleted) =================================================================== Index: lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java (deleted) =================================================================== Index: lucene/contrib/queries/src/test/org/apache/lucene/search/TestSlowCollationMethods.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeTermsEnum.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexCapabilities.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQueryCapable.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/package.html (deleted) =================================================================== Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java (deleted) =================================================================== Index: lucene/contrib/queries/src/java/overview.html (deleted) =================================================================== Index: lucene/contrib/queries/README.txt (deleted) =================================================================== Index: lucene/contrib/queries/build.xml (deleted) =================================================================== Index: lucene/contrib/spatial/build.xml =================================================================== --- lucene/contrib/spatial/build.xml (revision 1159014) +++ lucene/contrib/spatial/build.xml (working copy) @@ -26,10 +26,9 @@ - - + Index: lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/FuzzyLikeThisQueryBuilder.java =================================================================== --- lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/FuzzyLikeThisQueryBuilder.java (revision 1159014) +++ lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/FuzzyLikeThisQueryBuilder.java (working copy) @@ -1,7 +1,7 @@ package org.apache.lucene.xmlparser.builders; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.search.FuzzyLikeThisQuery; +import org.apache.lucene.queries.FuzzyLikeThisQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.Query; import org.apache.lucene.xmlparser.DOMUtils; Index: lucene/contrib/xml-query-parser/build.xml =================================================================== --- lucene/contrib/xml-query-parser/build.xml (revision 1159014) +++ lucene/contrib/xml-query-parser/build.xml (working copy) @@ -26,11 +26,10 @@ - - + Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 1159014) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -55,7 +55,6 @@ import org.apache.lucene.search.*; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner; -import org.apache.lucene.search.regex.RegexQuery; import org.apache.lucene.search.spans.*; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; @@ -337,7 +336,7 @@ } public void testSpanRegexQuery() throws Exception { - query = new SpanOrQuery(new SpanMultiTermQueryWrapper(new RegexQuery(new Term(FIELD_NAME, "ken.*")))); + query = new SpanOrQuery(new SpanMultiTermQueryWrapper(new RegexpQuery(new Term(FIELD_NAME, "ken.*")))); searcher = new IndexSearcher(ramDir, true); hits = searcher.search(query, 100); int maxNumFragmentsRequired = 2; @@ -361,7 +360,7 @@ } public void testRegexQuery() throws Exception { - query = new RegexQuery(new Term(FIELD_NAME, "ken.*")); + query = new RegexpQuery(new Term(FIELD_NAME, "ken.*")); searcher = new IndexSearcher(ramDir, true); hits = searcher.search(query, 100); int maxNumFragmentsRequired = 2; Index: lucene/contrib/highlighter/build.xml =================================================================== --- lucene/contrib/highlighter/build.xml (revision 1159014) +++ lucene/contrib/highlighter/build.xml (working copy) @@ -27,9 +27,8 @@ - - + Index: lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/regex/TestRegexQuery.java =================================================================== --- lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/regex/TestRegexQuery.java (revision 1159014) +++ lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/regex/TestRegexQuery.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search.regex; +package org.apache.lucene.sandbox.queries.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more Index: lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/regex/TestJakartaRegexpCapabilities.java =================================================================== --- lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/regex/TestJakartaRegexpCapabilities.java (revision 1159014) +++ lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/regex/TestJakartaRegexpCapabilities.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search.regex; +package org.apache.lucene.sandbox.queries.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more Index: lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/regex/TestSpanRegexQuery.java =================================================================== --- lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/regex/TestSpanRegexQuery.java (revision 1159014) +++ lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/regex/TestSpanRegexQuery.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search.regex; +package org.apache.lucene.sandbox.queries.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more Index: lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowCollationMethods.java =================================================================== --- lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowCollationMethods.java (revision 0) +++ lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowCollationMethods.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search; +package org.apache.lucene.sandbox.queries; import java.io.IOException; import java.text.Collator; @@ -8,6 +8,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.*; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedTermRangeTermsEnum.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedTermRangeTermsEnum.java (revision 0) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedTermRangeTermsEnum.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search; +package org.apache.lucene.sandbox.queries; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -21,6 +21,7 @@ import java.text.Collator; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.FilteredTermsEnum; import org.apache.lucene.util.BytesRef; /** Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedTermRangeFilter.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedTermRangeFilter.java (revision 0) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedTermRangeFilter.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search; +package org.apache.lucene.sandbox.queries; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -19,6 +19,7 @@ import java.text.Collator; +import org.apache.lucene.search.MultiTermQueryWrapperFilter; import org.apache.lucene.search.NumericRangeFilter; // javadoc import org.apache.lucene.search.FieldCacheRangeFilter; // javadoc Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedTermRangeQuery.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedTermRangeQuery.java (revision 0) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedTermRangeQuery.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search; +package org.apache.lucene.sandbox.queries; /** * Licensed to the Apache Software Foundation (ASF) under one or more Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexQuery.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexQuery.java (revision 1159014) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexQuery.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search.regex; +package org.apache.lucene.sandbox.queries.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/JakartaRegexpCapabilities.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/JakartaRegexpCapabilities.java (revision 1159014) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/JakartaRegexpCapabilities.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search.regex; +package org.apache.lucene.sandbox.queries.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/JavaUtilRegexCapabilities.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/JavaUtilRegexCapabilities.java (revision 1159014) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/JavaUtilRegexCapabilities.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search.regex; +package org.apache.lucene.sandbox.queries.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexCapabilities.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexCapabilities.java (revision 1159014) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexCapabilities.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search.regex; +package org.apache.lucene.sandbox.queries.regex; import org.apache.lucene.util.BytesRef; Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexQueryCapable.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexQueryCapable.java (revision 1159014) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexQueryCapable.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search.regex; +package org.apache.lucene.sandbox.queries.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexTermsEnum.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexTermsEnum.java (revision 1159014) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexTermsEnum.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search.regex; +package org.apache.lucene.sandbox.queries.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedStringComparator.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedStringComparator.java (revision 0) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedStringComparator.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.search; +package org.apache.lucene.sandbox.queries; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -21,7 +21,9 @@ import java.text.Collator; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldCache.DocTerms; +import org.apache.lucene.search.FieldComparator; import org.apache.lucene.util.BytesRef; /** Sorts by a field's value using the given Collator Index: lucene/contrib/sandbox/src/java/overview.html =================================================================== --- lucene/contrib/sandbox/src/java/overview.html (revision 0) +++ lucene/contrib/sandbox/src/java/overview.html (revision 0) @@ -0,0 +1,26 @@ + + + + + Sandbox + + + + Sandbox + + Index: lucene/contrib/sandbox/build.xml =================================================================== --- lucene/contrib/sandbox/build.xml (revision 0) +++ lucene/contrib/sandbox/build.xml (revision 0) @@ -0,0 +1,35 @@ + + + + + + Sandbox for odd contrib code + + + + + + + + + + + Index: lucene/build.xml =================================================================== --- lucene/build.xml (revision 1159014) +++ lucene/build.xml (working copy) @@ -255,7 +255,7 @@ - + @@ -272,7 +272,7 @@ - + Index: dev-tools/idea/solr/solr.iml =================================================================== --- dev-tools/idea/solr/solr.iml (revision 1159014) +++ dev-tools/idea/solr/solr.iml (working copy) @@ -24,7 +24,6 @@ - Index: dev-tools/idea/.idea/modules.xml =================================================================== --- dev-tools/idea/.idea/modules.xml (revision 1159014) +++ dev-tools/idea/.idea/modules.xml (working copy) @@ -9,8 +9,8 @@ - + Index: dev-tools/idea/.idea/workspace.xml =================================================================== --- dev-tools/idea/.idea/workspace.xml (revision 1159014) +++ dev-tools/idea/.idea/workspace.xml (working copy) @@ -127,13 +127,6 @@ - - - - + + + +