Index: contrib/regex/build.xml
===================================================================
--- contrib/regex/build.xml (revision 765635)
+++ contrib/regex/build.xml (working copy)
@@ -24,7 +24,7 @@
[NOTE(review): the build.xml hunk body and the file header for the new AutomatonQuery.java source were lost when this patch was extracted; header reconstructed below to match the other new-file stanzas in this patch — verify against the original attachment.]
Index: contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonQuery.java
===================================================================
--- contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonQuery.java (revision 0)
+++ contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonQuery.java (revision 0)
+ * A RegexpQuery that utilizes the BRICS automaton package: http://www.brics.dk/automaton/
+ *
+ * Queries are converted to a DFA, and the state machine is used to optimize term enumeration.
+ *
+ * Please note that this regex syntax is a bit different from others in that ^ and $ are implied.
+ *
+ * Construct a new AutomatonQuery.
+ *
+ * Please note that this regex syntax is a bit different from others in that ^ and $ are implied. + *
+ * @param term Term containing field and regular expression + */ + public AutomatonQuery(Term term) { + this(term, new RegExp(term.text()).toAutomaton()); + } + + /** + * <p> + * Construct a new AutomatonQuery, with a prebuilt automaton + *
+ * @param term Term containing field and some pattern structure + * @param automaton Automaton to use for query + */ + public AutomatonQuery(Term term, Automaton automaton) { + super(term); + this.field = term.field(); + this.automaton = automaton; + } + + /* (non-Javadoc) + * @see org.apache.lucene.search.MultiTermQuery#getEnum(org.apache.lucene.index.IndexReader) + */ + protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + return new AutomatonTermEnum(automaton, field, reader); + } + + + /* (non-Javadoc) + * @see org.apache.lucene.search.MultiTermQuery#hashCode() + */ + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((automaton == null) ? 0 : automaton.hashCode()); + result = prime * result + ((field == null) ? 0 : field.hashCode()); + return result; + } + + + /* (non-Javadoc) + * @see org.apache.lucene.search.MultiTermQuery#equals(java.lang.Object) + */ + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + final AutomatonQuery other = (AutomatonQuery) obj; + if (automaton == null) { + if (other.automaton != null) + return false; + } else if (!automaton.equals(other.automaton)) + return false; + if (field == null) { + if (other.field != null) + return false; + } else if (!field.equals(other.field)) + return false; + return true; + } + +} Index: contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonTermEnum.java =================================================================== --- contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonTermEnum.java (revision 0) +++ contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonTermEnum.java (revision 0) @@ -0,0 +1,227 @@ +package org.apache.lucene.search.regex; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.FilteredTermEnum; + +import dk.brics.automaton.Automaton; +import dk.brics.automaton.RunAutomaton; +import dk.brics.automaton.State; +import dk.brics.automaton.Transition; + +/** + * <p> + * A FilteredTermEnum that enumerates terms based upon what is accepted by a FSM. + *
+ *
+ * The algorithm is such:
+ * 1. As long as matches are successful, keep reading sequentially.
+ * 2. When a match fails, skip to the next string in lexicographic order that does not enter a reject state.
+ *
+ * The algorithm does not attempt to actually skip to the next string that is completely accepted. + * This is not possible when the language accepted by the FSM is not finite (i.e. * operator). + *
+ */ +final class AutomatonTermEnum extends FilteredTermEnum { + private final IndexReader reader; + private final String field; + private final RunAutomaton runAutomaton; + private final Automaton automaton; + private Term lastTerm = null; + + /** + * Construct an enumerator based upon an automaton, enumerating the specified field, working on a supplied reader. + */ + AutomatonTermEnum(Automaton automaton, String field, IndexReader reader) throws IOException { + super(); + this.reader = reader; + this.field = field; + this.automaton = automaton; + + this.automaton.minimize(); /* minimize the automaton, just in case. this also ensures it is determinized. */ + runAutomaton = new RunAutomaton(automaton); /* tableize the automaton */ + + String startPoint = nextString(""); + if (startPoint == null) { // no terms match this automaton... oh well, it will figure this out. + startPoint = ""; + } + + lastTerm = new Term(field, startPoint); + setEnum(reader.terms(lastTerm)); + } + + //@Override + public float difference() { + return 1.0f; + } + + /** + * Returns true if the term matches the automaton. + * Also stashes away the term to assist with smart enumeration. + */ + //@Override + protected boolean termCompare(Term term) { + lastTerm = term; + return (term.field() == field && runAutomaton.run(term.text())); + } + + /** + * increments to the next term matching this automaton. + * after a successful comparison, it simply tries the next term. + * after an unsuccessful comparison, it seeks to a smarter position. 
+ */ + //@Override + public boolean next() throws IOException { + + do { + if (lastTerm.equals(currentTerm)) { /* the last enumeration was a match, don't skip around */ + actualEnum.next(); + } else { /* seek to the next possible string */ + String nextPoint = nextString(lastTerm.text()); + if (nextPoint == null) { /* no more possible strings can match */ + currentTerm = null; + return false; + } + /* replace the old enumerator with a new one, positioned to a nice place */ + actualEnum.close(); + actualEnum = reader.terms(lastTerm.createTerm(nextPoint)); + } + + Term candidateTerm = actualEnum.term(); /* read a term */ + + /* this means end of enumeration: no more terms for this field or no more terms at all */ + if (candidateTerm == null || candidateTerm.field() != field) { + currentTerm = null; + return false; + } + + /* if the term matches the automaton, success! */ + if (termCompare(candidateTerm)) { + currentTerm = candidateTerm; + return true; + } + } while (true); + + } + + /** this is a dummy, it is not used by this class. */ + //@Override + protected boolean endEnum() { + assert false; // should never be called + return (currentTerm != null); + } + + + /** + * Returns the next String in lexicographic order after s that will not put the machine into a reject state. + * If such a string does not exist, returns null. + * + * The correctness of this method depends heavily upon the properties of a DFA. + * + * @param s input String + * @return next valid String + */ + + private final String nextString(String s) { + State state = automaton.getInitialState(); + int pos = 0; + + // walk the automaton until a character is rejected. + for (pos = 0; pos < s.length(); pos++) { + State nextState = state.step(s.charAt(pos)); + if (nextState == null) + break; + else + state = nextState; + } + + // take the useful portion, and the last non-reject state, and attempt to append characters that will match. 
+ String nextString = nextString(s, state, pos); + if (nextString != null) { + return nextString; + } else { /* no more solutions exist from this useful portion, backtrack */ + if (pos == 0) /* all solutions exhausted */ + return null; + char nextChar = s.charAt(pos - 1); + nextChar++; + String sprime = s.substring(0, pos - 1) + nextChar; + if (runAutomaton.run(sprime)) /* if this is accepted it is good to go as-is */ + return sprime; + else + return nextString(sprime); + } + } + + /** + * Returns the next String in lexicographic order after s that will not put the machine into a reject state. + * Appends some characters to the useful portion. If this cannot satisfy the machine, returns null. + * This method will walk the minimal path, in lexicographic order, as long as possible. + * + * @param s input String + * @param state current non-reject state + * @param useful most useful portion of the string + * @return next valid String + */ + private final String nextString(String s, State state, int useful) { + // the next lexicographic character must be greater than the existing character, if it exists. + char c = 0; + if (useful < s.length()) { + c = s.charAt(useful); + c++; + } + + StringBuffer sb = new StringBuffer(); + // append the useful portion + sb.append(s.substring(0, useful)); + + Set visited = new HashSet(); + visited.add(state); + + Iterator transitions = state.getSortedTransitions(false).iterator(); + + // find the minimal path (lexicographic order) that is >= c + while (transitions.hasNext()) { + Transition transition = (Transition) transitions.next(); + if (transition.getMax() >= c) { + char nextChar = (char) Math.max(c, transition.getMin()); + sb.append(nextChar); + state = transition.getDest(); + // as long as is possible, continue down the minimal path. + // if a loop or accept state is encountered, stop. 
+ while (!visited.contains(state) && !state.isAccept()) { + visited.add(state); + transition = (Transition) state.getSortedTransitions(false).get(0); + sb.append(transition.getMin()); + state = transition.getDest(); + } + return sb.toString(); + } + + } + return null; + } + +} Index: contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonWildcardQuery.java =================================================================== --- contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonWildcardQuery.java (revision 0) +++ contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonWildcardQuery.java (revision 0) @@ -0,0 +1,99 @@ +package org.apache.lucene.search.regex; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryWrapperFilter; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; + +import dk.brics.automaton.Automaton; +import dk.brics.automaton.BasicAutomata; +import dk.brics.automaton.BasicOperations; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + *+ * A Query that accepts the syntax of {@link WildcardQuery} but implements with an Automaton. + * This accelerates more wildcard cases. + * Most cases are accelerated such as ?(a|b)cd?e, but leading * is still slow. + *
+ */ +public class AutomatonWildcardQuery extends AutomatonQuery { + private final boolean termContainsWildcard; + + /** + * Construct a new AutomatonWildcardQuery + * @param term query Term + */ + public AutomatonWildcardQuery(Term term) { + super(term, toAutomaton(term)); + termContainsWildcard = (term.text().indexOf('*') != -1) || (term.text().indexOf('?') != -1); + } + + /* (non-Javadoc) + * @see org.apache.lucene.search.Query#rewrite(org.apache.lucene.index.IndexReader) + */ + public Query rewrite(IndexReader reader) throws IOException { + if (!termContainsWildcard) { + Query simpleQuery = new TermQuery(getTerm()); + if (getConstantScoreRewrite()) + simpleQuery = new ConstantScoreQuery(new QueryWrapperFilter(simpleQuery)); + + simpleQuery.setBoost(getBoost()); + return simpleQuery; + } else { + return super.rewrite(reader); + } + } + + /** + * Convert lucene wildcard syntax into an automaton. + */ + private static Automaton toAutomaton(Term wildcardquery) { + List automata = new ArrayList(); + + String wildcardText = wildcardquery.text(); + + for (int i = 0; i < wildcardText.length(); i++) { + char c = wildcardText.charAt(i); + switch(c) { + case '*': { + automata.add(BasicAutomata.makeAnyString()); + break; + } + case '?': { + automata.add(BasicAutomata.makeAnyChar()); + break; + } + default: { + automata.add(BasicAutomata.makeChar(c)); + } + } + } + + return BasicOperations.concatenate(automata); + } + +} Index: contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonQuery.java =================================================================== --- contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonQuery.java (revision 0) +++ contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonQuery.java (revision 0) @@ -0,0 +1,88 @@ +package org.apache.lucene.search.regex; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import junit.framework.TestCase; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.IndexSearcher; + +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanQuery; + +public class TestAutomatonQuery extends TestCase { + private IndexSearcher searcher; + private final String FN = "field"; + + public void setUp() { + RAMDirectory directory = new RAMDirectory(); + try { + IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, + IndexWriter.MaxFieldLength.LIMITED); + Document doc = new Document(); + doc.add(new Field(FN, "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.optimize(); + writer.close(); + searcher = new IndexSearcher(directory); + } catch (Exception e) { + fail(e.toString()); + } + } + + public void tearDown() { + try { + searcher.close(); + } catch (Exception e) { + fail(e.toString()); + } + } + + private Term newTerm(String value) { return new Term(FN, value); } + + private int 
regexQueryNrHits(String regex) throws Exception { + AutomatonQuery query = new AutomatonQuery( newTerm(regex)); + return searcher.search(query).length(); + } + + public void testRegex1() throws Exception { + assertEquals(1, regexQueryNrHits("q.[aeiou]c.*")); + } + + public void testRegex2() throws Exception { + assertEquals(0, regexQueryNrHits(".[aeiou]c.*")); + } + + public void testRegex3() throws Exception { + assertEquals(0, regexQueryNrHits("q.[aeiou]c")); + } + + public void testEquals() throws Exception { + RegexQuery query1 = new RegexQuery( newTerm("foo.*")); + query1.setRegexImplementation(new JakartaRegexpCapabilities()); + + AutomatonQuery query2 = new AutomatonQuery( newTerm("foo.*")); + assertFalse(query1.equals(query2)); + } + +} + Index: contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonWildcardQuery.java =================================================================== --- contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonWildcardQuery.java (revision 0) +++ contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonWildcardQuery.java (revision 0) @@ -0,0 +1,272 @@ +package org.apache.lucene.search.regex; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.RAMDirectory; + +import java.io.IOException; + +/** + * TestWildcard tests the '*' and '?' wildcard characters. + * + * @version $Id: TestWildcard.java 694004 2008-09-10 21:38:52Z mikemccand $ + * + */ +public class TestAutomatonWildcardQuery + extends LuceneTestCase { + public void testEquals() { + AutomatonWildcardQuery wq1 = new AutomatonWildcardQuery(new Term("field", "b*a")); + AutomatonWildcardQuery wq2 = new AutomatonWildcardQuery(new Term("field", "b*a")); + AutomatonWildcardQuery wq3 = new AutomatonWildcardQuery(new Term("field", "b*a")); + + // reflexive? + assertEquals(wq1, wq2); + assertEquals(wq2, wq1); + + // transitive? + assertEquals(wq2, wq3); + assertEquals(wq1, wq3); + + assertFalse(wq1.equals(null)); + + FuzzyQuery fq = new FuzzyQuery(new Term("field", "b*a")); + assertFalse(wq1.equals(fq)); + assertFalse(fq.equals(wq1)); + } + + /** + * Tests if a AutomatonWildcardQuery that has no wildcard in the term is rewritten to a single + * TermQuery. 
+ */ + public void testTermWithoutWildcard() throws IOException { + RAMDirectory indexStore = getIndexStore("field", new String[]{"nowildcard", "nowildcardx"}); + IndexSearcher searcher = new IndexSearcher(indexStore); + + Query wq = new AutomatonWildcardQuery(new Term("field", "nowildcard")); + assertMatches(searcher, wq, 1); + + wq = searcher.rewrite(wq); + assertTrue(wq instanceof TermQuery); + } + + /** + * Tests Wildcard queries with an asterisk. + */ + public void testAsterisk() + throws IOException { + RAMDirectory indexStore = getIndexStore("body", new String[] + {"metal", "metals"}); + IndexSearcher searcher = new IndexSearcher(indexStore); + Query query1 = new TermQuery(new Term("body", "metal")); + Query query2 = new AutomatonWildcardQuery(new Term("body", "metal*")); + Query query3 = new AutomatonWildcardQuery(new Term("body", "m*tal")); + Query query4 = new AutomatonWildcardQuery(new Term("body", "m*tal*")); + Query query5 = new AutomatonWildcardQuery(new Term("body", "m*tals")); + + BooleanQuery query6 = new BooleanQuery(); + query6.add(query5, BooleanClause.Occur.SHOULD); + + BooleanQuery query7 = new BooleanQuery(); + query7.add(query3, BooleanClause.Occur.SHOULD); + query7.add(query5, BooleanClause.Occur.SHOULD); + + // Queries do not automatically lower-case search terms: + Query query8 = new AutomatonWildcardQuery(new Term("body", "M*tal*")); + + assertMatches(searcher, query1, 1); + assertMatches(searcher, query2, 2); + assertMatches(searcher, query3, 1); + assertMatches(searcher, query4, 2); + assertMatches(searcher, query5, 1); + assertMatches(searcher, query6, 1); + assertMatches(searcher, query7, 2); + assertMatches(searcher, query8, 0); + assertMatches(searcher, new AutomatonWildcardQuery(new Term("body", "*tall")), 0); + assertMatches(searcher, new AutomatonWildcardQuery(new Term("body", "*tal")), 1); + assertMatches(searcher, new AutomatonWildcardQuery(new Term("body", "*tal*")), 2); + } + + /** + * Tests Wildcard queries with a question 
mark. + * + * @throws IOException if an error occurs + */ + public void testQuestionmark() + throws IOException { + RAMDirectory indexStore = getIndexStore("body", new String[] + {"metal", "metals", "mXtals", "mXtXls"}); + IndexSearcher searcher = new IndexSearcher(indexStore); + Query query1 = new AutomatonWildcardQuery(new Term("body", "m?tal")); + Query query2 = new AutomatonWildcardQuery(new Term("body", "metal?")); + Query query3 = new AutomatonWildcardQuery(new Term("body", "metals?")); + Query query4 = new AutomatonWildcardQuery(new Term("body", "m?t?ls")); + Query query5 = new AutomatonWildcardQuery(new Term("body", "M?t?ls")); + Query query6 = new AutomatonWildcardQuery(new Term("body", "meta??")); + + assertMatches(searcher, query1, 1); + assertMatches(searcher, query2, 1); + assertMatches(searcher, query3, 0); + assertMatches(searcher, query4, 3); + assertMatches(searcher, query5, 0); + assertMatches(searcher, query6, 1); // Query: 'meta??' matches 'metals' not 'metal' + } + + private RAMDirectory getIndexStore(String field, String[] contents) + throws IOException { + RAMDirectory indexStore = new RAMDirectory(); + IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); + for (int i = 0; i < contents.length; ++i) { + Document doc = new Document(); + doc.add(new Field(field, contents[i], Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + writer.optimize(); + writer.close(); + + return indexStore; + } + + private void assertMatches(IndexSearcher searcher, Query q, int expectedMatches) + throws IOException { + ScoreDoc[] result = searcher.search(q, null, 1000).scoreDocs; + assertEquals(expectedMatches, result.length); + } + + /** + * Test that wild card queries are parsed to the correct type and are searched correctly. + * This test looks at both parsing and execution of wildcard queries. 
+ * Although placed here, it also tests prefix queries, verifying that + * prefix queries are not parsed into wild card queries, and viceversa. + * @throws Exception + */ + public void testParsingAndSearching() throws Exception { + String field = "content"; + boolean dbg = false; + QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer()) { + + @Override + protected Query newWildcardQuery(Term t) { + return new AutomatonWildcardQuery(t); + } + + }; + qp.setAllowLeadingWildcard(true); + String docs[] = { + "\\ abcdefg1", + "\\79 hijklmn1", + "\\\\ opqrstu1", + }; + // queries that should find all docs + String matchAll[] = { + "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*" + }; + // queries that should find no docs + String matchNone[] = { + "a*h", "a?h", "*a*h", "?a", "a?", + }; + // queries that should be parsed to prefix queries + String matchOneDocPrefix[][] = { + {"a*", "ab*", "abc*", }, // these should find only doc 0 + {"h*", "hi*", "hij*", "\\\\7*"}, // these should find only doc 1 + {"o*", "op*", "opq*", "\\\\\\\\*"}, // these should find only doc 2 + }; + // queries that should be parsed to wildcard queries + String matchOneDocWild[][] = { + {"*a*", "*ab*", "*abc**", "ab*e*", "*g?", "*f?1", "abc**"}, // these should find only doc 0 + {"*h*", "*hi*", "*hij**", "hi*k*", "*n?", "*m?1", "hij**"}, // these should find only doc 1 + {"*o*", "*op*", "*opq**", "op*q*", "*u?", "*t?1", "opq**"}, // these should find only doc 2 + }; + + // prepare the index + RAMDirectory dir = new RAMDirectory(); + IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); + for (int i = 0; i < docs.length; i++) { + Document doc = new Document(); + doc.add(new Field(field,docs[i],Store.NO,Index.ANALYZED)); + iw.addDocument(doc); + } + iw.close(); + + IndexSearcher searcher = new IndexSearcher(dir); + + // test queries that must find all + for (int i = 0; i < matchAll.length; i++) { + String qtxt = matchAll[i]; + Query q = 
qp.parse(qtxt); + if (dbg) System.out.println("matchAll: qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; + assertEquals(docs.length,hits.length); + } + + // test queries that must find none + for (int i = 0; i < matchNone.length; i++) { + String qtxt = matchNone[i]; + Query q = qp.parse(qtxt); + if (dbg) System.out.println("matchNone: qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; + assertEquals(0,hits.length); + } + + // test queries that must be prefix queries and must find only one doc + for (int i = 0; i < matchOneDocPrefix.length; i++) { + for (int j = 0; j < matchOneDocPrefix[i].length; j++) { + String qtxt = matchOneDocPrefix[i][j]; + Query q = qp.parse(qtxt); + if (dbg) System.out.println("match 1 prefix: doc="+docs[i]+" qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + assertEquals(PrefixQuery.class, q.getClass()); + ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; + assertEquals(1,hits.length); + assertEquals(i,hits[0].doc); + } + } + + // test queries that must be wildcard queries and must find only one doc + for (int i = 0; i < matchOneDocPrefix.length; i++) { + for (int j = 0; j < matchOneDocWild[i].length; j++) { + String qtxt = matchOneDocWild[i][j]; + Query q = qp.parse(qtxt); + if (dbg) System.out.println("match 1 wild: doc="+docs[i]+" qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + assertEquals(AutomatonWildcardQuery.class, q.getClass()); + ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; + assertEquals(1,hits.length); + assertEquals(i,hits[0].doc); + } + } + + searcher.close(); + } + +}