Index: NOTICE.txt
===================================================================
--- NOTICE.txt	(revision 824861)
+++ NOTICE.txt	(working copy)
@@ -28,3 +28,6 @@
 ICU4J, (under contrib/collation) is licensed under an MIT styles license
 (contrib/collation/lib/ICU-LICENSE.txt) and Copyright (c) 1995-2008 
 International Business Machines Corporation and others
+
+Brics Automaton (under contrib/regex) is BSD-licensed, created by Anders Møller.
+See http://www.brics.dk/automaton/
Index: contrib/regex/build.xml
===================================================================
--- contrib/regex/build.xml	(revision 824861)
+++ contrib/regex/build.xml	(working copy)
@@ -24,7 +24,7 @@
   </description>
 
   <path id="additional.dependencies">
-    <fileset dir="lib" includes="*-oro-*.jar,*-regexp-*.jar"/>
+    <fileset dir="lib" includes="*-oro-*.jar,*-regexp-*.jar,*automaton*.jar"/>
   </path>
 
   <pathconvert property="project.classpath"
Index: contrib/regex/lib/automaton.LICENSE
===================================================================
--- contrib/regex/lib/automaton.LICENSE	(revision 0)
+++ contrib/regex/lib/automaton.LICENSE	(revision 0)
@@ -0,0 +1,29 @@
+
+The BSD License
+
+    The following is a BSD license template. To generate your own license, change the values of OWNER, ORGANIZATION and YEAR from their original values as given here, and substitute your own. Also, you may optionally omit clause 3 and still be OSD conformant.
+
+    Note: On January 9th, 2008 the OSI Board approved the "Simplified BSD License" variant used by FreeBSD and others, which omits the final "no-endorsement" clause and is thus roughly equivalent to the MIT License.
+
+    Historical Note: The original license used on BSD Unix had four clauses. The advertising clause (the third of four clauses) required you to acknowledge use of U.C. Berkeley code in your advertising of any product using that code. It was officially rescinded by the Director of the Office of Technology Licensing of the University of California on July 22nd, 1999. He states that clause 3 is "hereby deleted in its entirety." The four clause license has not been approved by OSI. The license below does not contain the advertising clause.
+
+    This prelude is not part of the license.
+
+<OWNER> = Regents of the University of California
+<ORGANIZATION> = University of California, Berkeley
+<YEAR> = 1998
+
+In the original BSD license, both occurrences of the phrase "COPYRIGHT HOLDERS AND CONTRIBUTORS" in the disclaimer read "REGENTS AND CONTRIBUTORS".
+
+Here is the license template:
+
+Copyright (c) <YEAR>, <OWNER>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+    * Neither the name of the <ORGANIZATION> nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Index: contrib/regex/lib/automaton.jar
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream

Property changes on: contrib\regex\lib\automaton.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Index: contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonQuery.java
===================================================================
--- contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonQuery.java	(revision 0)
+++ contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonQuery.java	(revision 0)
@@ -0,0 +1,207 @@
+package org.apache.lucene.search.regex;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.FilteredTermEnum;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SingleTermEnum;
+import org.apache.lucene.util.ToStringUtils;
+
+import dk.brics.automaton.Automaton;
+import dk.brics.automaton.BasicAutomata;
+import dk.brics.automaton.BasicOperations;
+import dk.brics.automaton.RunAutomaton;
+import dk.brics.automaton.State;
+import dk.brics.automaton.Transition;
+
+/**
+ * A {@link Query} that will match terms against a finite-state machine.
+ * <p>
+ * This query will match documents that contain terms accepted by a given
+ * finite-state machine. The automaton can be constructed with the <a
+ * href="http://www.brics.dk/automaton/">brics automaton</a> API. Alternatively,
+ * it can be created from a regular expression with {@link AutomatonRegexQuery}
+ * or from the standard Lucene wildcard syntax with
+ * {@link AutomatonWildcardQuery}.
+ * </p>
+ * <p>
+ * When the query is executed, it will create an equivalent minimal DFA of the
+ * finite-state machine, and will enumerate the term dictionary in an
+ * intelligent way to reduce the number of comparisons. For example: the regular
+ * expression of <code>[dl]og?</code> will make approximately four
+ * comparisons: do, dog, lo, and log.
+ * </p>
+ */
+public class AutomatonQuery extends MultiTermQuery {
+  /** the automaton to match index terms against */
+  protected Automaton automaton;
+  /** term containing the field, and possibly some pattern structure */
+  protected Term term;
+
+  /**
+   * Create a new AutomatonQuery from a brics
+   * {@link dk.brics.automaton.Automaton}.
+   * 
+   * @param term Term containing field and possibly some pattern structure. The
+   *        term text is ignored.
+   * @param automaton Automaton to run, terms that are accepted are considered a
+   *        match.
+   */
+  public AutomatonQuery(Term term, Automaton automaton) {
+    super();
+    this.term = term;
+    this.automaton = automaton;
+    automaton.minimize();
+  }
+
+  @Override
+  protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {  
+    // matches a fixed string in expanded or singleton representation
+    String commonPrefix = automaton.getCommonPrefix();
+    if (automaton.equals(BasicAutomata.makeString(commonPrefix)))
+      return new SingleTermEnum(reader, term.createTerm(commonPrefix));
+    
+    /*
+     * If the DFA has a leading Kleene star, or something similar, it will
+     * need to run against the entire term dictionary. In this case it's much
+     * better to do just that than to use fancy enumeration.
+     */
+    State state = automaton.getInitialState();
+    for (Transition transition : state.getTransitions())
+      if (transition.getDest() == state
+          && (transition.getMax() - transition.getMin()) > (Character.MAX_VALUE / 3))
+        return new LinearTermEnum(reader);
+
+    return new AutomatonTermEnum(automaton, term, reader);
+  }
+
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    /*
+     * It is a prefix query, if it accepts the same language as its common
+     * prefix appended with any possible string.
+     */
+    
+    String commonPrefix = automaton.getCommonPrefix();
+    Automaton prefixAutomaton = BasicOperations.concatenate(BasicAutomata
+        .makeString(commonPrefix), BasicAutomata.makeAnyString());
+
+    if (automaton.equals(prefixAutomaton)) {
+      PrefixQuery rewritten = new PrefixQuery(term.createTerm(commonPrefix));
+      rewritten.setBoost(getBoost());
+      rewritten.setRewriteMethod(getRewriteMethod());
+      return rewritten;
+    }
+    
+    return super.rewrite(reader);
+  }
+  
+  
+  /**
+   * A "stupid" FilteredTermEnum that does not seek around the term dictionary.
+   * This is much faster than AutomatonTermEnum when all (or most) of the terms
+   * need to be compared.
+   */
+  class LinearTermEnum extends FilteredTermEnum {
+    private boolean endEnum = false;
+    private String field;
+    private String commonPrefix;
+    private RunAutomaton runAutomaton;
+    
+    LinearTermEnum(IndexReader reader) throws IOException {
+      super();
+      field = term.field();
+      runAutomaton = new RunAutomaton(automaton);
+      commonPrefix = automaton.getCommonPrefix();
+      setEnum(reader.terms(new Term(field, commonPrefix)));
+    }
+    
+    @Override
+    public float difference() {
+      return 1.0F;
+    }
+
+    @Override
+    protected boolean endEnum() {
+      return endEnum;
+    }
+
+    @Override
+    protected boolean termCompare(Term term) {
+      if (term.field() == field && term.text().startsWith(commonPrefix)) {
+        return runAutomaton.run(term.text());
+      } else {
+        endEnum = true; // terms are sorted: past the field/prefix range nothing more can match
+        return false;
+      }
+    } 
+  }
+
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = super.hashCode();
+    result = prime * result + ((automaton == null) ? 0 : automaton.hashCode());
+    result = prime * result + ((term == null) ? 0 : term.hashCode());
+    return result;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (!super.equals(obj))
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    AutomatonQuery other = (AutomatonQuery) obj;
+    if (automaton == null) {
+      if (other.automaton != null)
+        return false;
+    } else if (!automaton.equals(other.automaton))
+      return false;
+    if (term == null) {
+      if (other.term != null)
+        return false;
+    } else if (!term.equals(other.term))
+      return false;
+    return true;
+  }
+
+  @Override
+  public String toString(String field) {
+    StringBuilder buffer = new StringBuilder();
+    if (!term.field().equals(field)) {
+      buffer.append(term.field());
+      buffer.append(":");
+    }
+    buffer.append(getClass().getSimpleName());
+    buffer.append(" {");
+    buffer.append('\n');
+    buffer.append(automaton.toString());
+    buffer.append("}");
+    buffer.append(ToStringUtils.boost(getBoost()));
+    return buffer.toString();
+  }
+}

Property changes on: contrib\regex\src\java\org\apache\lucene\search\regex\AutomatonQuery.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonRegexQuery.java
===================================================================
--- contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonRegexQuery.java	(revision 0)
+++ contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonRegexQuery.java	(revision 0)
@@ -0,0 +1,76 @@
+package org.apache.lucene.search.regex;
+
+import org.apache.lucene.index.Term;
+
+import dk.brics.automaton.AutomatonProvider;
+import dk.brics.automaton.DatatypesAutomatonProvider;
+import dk.brics.automaton.RegExp;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A fast regular expression query based on the <a
+ * href="http://www.brics.dk/automaton/">brics automaton</a> package.
+ * <ul>
+ * <li>Comparisons are <a
+ * href="http://tusker.org/regex/regex_benchmark.html">fast</a>
+ * <li>The term dictionary is enumerated in an intelligent way, to avoid
+ * comparisons. See {@link AutomatonQuery} for more details.
+ * </ul>
+ * <p>
+ * The supported syntax is documented <a href=
+ * "http://www.brics.dk/automaton/doc/index.html?dk/brics/automaton/RegExp.html"
+ * >here</a>.
+ * </p>
+ * @see dk.brics.automaton.RegExp
+ */
+public class AutomatonRegexQuery extends AutomatonQuery {
+  private static AutomatonProvider defaultProvider = new DatatypesAutomatonProvider();
+
+  /**
+   * Constructs a query for terms matching <code>term</code>.
+   * <p>
+   * By default, all regular expression features are enabled.
+   * </p>
+   * @param term regular expression.
+   */
+  public AutomatonRegexQuery(Term term) {
+    this(term, RegExp.ALL);
+  }
+  
+  /**
+   * Constructs a query for terms matching <code>term</code>.
+   * 
+   * @param term regular expression.
+   * @param flags optional RegExp features from {@link dk.brics.automaton.RegExp}
+   */
+  public AutomatonRegexQuery(Term term, int flags) {
+    this(term, flags, defaultProvider);
+  }
+  
+  /**
+   * Constructs a query for terms matching <code>term</code>.
+   * 
+   * @param term regular expression.
+   * @param flags optional RegExp features from {@link dk.brics.automaton.RegExp}
+   * @param provider custom AutomatonProvider for named automata 
+   */
+  public AutomatonRegexQuery(Term term, int flags, AutomatonProvider provider) {
+    super(term, new RegExp(term.text(), flags).toAutomaton(provider));
+  }  
+}

Property changes on: contrib\regex\src\java\org\apache\lucene\search\regex\AutomatonRegexQuery.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonTermEnum.java
===================================================================
--- contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonTermEnum.java	(revision 0)
+++ contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonTermEnum.java	(revision 0)
@@ -0,0 +1,293 @@
+package org.apache.lucene.search.regex;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.FilteredTermEnum;
+
+import dk.brics.automaton.Automaton;
+import dk.brics.automaton.RunAutomaton;
+import dk.brics.automaton.State;
+import dk.brics.automaton.Transition;
+
+/**
+ * A FilteredTermEnum that enumerates terms based upon what is accepted by a
+ * DFA.
+ * <p>
+ * The algorithm is such:
+ * <ol>
+ *   <li>As long as matches are successful, keep reading sequentially.
+ *   <li>When a match fails, skip to the next string in lexicographic order that
+ * does not enter a reject state.
+ * </ol>
+ * <p>
+ * The algorithm does not attempt to actually skip to the next string that is
+ * completely accepted. This is not possible when the language accepted by the
+ * FSM is not finite (i.e. * operator).
+ * </p>
+ */
+class AutomatonTermEnum extends FilteredTermEnum {
+  private final IndexReader reader;
+  private final Term queryTerm;
+  private final RunAutomaton runAutomaton;
+  private final Automaton automaton;
+  // the last term that was compared
+  private Term lastTerm = null;
+  // for complex machines that must make a lot of comparisons
+  private final Map<State, Transition[]> transitionCache;
+  
+  /**
+   * Construct an enumerator based upon an automaton, enumerating the specified
+   * field, working on a supplied reader.
+   */
+  AutomatonTermEnum(Automaton automaton, Term queryTerm, IndexReader reader)
+      throws IOException {
+    super();
+    this.reader = reader;
+    this.queryTerm = queryTerm;
+    this.automaton = automaton;
+    
+    /* 
+     * tableize the automaton. this also ensures it is deterministic, and has no 
+     * transitions to dead states.
+     */
+    runAutomaton = new RunAutomaton(this.automaton);
+    
+    // build a cache of sorted transitions for every state
+    transitionCache = new HashMap<State, Transition[]>(runAutomaton.getSize());
+    for (State state : this.automaton.getStates()) {
+      List<Transition> transitions = state.getSortedTransitions(false);
+      transitionCache.put(state, transitions.toArray(new Transition[transitions.size()]));
+    }
+
+    String startPoint = nextString("");
+    
+    /* 
+     * in this case this automaton will not accept any strings.
+     * start the enumeration at the empty string, next() will return false.
+     */
+    if (startPoint == null) {
+      startPoint = "";
+    }
+
+    lastTerm = queryTerm.createTerm(startPoint);
+    setEnum(reader.terms(lastTerm));
+  }
+
+  @Override
+  public float difference() {
+    return 1.0f;
+  }
+
+  /**
+   * Returns true if the term matches the automaton. Also stashes away the term
+   * to assist with smart enumeration.
+   */
+  @Override
+  protected boolean termCompare(Term term) {
+    lastTerm = term;
+    return (term.field() == queryTerm.field() && runAutomaton.run(term.text()));
+  }
+
+  /**
+   * increments to the next term matching this automaton. after a successful
+   * comparison, it simply tries the next term. after an unsuccessful
+   * comparison, it seeks to a smarter position.
+   */
+  @Override
+  public boolean next() throws IOException {
+
+    do {
+      /* 
+       * if the previous enumeration was a match, don't even bother
+       * trying to compute the next place to seek to.
+       * this is an optimization for a DFA that matches many sequential terms,
+       * such as ab*
+       */
+      if (lastTerm == currentTerm) { 
+        actualEnum.next();
+      } else {
+        // seek to the next possible string
+        String nextPoint = nextString(lastTerm.text());
+        if (nextPoint == null) { // no more possible strings can match
+          currentTerm = null;
+          return false;
+        }
+        // replace the old enumerator with a new one, positioned to a nice place
+        actualEnum.close();
+        actualEnum = reader.terms(lastTerm.createTerm(nextPoint));
+      }
+
+      Term candidateTerm = actualEnum.term(); // read a term
+
+      /*
+       * this means end of enumeration: no more terms for this field or no more
+       * terms at all
+       */
+      if (candidateTerm == null || candidateTerm.field() != queryTerm.field()) {
+        currentTerm = null;
+        return false;
+      }
+      
+      // if the term matches the automaton, success!
+      if (termCompare(candidateTerm)) {
+        currentTerm = candidateTerm;
+        return true;
+      }
+    } while (true);
+  }
+
+  /** this is a dummy, it is not used by this class. */
+  @Override
+  protected boolean endEnum() {
+    assert false; // should never be called
+    return (currentTerm != null);
+  }
+
+  /**
+   * Returns the next String in lexicographic order after s that will not put
+   * the machine into a reject state. If such a string does not exist, returns
+   * null.
+   * 
+   * The correctness of this method depends upon the automaton being deterministic,
+   * and having no transitions to dead states.
+   * 
+   * @param s input String
+   * @return next valid String
+   */
+  private String nextString(String s) {
+    State state = automaton.getInitialState();
+    int pos = 0;
+
+    while (true) {
+      // walk the automaton until a character is rejected.
+      for (pos = 0; pos < s.length(); pos++) {
+        State nextState = step(state, s.charAt(pos));
+        if (nextState == null)
+          break;
+        else
+          state = nextState;
+      }
+
+      // take the useful portion, and the last non-reject state, and attempt to
+      // append characters that will match.
+      String nextString = nextString(s, state, pos);
+      if (nextString != null) {
+        return nextString;
+      } else { /* no more solutions exist from this useful portion, backtrack */
+        if (pos == 0) /* all solutions exhausted */
+          return null;
+        char nextChar = s.charAt(pos - 1);
+        nextChar++;
+        String sprime = s.substring(0, pos - 1) + nextChar;
+        // if this is accepted it is good to go as-is.
+        if (runAutomaton.run(sprime))
+          return sprime;
+        else
+          s = sprime;
+      }
+    }
+  }
+  
+  /**
+   * Returns the next String in lexicographic order after s that will not put
+   * the machine into a reject state. Appends some characters to the useful
+   * portion. If this cannot satisfy the machine, returns null. This method will
+   * walk the minimal path, in lexicographic order, as long as possible.
+   * 
+   * @param s input String
+   * @param state current non-reject state
+   * @param useful most useful portion of the string
+   * @return next valid String
+   */
+  private String nextString(String s, State state, int useful) {
+    /* 
+     * the next lexicographic character must be greater than the existing
+     * character, if it exists.
+     */
+    char c = 0;
+    if (useful < s.length()) {
+      c = s.charAt(useful);
+      c++;
+    }
+
+    StringBuilder sb = new StringBuilder();
+    // append the useful portion
+    sb.append(s, 0, useful);
+
+    Set<State> visited = new HashSet<State>();
+    visited.add(state);
+
+    Transition transitions[] = getTransitions(state);
+
+    // find the minimal path (lexicographic order) that is >= c
+    
+    for (int i = 0; i < transitions.length; i++) {
+      Transition transition = transitions[i];
+      if (transition.getMax() >= c) {
+        char nextChar = (char) Math.max(c, transition.getMin());
+        sb.append(nextChar);
+        state = transition.getDest();
+        /* 
+         * as long as is possible, continue down the minimal path in
+         * lexicographic order. if a loop or accept state is encountered, stop.
+         */
+        while (!visited.contains(state) && !state.isAccept()) {
+          visited.add(state);
+          /* 
+           * Note: we work with a DFA with no transitions to dead states.
+           * so the below is ok, if it is not an accept state,
+           * then there MUST be at least one transition.
+           */
+          transition = getTransitions(state)[0];
+          sb.append(transition.getMin());
+          state = transition.getDest();
+        }
+        return sb.toString();
+      }
+    }
+    return null;
+  }
+  
+  /**
+   * Get the cached set of transitions for a state.
+   */
+  private Transition[] getTransitions(State state) {
+    return transitionCache.get(state);
+  }
+  
+  /**
+   * Step the state machine forward one character,
+   * using cached transitions.
+   */
+  private State step(State state, char c) {
+    Transition transitions[] = getTransitions(state);
+    for (int i = 0; i < transitions.length; i++)
+      if (transitions[i].getMin() <= c && c <= transitions[i].getMax())
+        return transitions[i].getDest();
+    return null;
+  }
+}

Property changes on: contrib\regex\src\java\org\apache\lucene\search\regex\AutomatonTermEnum.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonWildcardQuery.java
===================================================================
--- contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonWildcardQuery.java	(revision 0)
+++ contrib/regex/src/java/org/apache/lucene/search/regex/AutomatonWildcardQuery.java	(revision 0)
@@ -0,0 +1,96 @@
+package org.apache.lucene.search.regex;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.WildcardQuery;
+
+import dk.brics.automaton.Automaton;
+import dk.brics.automaton.BasicAutomata;
+import dk.brics.automaton.BasicOperations;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A faster version of {@link WildcardQuery}.
+ * <p>
+ * This query is especially useful for wildcards containing the <code>?</code>
+ * operator. The term dictionary is enumerated in an intelligent way, to avoid
+ * comparisons. See {@link AutomatonQuery} for more details.
+ * </p>
+ */
+public class AutomatonWildcardQuery extends AutomatonQuery {
+  
+  /**
+   * Constructs a query for terms matching <code>term</code>. 
+   */
+  public AutomatonWildcardQuery(Term term) {
+    super(term, toAutomaton(term));
+  }
+
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    /*
+     * For this case, AutomatonQuery will already decide
+     * to use LinearTermEnum.
+     * 
+     * But in the special case where all terms must be compared, 
+     * it's actually slightly faster to just use WildcardQuery.
+     */
+    if (term.text().startsWith("*")) {
+      WildcardQuery rewritten = new WildcardQuery(term);
+      rewritten.setBoost(getBoost());
+      rewritten.setRewriteMethod(getRewriteMethod());
+      return rewritten;
+    }
+   
+    return super.rewrite(reader);
+  }
+
+  /**
+   * Convert Lucene wildcard syntax into an automaton.
+   */
+  private static Automaton toAutomaton(Term wildcardquery) {
+    List<Automaton> automata = new ArrayList<Automaton>();
+    
+    String wildcardText = wildcardquery.text();
+    
+    for (int i = 0; i < wildcardText.length(); i++) {
+      char c = wildcardText.charAt(i);
+      switch(c) {
+        case '*':  {
+          automata.add(BasicAutomata.makeAnyString());
+          break;
+        }
+        case '?': {
+          automata.add(BasicAutomata.makeAnyChar());
+          break;
+        }
+        default: {
+          automata.add(BasicAutomata.makeChar(c));
+        }
+      }
+    }
+    
+    return BasicOperations.concatenate(automata);
+  }
+}

Property changes on: contrib\regex\src\java\org\apache\lucene\search\regex\AutomatonWildcardQuery.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonQuery.java
===================================================================
--- contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonQuery.java	(revision 0)
+++ contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonQuery.java	(revision 0)
@@ -0,0 +1,262 @@
+package org.apache.lucene.search.regex;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+import dk.brics.automaton.Automaton;
+import dk.brics.automaton.BasicAutomata;
+import dk.brics.automaton.BasicOperations;
+
+public class TestAutomatonQuery extends LuceneTestCase {
+  private IndexSearcher searcher;
+
+  private final String FN = "field";
+
+  public void setUp() throws Exception {
+    super.setUp();
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true,
+        IndexWriter.MaxFieldLength.LIMITED);
+    Document doc = new Document();
+    Field titleField = new Field("title", "some title", Field.Store.NO,
+        Field.Index.ANALYZED);
+    Field field = new Field(FN, "this is document one", Field.Store.NO,
+        Field.Index.ANALYZED);
+    Field footerField = new Field("footer", "a footer", Field.Store.NO,
+        Field.Index.ANALYZED);
+    doc.add(titleField);
+    doc.add(field);
+    doc.add(footerField);
+    writer.addDocument(doc);
+    field.setValue("some text from doc two, a short piece.");
+    writer.addDocument(doc);
+    field.setValue("doc three has some different stuff");
+    writer.addDocument(doc);
+    writer.optimize();
+    writer.close();
+    searcher = new IndexSearcher(directory, true);
+  }
+
+  public void tearDown() throws Exception {
+    searcher.close();
+    super.tearDown();
+  }
+
+  private Term newTerm(String value) {
+    return new Term(FN, value);
+  }
+
+  private int automatonQueryNrHits(AutomatonQuery query) throws IOException {
+    return searcher.search(query, 5).totalHits;
+  }
+
+  private void assertAutomatonHits(int expected, Automaton automaton)
+      throws IOException {
+    AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);
+
+    query.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    assertEquals(expected, automatonQueryNrHits(query));
+
+    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+    assertEquals(expected, automatonQueryNrHits(query));
+
+    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+    assertEquals(expected, automatonQueryNrHits(query));
+
+    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    assertEquals(expected, automatonQueryNrHits(query));
+  }
+
+  /**
+   * Test some very simple automata.
+   */
+  public void testBasicAutomata() throws IOException {
+    assertAutomatonHits(0, BasicAutomata.makeEmpty());
+    assertAutomatonHits(0, BasicAutomata.makeEmptyString());
+    assertAutomatonHits(1, BasicAutomata.makeAnyChar());
+    assertAutomatonHits(3, BasicAutomata.makeAnyString());
+    assertAutomatonHits(2, BasicAutomata.makeString("doc"));
+  }
+
+  /**
+   * Test that a nondeterministic automaton works correctly. (It will be
+   * determinized.)
+   */
+  public void testNFA() throws IOException {
+    // accept this or three, the union is an NFA (two transitions for 't' from
+    // initial state)
+    Automaton nfa = BasicOperations.union(BasicAutomata.makeString("this"),
+        BasicAutomata.makeString("three"));
+    assertAutomatonHits(2, nfa);
+  }
+
+  public void testEquals() {
+    AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"), BasicAutomata
+        .makeString("foobar"));
+    // reference to a1
+    AutomatonQuery a2 = a1;
+    // same as a1 (accepts the same language, same term)
+    AutomatonQuery a3 = new AutomatonQuery(newTerm("foobar"), BasicOperations
+        .concatenate(BasicAutomata.makeString("foo"), BasicAutomata
+            .makeString("bar")));
+    // different than a1 (same term, but different language)
+    AutomatonQuery a4 = new AutomatonQuery(newTerm("foobar"), BasicAutomata
+        .makeString("different"));
+    // different than a1 (different term, same language)
+    AutomatonQuery a5 = new AutomatonQuery(newTerm("blah"), BasicAutomata
+        .makeString("foobar"));
+
+    assertEquals(a1, a2);
+    assertEquals(a1.hashCode(), a2.hashCode());
+
+    assertEquals(a1, a3);
+    assertEquals(a1.hashCode(), a3.hashCode());
+
+    assertEquals(a1.toString(), a3.toString());
+
+    // different class
+    AutomatonQuery w1 = new AutomatonWildcardQuery(newTerm("foobar"));
+    // different class
+    AutomatonQuery w2 = new AutomatonRegexQuery(newTerm("foobar"));
+
+    assertFalse(a1.equals(w1));
+    assertFalse(a1.equals(w2));
+    assertFalse(w1.equals(w2));
+    assertFalse(a1.equals(a4));
+    assertFalse(a1.equals(a5));
+    assertFalse(a1.equals(null));
+  }
+
+  /**
+   * Test that rewriting to a single term works as expected, preserves
+   * MultiTermQuery semantics.
+   */
+  public void testRewriteSingleTerm() throws IOException {
+    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"),
+        BasicAutomata.makeString("piece"));
+
+    aq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    aq.setBoost(0.9F);
+    Query rewritten = searcher.rewrite(aq);
+    assertTrue(rewritten instanceof TermQuery);
+    assertEquals(aq.getBoost(), rewritten.getBoost());
+    assertEquals(1, automatonQueryNrHits(aq));
+
+    aq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+    aq.setBoost(0.8F);
+    rewritten = searcher.rewrite(aq);
+    assertTrue(rewritten instanceof ConstantScoreQuery);
+    assertEquals(aq.getBoost(), rewritten.getBoost());
+    assertEquals(1, automatonQueryNrHits(aq));
+
+    aq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+    aq.setBoost(0.7F);
+    rewritten = searcher.rewrite(aq);
+    assertTrue(rewritten instanceof ConstantScoreQuery);
+    assertEquals(aq.getBoost(), rewritten.getBoost());
+    assertEquals(1, automatonQueryNrHits(aq));
+
+    aq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    aq.setBoost(0.6F);
+    rewritten = searcher.rewrite(aq);
+    assertTrue(rewritten instanceof ConstantScoreQuery);
+    assertEquals(aq.getBoost(), rewritten.getBoost());
+    assertEquals(1, automatonQueryNrHits(aq));
+  }
+
+  /**
+   * Test that rewriting to a prefix query works as expected, preserves
+   * MultiTermQuery semantics.
+   */
+  public void testRewritePrefix() throws IOException {
+    Automaton pfx = BasicAutomata.makeString("do");
+    pfx.expandSingleton(); // expand singleton representation for testing
+    Automaton prefixAutomaton = BasicOperations.concatenate(pfx, BasicAutomata
+        .makeAnyString());
+    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"),
+        prefixAutomaton);
+    PrefixQuery expected = new PrefixQuery(newTerm("do"));
+
+    aq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    aq.setBoost(0.5F);
+    expected.setRewriteMethod(aq.getRewriteMethod());
+    expected.setBoost(aq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(aq));
+    assertEquals(3, automatonQueryNrHits(aq));
+
+    aq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+    aq.setBoost(0.5F);
+    expected.setRewriteMethod(aq.getRewriteMethod());
+    expected.setBoost(aq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(aq));
+    assertEquals(3, automatonQueryNrHits(aq));
+
+    aq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+    aq.setBoost(0.5F);
+    expected.setRewriteMethod(aq.getRewriteMethod());
+    expected.setBoost(aq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(aq));
+    assertEquals(3, automatonQueryNrHits(aq));
+
+    aq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    aq.setBoost(0.5F);
+    expected.setRewriteMethod(aq.getRewriteMethod());
+    expected.setBoost(aq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(aq));
+    assertEquals(3, automatonQueryNrHits(aq));
+  }
+
+  /**
+   * Test that a badly-performing automaton that must visit all the terms does
+   * not use the smart enumeration, as this would just waste CPU.
+   */
+  public void testLinearOptimization() throws IOException {
+    AutomatonQuery aq = new AutomatonRegexQuery(newTerm(".*ument"));
+
+    aq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    assertFalse(aq.getEnum(searcher.getIndexReader()) instanceof AutomatonTermEnum);
+    assertEquals(1, automatonQueryNrHits(aq));
+
+    aq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+    assertFalse(aq.getEnum(searcher.getIndexReader()) instanceof AutomatonTermEnum);
+    assertEquals(1, automatonQueryNrHits(aq));
+
+    aq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+    assertFalse(aq.getEnum(searcher.getIndexReader()) instanceof AutomatonTermEnum);
+    assertEquals(1, automatonQueryNrHits(aq));
+
+    aq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    assertFalse(aq.getEnum(searcher.getIndexReader()) instanceof AutomatonTermEnum);
+    assertEquals(1, automatonQueryNrHits(aq));
+  }
+}

Property changes on: contrib\regex\src\test\org\apache\lucene\search\regex\TestAutomatonQuery.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonRegexQuery.java
===================================================================
--- contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonRegexQuery.java	(revision 0)
+++ contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonRegexQuery.java	(revision 0)
@@ -0,0 +1,98 @@
+package org.apache.lucene.search.regex;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Some simple regex tests, mostly converted from TestRegexQuery.
+ */
+public class TestAutomatonRegexQuery extends LuceneTestCase {
+  private IndexSearcher searcher;
+  private final String FN = "field";
+
+  public void setUp() throws Exception {
+    super.setUp();
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
+        true, IndexWriter.MaxFieldLength.LIMITED);
+    Document doc = new Document();
+    doc.add(new Field(FN,
+        "the quick brown fox jumps over the lazy كلب dog 493432 49344",
+        Field.Store.NO, Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    writer.optimize();
+    writer.close();
+    searcher = new IndexSearcher(directory, true);
+  }
+
+  public void tearDown() throws Exception {
+    searcher.close();
+    super.tearDown();
+  }
+
+  private Term newTerm(String value) { return new Term(FN, value); }
+
+  private int regexQueryNrHits(String regex) throws IOException {
+    AutomatonRegexQuery query = new AutomatonRegexQuery( newTerm(regex));
+    return searcher.search(query, 5).totalHits;
+  }
+
+  public void testRegex1() throws IOException {
+    assertEquals(1, regexQueryNrHits("q.[aeiou]c.*"));
+  }
+
+  public void testRegex2() throws IOException {
+    assertEquals(0, regexQueryNrHits(".[aeiou]c.*"));
+  }
+
+  public void testRegex3() throws IOException {
+    assertEquals(0, regexQueryNrHits("q.[aeiou]c"));
+  }
+  
+  public void testDataTypes() throws IOException {
+    assertEquals(1, regexQueryNrHits("<Arabic>*"));
+    assertEquals(0, regexQueryNrHits("<Greek>*"));
+    assertEquals(1, regexQueryNrHits("<Nd>*"));
+  }
+  
+  public void testNumericRange() throws IOException {
+    assertEquals(1, regexQueryNrHits("<420000-600000>"));
+    assertEquals(0, regexQueryNrHits("<493433-600000>"));
+  }
+  
+  /**
+   * Test a corner case for backtracking:
+   * In this case the term dictionary has 493432 followed by 49344.
+   * When backtracking from 49343... to 4934, it's necessary
+   * to test that 4934 itself is ok before trying to append more characters.
+   */
+  public void testBacktracking() throws IOException {
+    assertEquals(1, regexQueryNrHits("4934[314]"));
+  }
+}
+

Property changes on: contrib\regex\src\test\org\apache\lucene\search\regex\TestAutomatonRegexQuery.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonWildcardQuery.java
===================================================================
--- contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonWildcardQuery.java	(revision 0)
+++ contrib/regex/src/test/org/apache/lucene/search/regex/TestAutomatonWildcardQuery.java	(revision 0)
@@ -0,0 +1,319 @@
+package org.apache.lucene.search.regex;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Tests AutomatonWildcardQuery, mostly borrowing from TestWildcardQuery 
+ */
+public class TestAutomatonWildcardQuery extends LuceneTestCase {
+  
+  /**
+   * Tests if an AutomatonWildcardQuery that has no wildcard in the term is 
+   * rewritten to a simpler form (preserving MultiTermQuery semantics).
+   * This is actually done by AutomatonQuery... (see its tests)
+   */
+  public void testTermWithoutWildcard() throws IOException {
+    RAMDirectory indexStore = getIndexStore("field", new String[] {
+        "nowildcard", "nowildcardx" });
+    IndexSearcher searcher = new IndexSearcher(indexStore, true);
+
+    MultiTermQuery wq = new AutomatonWildcardQuery(new Term("field", "nowildcard"));
+    
+    wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    wq.setBoost(0.9F);
+    Query rewritten = searcher.rewrite(wq);
+    assertTrue(rewritten instanceof TermQuery);
+    assertEquals(rewritten.getBoost(), wq.getBoost());
+    assertMatches(searcher, wq, 1);
+    
+    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+    wq.setBoost(0.8F);
+    rewritten = searcher.rewrite(wq);
+    assertTrue(rewritten instanceof ConstantScoreQuery);
+    assertEquals(rewritten.getBoost(), wq.getBoost());
+    assertMatches(searcher, wq, 1);
+    
+    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+    wq.setBoost(0.7F);
+    rewritten = searcher.rewrite(wq);
+    assertTrue(rewritten instanceof ConstantScoreQuery);
+    assertEquals(rewritten.getBoost(), wq.getBoost());
+    assertMatches(searcher, wq, 1);
+    
+    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    wq.setBoost(0.6F);
+    rewritten = searcher.rewrite(wq);
+    assertTrue(rewritten instanceof ConstantScoreQuery);
+    assertEquals(rewritten.getBoost(), wq.getBoost());
+    assertMatches(searcher, wq, 1);
+  }
+  
+  /**
+   * Tests if an AutomatonWildcardQuery with a trailing * (a prefix query)
+   * is rewritten to a simpler form (preserving MultiTermQuery semantics).
+   * This is actually done by AutomatonQuery... (see its tests)
+   */
+  public void testTermShouldBePrefix() throws IOException {
+    RAMDirectory indexStore = getIndexStore("field", new String[] {
+        "nowildcard", "nowildcardx" });
+    IndexSearcher searcher = new IndexSearcher(indexStore, true);
+    MultiTermQuery wq = new AutomatonWildcardQuery(new Term("field", "nowild*"));
+    PrefixQuery expected = new PrefixQuery(new Term("field", "nowild"));
+    
+    wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    wq.setBoost(0.5F);
+    expected.setRewriteMethod(wq.getRewriteMethod());
+    expected.setBoost(wq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(wq));
+    assertMatches(searcher, wq, 2);
+    
+    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+    wq.setBoost(0.4F);
+    expected.setRewriteMethod(wq.getRewriteMethod());
+    expected.setBoost(wq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(wq));
+    assertMatches(searcher, wq, 2);
+    
+    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+    wq.setBoost(0.3F);
+    expected.setRewriteMethod(wq.getRewriteMethod());
+    expected.setBoost(wq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(wq));
+    assertMatches(searcher, wq, 2);
+    
+    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    wq.setBoost(0.2F);
+    expected.setRewriteMethod(wq.getRewriteMethod());
+    expected.setBoost(wq.getBoost());
+    assertEquals(searcher.rewrite(expected), searcher.rewrite(wq));
+    assertMatches(searcher, wq, 2);
+  }
+
+  /**
+   * Tests Wildcard queries with an asterisk.
+   */
+  public void testAsterisk() throws IOException {
+    RAMDirectory indexStore = getIndexStore("body", new String[]
+    {"metal", "metals"});
+    IndexSearcher searcher = new IndexSearcher(indexStore, true);
+    Query query1 = new TermQuery(new Term("body", "metal"));
+    Query query2 = new AutomatonWildcardQuery(new Term("body", "metal*"));
+    Query query3 = new AutomatonWildcardQuery(new Term("body", "m*tal"));
+    Query query4 = new AutomatonWildcardQuery(new Term("body", "m*tal*"));
+    Query query5 = new AutomatonWildcardQuery(new Term("body", "m*tals"));
+
+    BooleanQuery query6 = new BooleanQuery();
+    query6.add(query5, BooleanClause.Occur.SHOULD);
+
+    BooleanQuery query7 = new BooleanQuery();
+    query7.add(query3, BooleanClause.Occur.SHOULD);
+    query7.add(query5, BooleanClause.Occur.SHOULD);
+
+    // Queries do not automatically lower-case search terms:
+    Query query8 = new AutomatonWildcardQuery(new Term("body", "M*tal*"));
+
+    assertMatches(searcher, query1, 1);
+    assertMatches(searcher, query2, 2);
+    assertMatches(searcher, query3, 1);
+    assertMatches(searcher, query4, 2);
+    assertMatches(searcher, query5, 1);
+    assertMatches(searcher, query6, 1);
+    assertMatches(searcher, query7, 2);
+    assertMatches(searcher, query8, 0);
+    assertMatches(searcher, new AutomatonWildcardQuery(new Term("body", "*tall")), 0);
+    assertMatches(searcher, new AutomatonWildcardQuery(new Term("body", "*tal")), 1);
+    assertMatches(searcher, new AutomatonWildcardQuery(new Term("body", "*tal*")), 2);
+  }
+
+  /**
+   * Tests Wildcard queries with a question mark.
+   *
+   * @throws IOException if an error occurs
+   */
+  public void testQuestionmark() throws IOException {
+    RAMDirectory indexStore = getIndexStore("body", new String[]
+    {"metal", "metals", "mXtals", "mXtXls"});
+    IndexSearcher searcher = new IndexSearcher(indexStore, true);
+    Query query1 = new AutomatonWildcardQuery(new Term("body", "m?tal"));
+    Query query2 = new AutomatonWildcardQuery(new Term("body", "metal?"));
+    Query query3 = new AutomatonWildcardQuery(new Term("body", "metals?"));
+    Query query4 = new AutomatonWildcardQuery(new Term("body", "m?t?ls"));
+    Query query5 = new AutomatonWildcardQuery(new Term("body", "M?t?ls"));
+    Query query6 = new AutomatonWildcardQuery(new Term("body", "meta??"));
+    
+    assertMatches(searcher, query1, 1); 
+    assertMatches(searcher, query2, 1);
+    assertMatches(searcher, query3, 0);
+    assertMatches(searcher, query4, 3);
+    assertMatches(searcher, query5, 0);
+    assertMatches(searcher, query6, 1); // Query: 'meta??' matches 'metals' not 'metal'
+  }
+
+  private RAMDirectory getIndexStore(String field, String[] contents)
+      throws IOException {
+    RAMDirectory indexStore = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(indexStore, new SimpleAnalyzer(),
+        true, IndexWriter.MaxFieldLength.LIMITED);
+    for (int i = 0; i < contents.length; ++i) {
+      Document doc = new Document();
+      doc.add(new Field(field, contents[i], Field.Store.YES, Field.Index.ANALYZED));
+      writer.addDocument(doc);
+    }
+    writer.optimize();
+    writer.close();
+
+    return indexStore;
+  }
+
+  private void assertMatches(IndexSearcher searcher, Query q, int expectedMatches)
+      throws IOException {
+    ScoreDoc[] result = searcher.search(q, null, 1000).scoreDocs;
+    assertEquals(expectedMatches, result.length);
+  }
+
+  /**
+   * Test that wild card queries are parsed to the correct type and are searched correctly.
+   * This test looks at both parsing and execution of wildcard queries.
+   * Although placed here, it also tests prefix queries, verifying that
+   * prefix queries are not parsed into wildcard queries, and vice versa.
+   * @throws Exception
+   */
+  public void testParsingAndSearching() throws Exception {
+    String field = "content";
+    boolean dbg = false;
+    
+    // A query parser that uses AutomatonWildcardQuery
+    // for faster WildcardQueries
+    QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer()) {
+
+      @Override
+      protected Query newWildcardQuery(Term t) {
+        return new AutomatonWildcardQuery(t);
+      }
+      
+    };
+    
+    qp.setAllowLeadingWildcard(true);
+    String docs[] = {
+        "\\ abcdefg1",
+        "\\79 hijklmn1",
+        "\\\\ opqrstu1",
+    };
+    // queries that should find all docs
+    String matchAll[] = {
+        "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*"
+    };
+    // queries that should find no docs
+    String matchNone[] = {
+        "a*h", "a?h", "*a*h", "?a", "a?",
+    };
+    // queries that should be parsed to prefix queries
+    String matchOneDocPrefix[][] = {
+        {"a*", "ab*", "abc*", }, // these should find only doc 0 
+        {"h*", "hi*", "hij*", "\\\\7*"}, // these should find only doc 1
+        {"o*", "op*", "opq*", "\\\\\\\\*"}, // these should find only doc 2
+    };
+    // queries that should be parsed to wildcard queries
+    String matchOneDocWild[][] = {
+        {"*a*", "*ab*", "*abc**", "ab*e*", "*g?", "*f?1", "abc**"}, // these should find only doc 0
+        {"*h*", "*hi*", "*hij**", "hi*k*", "*n?", "*m?1", "hij**"}, // these should find only doc 1
+        {"*o*", "*op*", "*opq**", "op*q*", "*u?", "*t?1", "opq**"}, // these should find only doc 2
+    };
+
+    // prepare the index
+    RAMDirectory dir = new RAMDirectory();
+    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
+    for (int i = 0; i < docs.length; i++) {
+      Document doc = new Document();
+      doc.add(new Field(field,docs[i],Store.NO,Index.ANALYZED));
+      iw.addDocument(doc);
+    }
+    iw.close();
+    
+    IndexSearcher searcher = new IndexSearcher(dir, true);
+    
+    // test queries that must find all
+    for (int i = 0; i < matchAll.length; i++) {
+      String qtxt = matchAll[i];
+      Query q = qp.parse(qtxt);
+      if (dbg) System.out.println("matchAll: qtxt="+qtxt+" q="+q+" "+q.getClass().getName());
+      ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
+      assertEquals(docs.length,hits.length);
+    }
+    
+    // test queries that must find none
+    for (int i = 0; i < matchNone.length; i++) {
+      String qtxt = matchNone[i];
+      Query q = qp.parse(qtxt);
+      if (dbg) System.out.println("matchNone: qtxt="+qtxt+" q="+q+" "+q.getClass().getName());
+      ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
+      assertEquals(0,hits.length);
+    }
+
+    // test queries that must be prefix queries and must find only one doc
+    for (int i = 0; i < matchOneDocPrefix.length; i++) {
+      for (int j = 0; j < matchOneDocPrefix[i].length; j++) {
+        String qtxt = matchOneDocPrefix[i][j];
+        Query q = qp.parse(qtxt);
+        if (dbg) System.out.println("match 1 prefix: doc="+docs[i]+" qtxt="+qtxt+" q="+q+" "+q.getClass().getName());
+        assertEquals(PrefixQuery.class, q.getClass());
+        ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
+        assertEquals(1,hits.length);
+        assertEquals(i,hits[0].doc);
+      }
+    }
+
+    // test queries that must be wildcard queries and must find only one doc
+    for (int i = 0; i < matchOneDocPrefix.length; i++) {
+      for (int j = 0; j < matchOneDocWild[i].length; j++) {
+        String qtxt = matchOneDocWild[i][j];
+        Query q = qp.parse(qtxt);
+        if (dbg) System.out.println("match 1 wild: doc="+docs[i]+" qtxt="+qtxt+" q="+q+" "+q.getClass().getName());
+        assertEquals(AutomatonWildcardQuery.class, q.getClass());
+        ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
+        assertEquals(1,hits.length);
+        assertEquals(i,hits[0].doc);
+      }
+    }
+
+    searcher.close();
+  }
+}

Property changes on: contrib\regex\src\test\org\apache\lucene\search\regex\TestAutomatonWildcardQuery.java
___________________________________________________________________
Added: svn:eol-style
   + native

