Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1342541) +++ lucene/CHANGES.txt (working copy) @@ -17,6 +17,8 @@ * LUCENE-4023: DisjunctionMaxScorer now implements visitSubScorers(). (Uwe Schindler) +* LUCENE-2566: + - operators allow any amount of whitespace (yonik, janhoy) + API Changes * LUCENE-4023: Changed the visibility of Scorer#visitSubScorers() to Index: lucene/core/src/java/org/apache/lucene/queryParser/QueryParserConstants.java =================================================================== --- lucene/core/src/java/org/apache/lucene/queryParser/QueryParserConstants.java (revision 1342541) +++ lucene/core/src/java/org/apache/lucene/queryParser/QueryParserConstants.java (working copy) @@ -33,47 +33,49 @@ /** RegularExpression Id. */ int MINUS = 12; /** RegularExpression Id. */ - int LPAREN = 13; + int BAREOPER = 13; /** RegularExpression Id. */ - int RPAREN = 14; + int LPAREN = 14; /** RegularExpression Id. */ - int COLON = 15; + int RPAREN = 15; /** RegularExpression Id. */ - int STAR = 16; + int COLON = 16; /** RegularExpression Id. */ - int CARAT = 17; + int STAR = 17; /** RegularExpression Id. */ - int QUOTED = 18; + int CARAT = 18; /** RegularExpression Id. */ - int TERM = 19; + int QUOTED = 19; /** RegularExpression Id. */ - int FUZZY_SLOP = 20; + int TERM = 20; /** RegularExpression Id. */ - int PREFIXTERM = 21; + int FUZZY_SLOP = 21; /** RegularExpression Id. */ - int WILDTERM = 22; + int PREFIXTERM = 22; /** RegularExpression Id. */ - int RANGEIN_START = 23; + int WILDTERM = 23; /** RegularExpression Id. */ - int RANGEEX_START = 24; + int RANGEIN_START = 24; /** RegularExpression Id. */ - int NUMBER = 25; + int RANGEEX_START = 25; /** RegularExpression Id. */ - int RANGEIN_TO = 26; + int NUMBER = 26; /** RegularExpression Id. */ - int RANGEIN_END = 27; + int RANGEIN_TO = 27; /** RegularExpression Id. */ - int RANGEIN_QUOTED = 28; + int RANGEIN_END = 28; /** RegularExpression Id. */ - int RANGEIN_GOOP = 29; + int RANGEIN_QUOTED = 29; /** RegularExpression Id. */ - int RANGEEX_TO = 30; + int RANGEIN_GOOP = 30; /** RegularExpression Id. */ - int RANGEEX_END = 31; + int RANGEEX_TO = 31; /** RegularExpression Id. */ - int RANGEEX_QUOTED = 32; + int RANGEEX_END = 32; /** RegularExpression Id. */ - int RANGEEX_GOOP = 33; + int RANGEEX_QUOTED = 33; + /** RegularExpression Id. */ + int RANGEEX_GOOP = 34; /** Lexical state. */ int Boost = 0; @@ -99,6 +101,7 @@ "", "\"+\"", "\"-\"", + "", "\"(\"", "\")\"", "\":\"", Index: lucene/core/src/java/org/apache/lucene/queryParser/Token.java =================================================================== --- lucene/core/src/java/org/apache/lucene/queryParser/Token.java (revision 1342541) +++ lucene/core/src/java/org/apache/lucene/queryParser/Token.java (working copy) @@ -1,14 +1,21 @@ -/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */ -/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */ +/* Generated By:JavaCC: Do not edit this line. Token.java Version 5.0 */ +/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */ package org.apache.lucene.queryParser; /** * Describes the input token stream. */ -public class Token { +public class Token implements java.io.Serializable { /** + * The version identifier for this Serializable class. + * Increment only if the serialized form of the + * class changes. + */ + private static final long serialVersionUID = 1L; + + /** * An integer that describes the kind of this token. This numbering * system is determined by JavaCCParser, and a table of these numbers is * stored in the file ...Constants.java. @@ -75,7 +82,7 @@ */ public Token(int kind) { - this(kind, null); + this(kind, null); } /** @@ -83,8 +90,8 @@ */ public Token(int kind, String image) { - this.kind = kind; - this.image = image; + this.kind = kind; + this.image = image; } /** @@ -92,7 +99,7 @@ */ public String toString() { - return image; + return image; } /** @@ -109,16 +116,16 @@ */ public static Token newToken(int ofKind, String image) { - switch(ofKind) - { - default : return new Token(ofKind, image); - } + switch(ofKind) + { + default : return new Token(ofKind, image); + } } public static Token newToken(int ofKind) { - return newToken(ofKind, null); + return newToken(ofKind, null); } } -/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */ +/* JavaCC - OriginalChecksum=da95d0ec7daad286fab4e748b17294d8 (do not edit this line) */ Index: lucene/core/src/java/org/apache/lucene/queryParser/TokenMgrError.java =================================================================== --- lucene/core/src/java/org/apache/lucene/queryParser/TokenMgrError.java (revision 1342541) +++ lucene/core/src/java/org/apache/lucene/queryParser/TokenMgrError.java (working copy) @@ -1,141 +1,147 @@ -/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */ +/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 5.0 */ /* JavaCCOptions: */ package org.apache.lucene.queryParser; /** Token Manager Error. */ -@SuppressWarnings("serial") public class TokenMgrError extends Error { - /* - * Ordinals for various reasons why an Error of this type can be thrown. - */ + /** + * The version identifier for this Serializable class. + * Increment only if the serialized form of the + * class changes. + */ + private static final long serialVersionUID = 1L; - /** - * Lexical error occurred. - */ - static final int LEXICAL_ERROR = 0; + /* + * Ordinals for various reasons why an Error of this type can be thrown. + */ - /** - * An attempt was made to create a second instance of a static token manager. - */ - static final int STATIC_LEXER_ERROR = 1; + /** + * Lexical error occurred. + */ + static final int LEXICAL_ERROR = 0; - /** - * Tried to change to an invalid lexical state. - */ - static final int INVALID_LEXICAL_STATE = 2; + /** + * An attempt was made to create a second instance of a static token manager. + */ + static final int STATIC_LEXER_ERROR = 1; - /** - * Detected (and bailed out of) an infinite loop in the token manager. - */ - static final int LOOP_DETECTED = 3; + /** + * Tried to change to an invalid lexical state. + */ + static final int INVALID_LEXICAL_STATE = 2; - /** - * Indicates the reason why the exception is thrown. It will have - * one of the above 4 values. - */ - int errorCode; + /** + * Detected (and bailed out of) an infinite loop in the token manager. + */ + static final int LOOP_DETECTED = 3; - /** - * Replaces unprintable characters by their escaped (or unicode escaped) - * equivalents in the given string - */ - protected static final String addEscapes(String str) { - StringBuffer retval = new StringBuffer(); - char ch; - for (int i = 0; i < str.length(); i++) { - switch (str.charAt(i)) - { - case 0 : - continue; - case '\b': - retval.append("\\b"); - continue; - case '\t': - retval.append("\\t"); - continue; - case '\n': - retval.append("\\n"); - continue; - case '\f': - retval.append("\\f"); - continue; - case '\r': - retval.append("\\r"); - continue; - case '\"': - retval.append("\\\""); - continue; - case '\'': - retval.append("\\\'"); - continue; - case '\\': - retval.append("\\\\"); - continue; - default: - if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { - String s = "0000" + Integer.toString(ch, 16); - retval.append("\\u" + s.substring(s.length() - 4, s.length())); - } else { - retval.append(ch); - } - continue; - } + /** + * Indicates the reason why the exception is thrown. It will have + * one of the above 4 values. + */ + int errorCode; + + /** + * Replaces unprintable characters by their escaped (or unicode escaped) + * equivalents in the given string + */ + protected static final String addEscapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); + continue; + case '\t': + retval.append("\\t"); + continue; + case '\n': + retval.append("\\n"); + continue; + case '\f': + retval.append("\\f"); + continue; + case '\r': + retval.append("\\r"); + continue; + case '\"': + retval.append("\\\""); + continue; + case '\'': + retval.append("\\\'"); + continue; + case '\\': + retval.append("\\\\"); + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); + } else { + retval.append(ch); + } + continue; } - return retval.toString(); - } + } + return retval.toString(); + } - /** - * Returns a detailed message for the Error when it is thrown by the - * token manager to indicate a lexical error. - * Parameters : - * EOFSeen : indicates if EOF caused the lexical error - * curLexState : lexical state in which this error occurred - * errorLine : line number when the error occurred - * errorColumn : column number when the error occurred - * errorAfter : prefix that was seen before this error occurred - * curchar : the offending character - * Note: You can customize the lexical error message by modifying this method. - */ - protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) { - return("Lexical error at line " + - errorLine + ", column " + - errorColumn + ". Encountered: " + - (EOFSeen ? " " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + - "after : \"" + addEscapes(errorAfter) + "\""); - } + /** + * Returns a detailed message for the Error when it is thrown by the + * token manager to indicate a lexical error. + * Parameters : + * EOFSeen : indicates if EOF caused the lexical error + * curLexState : lexical state in which this error occurred + * errorLine : line number when the error occurred + * errorColumn : column number when the error occurred + * errorAfter : prefix that was seen before this error occurred + * curchar : the offending character + * Note: You can customize the lexical error message by modifying this method. + */ + protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) { + return("Lexical error at line " + + errorLine + ", column " + + errorColumn + ". Encountered: " + + (EOFSeen ? " " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + + "after : \"" + addEscapes(errorAfter) + "\""); + } - /** - * You can also modify the body of this method to customize your error messages. - * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not - * of end-users concern, so you can return something like : - * - * "Internal Error : Please file a bug report .... " - * - * from this method for such cases in the release version of your parser. - */ - public String getMessage() { - return super.getMessage(); - } + /** + * You can also modify the body of this method to customize your error messages. + * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not + * of end-users concern, so you can return something like : + * + * "Internal Error : Please file a bug report .... " + * + * from this method for such cases in the release version of your parser. + */ + public String getMessage() { + return super.getMessage(); + } - /* - * Constructors of various flavors follow. - */ + /* + * Constructors of various flavors follow. + */ - /** No arg constructor. */ - public TokenMgrError() { - } + /** No arg constructor. */ + public TokenMgrError() { + } - /** Constructor with message and reason. */ - public TokenMgrError(String message, int reason) { - super(message); - errorCode = reason; - } + /** Constructor with message and reason. */ + public TokenMgrError(String message, int reason) { + super(message); + errorCode = reason; + } - /** Full Constructor. */ - public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) { - this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); - } + /** Full Constructor. */ + public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) { + this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); + } } -/* JavaCC - OriginalChecksum=334e679cf1a88b3070bb8e3d80ee3f5e (do not edit this line) */ +/* JavaCC - OriginalChecksum=03df10dce345f1870429faa756473d14 (do not edit this line) */ Index: lucene/core/src/java/org/apache/lucene/queryParser/QueryParser.java =================================================================== --- lucene/core/src/java/org/apache/lucene/queryParser/QueryParser.java (revision 1342541) +++ lucene/core/src/java/org/apache/lucene/queryParser/QueryParser.java (working copy) @@ -1097,7 +1097,7 @@ } lastCharWasEscapeChar = false; } else { - if (curChar == '\\') { + if (curChar == '\u005c\u005c') { lastCharWasEscapeChar = true; } else { output[length] = curChar; @@ -1139,10 +1139,10 @@ for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); // These characters are part of the query syntax and must be escaped - if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' - || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' + if (c == '\u005c\u005c' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' + || c == '^' || c == '[' || c == ']' || c == '\u005c"' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&') { - sb.append('\\'); + sb.append('\u005c\u005c'); } sb.append(c); } @@ -1254,6 +1254,7 @@ case NOT: case PLUS: case MINUS: + case BAREOPER: case LPAREN: case STAR: case QUOTED: @@ -1306,6 +1307,7 @@ ; } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case BAREOPER: case STAR: case QUOTED: case TERM: @@ -1353,6 +1355,7 @@ boolean fuzzy = false; Query q; switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case BAREOPER: case STAR: case TERM: case PREFIXTERM: @@ -1377,6 +1380,10 @@ case NUMBER: term = jj_consume_token(NUMBER); break; + case BAREOPER: + term = jj_consume_token(BAREOPER); + term.image = term.image.substring(0,1); + break; default: jj_la1[8] = jj_gen; jj_consume_token(-1); @@ -1611,12 +1618,6 @@ finally { jj_save(0, xla); } } - private boolean jj_3R_2() { - if (jj_scan_token(TERM)) return true; - if (jj_scan_token(COLON)) return true; - return false; - } - private boolean jj_3_1() { Token xsp; xsp = jj_scanpos; @@ -1633,6 +1634,12 @@ return false; } + private boolean jj_3R_2() { + if (jj_scan_token(TERM)) return true; + if (jj_scan_token(COLON)) return true; + return false; + } + /** Generated Token Manager. */ public QueryParserTokenManager token_source; /** Current token. */ @@ -1651,10 +1658,10 @@ jj_la1_init_1(); } private static void jj_la1_init_0() { - jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x3ed3f00,0x90000,0x20000,0x3ed2000,0x2690000,0x100000,0x100000,0x20000,0x30000000,0x4000000,0x30000000,0x20000,0x0,0x40000000,0x0,0x20000,0x100000,0x20000,0x3ed0000,}; + jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x7da7f00,0x120000,0x40000,0x7da6000,0x4d22000,0x200000,0x200000,0x40000,0x60000000,0x8000000,0x60000000,0x40000,0x0,0x80000000,0x0,0x40000,0x200000,0x40000,0x7da2000,}; } private static void jj_la1_init_1() { - jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3,0x0,0x3,0x0,0x0,0x0,0x0,}; + jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x6,0x0,0x6,0x0,0x0,0x0,0x0,}; } final private JJCalls[] jj_2_rtns = new JJCalls[1]; private boolean jj_rescan = false; @@ -1789,7 +1796,7 @@ for (int i = 0; i < jj_endpos; i++) { jj_expentry[i] = jj_lasttokens[i]; } - jj_entries_loop: for (java.util.Iterator it = jj_expentries.iterator(); it.hasNext();) { + jj_entries_loop: for (java.util.Iterator it = jj_expentries.iterator(); it.hasNext();) { int[] oldentry = (int[])(it.next()); if (oldentry.length == jj_expentry.length) { for (int i = 0; i < jj_expentry.length; i++) { @@ -1808,7 +1815,7 @@ /** Generate ParseException. */ public ParseException generateParseException() { jj_expentries.clear(); - boolean[] la1tokens = new boolean[34]; + boolean[] la1tokens = new boolean[35]; if (jj_kind >= 0) { la1tokens[jj_kind] = true; jj_kind = -1; @@ -1825,7 +1832,7 @@ } } } - for (int i = 0; i < 34; i++) { + for (int i = 0; i < 35; i++) { if (la1tokens[i]) { jj_expentry = new int[1]; jj_expentry[0] = i; Index: lucene/core/src/java/org/apache/lucene/queryParser/QueryParser.jj =================================================================== --- lucene/core/src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 1342541) +++ lucene/core/src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy) @@ -1218,6 +1218,7 @@ | | | +| > | | | @@ -1360,6 +1361,7 @@ | term= { prefix=true; } | term= { wildcard=true; } | term= + | term= { term.image = term.image.substring(0,1); } ) [ fuzzySlop= { fuzzy=true; } ] [ boost= [ fuzzySlop= { fuzzy=true; } ] ] Index: lucene/core/src/java/org/apache/lucene/queryParser/CharStream.java =================================================================== --- lucene/core/src/java/org/apache/lucene/queryParser/CharStream.java (revision 1342541) +++ lucene/core/src/java/org/apache/lucene/queryParser/CharStream.java (working copy) @@ -1,5 +1,5 @@ -/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */ -/* JavaCCOptions:STATIC=false */ +/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 5.0 */ +/* JavaCCOptions:STATIC=false,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */ package org.apache.lucene.queryParser; /** @@ -17,7 +17,8 @@ * operation. */ -public interface CharStream { +public +interface CharStream { /** * Returns the next character from the selected input. The method @@ -26,6 +27,7 @@ */ char readChar() throws java.io.IOException; + @Deprecated /** * Returns the column position of the character last read. * @deprecated @@ -33,6 +35,7 @@ */ int getColumn(); + @Deprecated /** * Returns the line number of the character last read. * @deprecated @@ -109,4 +112,4 @@ void Done(); } -/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */ +/* JavaCC - OriginalChecksum=6b854f7f279fcc2b052037ffc369be2d (do not edit this line) */ Index: lucene/core/src/java/org/apache/lucene/queryParser/ParseException.java =================================================================== --- lucene/core/src/java/org/apache/lucene/queryParser/ParseException.java (revision 1342541) +++ lucene/core/src/java/org/apache/lucene/queryParser/ParseException.java (working copy) @@ -1,4 +1,4 @@ -/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */ +/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 5.0 */ /* JavaCCOptions:KEEP_LINE_COL=null */ package org.apache.lucene.queryParser; @@ -14,24 +14,24 @@ public class ParseException extends Exception { /** + * The version identifier for this Serializable class. + * Increment only if the serialized form of the + * class changes. + */ + private static final long serialVersionUID = 1L; + + /** * This constructor is used by the method "generateParseException" * in the generated parser. Calling this constructor generates * a new object of this type with the fields "currentToken", - * "expectedTokenSequences", and "tokenImage" set. The boolean - * flag "specialConstructor" is also set to true to indicate that - * this constructor was used to create this object. - * This constructor calls its super class with the empty string - * to force the "toString" method of parent class "Throwable" to - * print the error message in the form: - * ParseException: + * "expectedTokenSequences", and "tokenImage" set. */ public ParseException(Token currentTokenVal, int[][] expectedTokenSequencesVal, String[] tokenImageVal ) { - super(""); - specialConstructor = true; + super(initialise(currentTokenVal, expectedTokenSequencesVal, tokenImageVal)); currentToken = currentTokenVal; expectedTokenSequences = expectedTokenSequencesVal; tokenImage = tokenImageVal; @@ -49,21 +49,13 @@ public ParseException() { super(); - specialConstructor = false; } /** Constructor with message. */ public ParseException(String message) { super(message); - specialConstructor = false; } - /** - * This variable determines which constructor was used to create - * this object and thereby affects the semantics of the - * "getMessage" method (see below). - */ - protected boolean specialConstructor; /** * This is the last token that has been consumed successfully. If @@ -87,19 +79,16 @@ public String[] tokenImage; /** - * This method has the standard behavior when this object has been - * created using the standard constructors. Otherwise, it uses - * "currentToken" and "expectedTokenSequences" to generate a parse + * It uses "currentToken" and "expectedTokenSequences" to generate a parse * error message and returns it. If this object has been created * due to a parse error, and you do not catch it (it gets thrown - * from the parser), then this method is called during the printing - * of the final stack trace, and hence the correct error message + * from the parser) the correct error message * gets displayed. */ - public String getMessage() { - if (!specialConstructor) { - return super.getMessage(); - } + private static String initialise(Token currentToken, + int[][] expectedTokenSequences, + String[] tokenImage) { + String eol = System.getProperty("line.separator", "\n"); StringBuffer expected = new StringBuffer(); int maxSize = 0; for (int i = 0; i < expectedTokenSequences.length; i++) { @@ -149,7 +138,7 @@ * when these raw version cannot be used as part of an ASCII * string literal. */ - protected String add_escapes(String str) { + static String add_escapes(String str) { StringBuffer retval = new StringBuffer(); char ch; for (int i = 0; i < str.length(); i++) { @@ -195,4 +184,4 @@ } } -/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */ +/* JavaCC - OriginalChecksum=2e7670d6260cd2ac6c9cbda0075541b7 (do not edit this line) */ Index: lucene/core/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java =================================================================== --- lucene/core/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 1342541) +++ lucene/core/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy) @@ -65,23 +65,23 @@ switch(curChar) { case 40: - return jjStopAtPos(0, 13); + return jjStopAtPos(0, 14); case 41: - return jjStopAtPos(0, 14); + return jjStopAtPos(0, 15); case 42: - return jjStartNfaWithStates_3(0, 16, 36); + return jjStartNfaWithStates_3(0, 17, 38); case 43: - return jjStopAtPos(0, 11); + return jjStartNfaWithStates_3(0, 11, 15); case 45: - return jjStopAtPos(0, 12); + return jjStartNfaWithStates_3(0, 12, 15); case 58: - return jjStopAtPos(0, 15); + return jjStopAtPos(0, 16); case 91: - return jjStopAtPos(0, 23); + return jjStopAtPos(0, 24); case 94: - return jjStopAtPos(0, 17); + return jjStopAtPos(0, 18); case 123: - return jjStopAtPos(0, 24); + return jjStopAtPos(0, 25); default : return jjMoveNfa_3(0, 0); } @@ -109,7 +109,7 @@ private int jjMoveNfa_3(int startState, int curPos) { int startsAt = 0; - jjnewStateCnt = 36; + jjnewStateCnt = 38; int i = 1; jjstateSet[0] = startState; int kind = 0x7fffffff; @@ -124,47 +124,49 @@ { switch(jjstateSet[--i]) { - case 36: - case 25: - if ((0xfbfffcf8ffffd9ffL & l) == 0L) - break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; case 0: if ((0xfbffd4f8ffffd9ffL & l) != 0L) { - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); } else if ((0x100002600L & l) != 0L) { if (kind > 7) kind = 7; } + else if ((0x280200000000L & l) != 0L) + jjstateSet[jjnewStateCnt++] = 15; else if (curChar == 34) jjCheckNAddStates(0, 2); - else if (curChar == 33) - { - if (kind > 10) - kind = 10; - } if ((0x7bffd0f8ffffd9ffL & l) != 0L) { - if (kind > 19) - kind = 19; + if (kind > 20) + kind = 20; jjCheckNAddStates(3, 7); } else if (curChar == 42) { - if (kind > 21) - kind = 21; + if (kind > 22) + kind = 22; } + else if (curChar == 33) + { + if (kind > 10) + kind = 10; + } if (curChar == 38) jjstateSet[jjnewStateCnt++] = 4; break; + case 38: + case 27: + if ((0xfbfffcf8ffffd9ffL & l) == 0L) + break; + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); + break; case 4: if (curChar == 38 && kind > 8) kind = 8; @@ -178,78 +180,86 @@ kind = 10; break; case 14: + if ((0x280200000000L & l) != 0L) + jjstateSet[jjnewStateCnt++] = 15; + break; + case 15: + if ((0x100002600L & l) != 0L && kind > 13) + kind = 13; + break; + case 16: if (curChar == 34) jjCheckNAddStates(0, 2); break; - case 15: + case 17: if ((0xfffffffbffffffffL & l) != 0L) jjCheckNAddStates(0, 2); break; - case 17: + case 19: jjCheckNAddStates(0, 2); break; - case 18: - if (curChar == 34 && kind > 18) - kind = 18; + case 20: + if (curChar == 34 && kind > 19) + kind = 19; break; - case 20: + case 22: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 20) - kind = 20; + if (kind > 21) + kind = 21; jjAddStates(8, 9); break; - case 21: + case 23: if (curChar == 46) - jjCheckNAdd(22); + jjCheckNAdd(24); break; - case 22: + case 24: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 20) - kind = 20; - jjCheckNAdd(22); - break; - case 23: - if (curChar == 42 && kind > 21) + if (kind > 21) kind = 21; + jjCheckNAdd(24); break; - case 24: + case 25: + if (curChar == 42 && kind > 22) + kind = 22; + break; + case 26: if ((0xfbffd4f8ffffd9ffL & l) == 0L) break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); break; - case 27: - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); + case 29: + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); break; - case 28: + case 30: if ((0x7bffd0f8ffffd9ffL & l) == 0L) break; - if (kind > 19) - kind = 19; + if (kind > 20) + kind = 20; jjCheckNAddStates(3, 7); break; - case 29: + case 31: if ((0x7bfff8f8ffffd9ffL & l) == 0L) break; - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(31, 32); break; - case 31: - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); + case 33: + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(31, 32); break; - case 32: + case 34: if ((0x7bfff8f8ffffd9ffL & l) != 0L) jjCheckNAddStates(10, 12); break; - case 34: + case 36: jjCheckNAddStates(10, 12); break; default : break; @@ -263,36 +273,26 @@ { switch(jjstateSet[--i]) { - case 36: - if ((0x97ffffff87ffffffL & l) != 0L) - { - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - } - else if (curChar == 92) - jjCheckNAddTwoStates(27, 27); - break; case 0: if ((0x97ffffff87ffffffL & l) != 0L) { - if (kind > 19) - kind = 19; + if (kind > 20) + kind = 20; jjCheckNAddStates(3, 7); } else if (curChar == 92) jjCheckNAddStates(13, 15); else if (curChar == 126) { - if (kind > 20) - kind = 20; - jjstateSet[jjnewStateCnt++] = 20; + if (kind > 21) + kind = 21; + jjstateSet[jjnewStateCnt++] = 22; } if ((0x97ffffff87ffffffL & l) != 0L) { - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); } if (curChar == 78) jjstateSet[jjnewStateCnt++] = 11; @@ -303,6 +303,16 @@ else if (curChar == 65) jjstateSet[jjnewStateCnt++] = 2; break; + case 38: + if ((0x97ffffff87ffffffL & l) != 0L) + { + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); + } + else if (curChar == 92) + jjCheckNAddTwoStates(29, 29); + break; case 1: if (curChar == 68 && kind > 8) kind = 8; @@ -343,82 +353,82 @@ if (curChar == 78) jjstateSet[jjnewStateCnt++] = 11; break; - case 15: + case 17: if ((0xffffffffefffffffL & l) != 0L) jjCheckNAddStates(0, 2); break; - case 16: + case 18: if (curChar == 92) - jjstateSet[jjnewStateCnt++] = 17; + jjstateSet[jjnewStateCnt++] = 19; break; - case 17: + case 19: jjCheckNAddStates(0, 2); break; - case 19: + case 21: if (curChar != 126) break; - if (kind > 20) - kind = 20; - jjstateSet[jjnewStateCnt++] = 20; + if (kind > 21) + kind = 21; + jjstateSet[jjnewStateCnt++] = 22; break; - case 24: + case 26: if ((0x97ffffff87ffffffL & l) == 0L) break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); break; - case 25: + case 27: if ((0x97ffffff87ffffffL & l) == 0L) break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); break; - case 26: + case 28: if (curChar == 92) - jjCheckNAddTwoStates(27, 27); + jjCheckNAddTwoStates(29, 29); break; - case 27: - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); + case 29: + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); break; - case 28: + case 30: if ((0x97ffffff87ffffffL & l) == 0L) break; - if (kind > 19) - kind = 19; + if (kind > 20) + kind = 20; jjCheckNAddStates(3, 7); break; - case 29: + case 31: if ((0x97ffffff87ffffffL & l) == 0L) break; - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(31, 32); break; - case 30: + case 32: if (curChar == 92) - jjCheckNAddTwoStates(31, 31); + jjCheckNAddTwoStates(33, 33); break; - case 31: - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); + case 33: + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(31, 32); break; - case 32: + case 34: if ((0x97ffffff87ffffffL & l) != 0L) jjCheckNAddStates(10, 12); break; - case 33: + case 35: if (curChar == 92) - jjCheckNAddTwoStates(34, 34); + jjCheckNAddTwoStates(36, 36); break; - case 34: + case 36: jjCheckNAddStates(10, 12); break; - case 35: + case 37: if (curChar == 92) jjCheckNAddStates(13, 15); break; @@ -437,14 +447,6 @@ { switch(jjstateSet[--i]) { - case 36: - case 25: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) - break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; case 0: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { @@ -453,62 +455,74 @@ } if (jjCanMove_2(hiByte, i1, i2, l1, l2)) { - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); } if (jjCanMove_2(hiByte, i1, i2, l1, l2)) { - if (kind > 19) - kind = 19; + if (kind > 20) + kind = 20; jjCheckNAddStates(3, 7); } break; + case 38: + case 27: + if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) + break; + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); + break; case 15: + if (jjCanMove_0(hiByte, i1, i2, l1, l2) && kind > 13) + kind = 13; + break; case 17: + case 19: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) jjCheckNAddStates(0, 2); break; - case 24: + case 26: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); break; - case 27: + case 29: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); + if (kind > 23) + kind = 23; + jjCheckNAddTwoStates(27, 28); break; - case 28: + case 30: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; - if (kind > 19) - kind = 19; + if (kind > 20) + kind = 20; jjCheckNAddStates(3, 7); break; - case 29: + case 31: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(31, 32); break; - case 31: + case 33: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(31, 32); break; - case 32: + case 34: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) jjCheckNAddStates(10, 12); break; - case 34: + case 36: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) jjCheckNAddStates(10, 12); break; @@ -523,7 +537,7 @@ kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 36 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 38 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } @@ -534,9 +548,9 @@ switch (pos) { case 0: - if ((active0 & 0x40000000L) != 0L) + if ((active0 & 0x80000000L) != 0L) { - jjmatchedKind = 33; + jjmatchedKind = 34; return 6; } return -1; @@ -553,9 +567,9 @@ switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_1(0x40000000L); + return jjMoveStringLiteralDfa1_1(0x80000000L); case 125: - return jjStopAtPos(0, 31); + return jjStopAtPos(0, 32); default : return jjMoveNfa_1(0, 0); } @@ -570,8 +584,8 @@ switch(curChar) { case 79: - if ((active0 & 0x40000000L) != 0L) - return jjStartNfaWithStates_1(1, 30, 6); + if ((active0 & 0x80000000L) != 0L) + return jjStartNfaWithStates_1(1, 31, 6); break; default : break; @@ -607,8 +621,8 @@ case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 33) - kind = 33; + if (kind > 34) + kind = 34; jjCheckNAdd(6); } if ((0x100002600L & l) != 0L) @@ -632,14 +646,14 @@ jjCheckNAddStates(16, 18); break; case 5: - if (curChar == 34 && kind > 32) - kind = 32; + if (curChar == 34 && kind > 33) + kind = 33; break; case 6: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 33) - kind = 33; + if (kind > 34) + kind = 34; jjCheckNAdd(6); break; default : break; @@ -657,8 +671,8 @@ case 6: if ((0xdfffffffffffffffL & l) == 0L) break; - if (kind > 33) - kind = 33; + if (kind > 34) + kind = 34; jjCheckNAdd(6); break; case 2: @@ -691,8 +705,8 @@ } if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { - if (kind > 33) - kind = 33; + if (kind > 34) + kind = 34; jjCheckNAdd(6); } break; @@ -703,8 +717,8 @@ case 6: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; - if (kind > 33) - kind = 33; + if (kind > 34) + kind = 34; jjCheckNAdd(6); break; default : break; @@ -749,8 +763,8 @@ case 0: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 25) - kind = 25; + if (kind > 26) + kind = 26; jjAddStates(19, 20); break; case 1: @@ -760,8 +774,8 @@ case 2: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 25) - kind = 25; + if (kind > 26) + kind = 26; jjCheckNAdd(2); break; default : break; @@ -812,9 +826,9 @@ switch (pos) { case 0: - if ((active0 & 0x4000000L) != 0L) + if ((active0 & 0x8000000L) != 0L) { - jjmatchedKind = 29; + jjmatchedKind = 30; return 6; } return -1; @@ -831,9 +845,9 @@ switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_2(0x4000000L); + return jjMoveStringLiteralDfa1_2(0x8000000L); case 93: - return jjStopAtPos(0, 27); + return jjStopAtPos(0, 28); default : return jjMoveNfa_2(0, 0); } @@ -848,8 +862,8 @@ switch(curChar) { case 79: - if ((active0 & 0x4000000L) != 0L) - return jjStartNfaWithStates_2(1, 26, 6); + if ((active0 & 0x8000000L) != 0L) + return jjStartNfaWithStates_2(1, 27, 6); break; default : break; @@ -885,8 +899,8 @@ case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 29) - kind = 29; + if (kind > 30) + kind = 30; jjCheckNAdd(6); } if ((0x100002600L & l) != 0L) @@ -910,14 +924,14 @@ jjCheckNAddStates(16, 18); break; case 5: - if (curChar == 34 && kind > 28) - kind = 28; + if (curChar == 34 && kind > 29) + kind = 29; break; case 6: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 29) - kind = 29; + if (kind > 30) + kind = 30; jjCheckNAdd(6); break; default : break; @@ -935,8 +949,8 @@ case 6: if ((0xffffffffdfffffffL & l) == 0L) break; - if (kind > 29) - kind = 29; + if (kind > 30) + kind = 30; jjCheckNAdd(6); break; case 2: @@ -969,8 +983,8 @@ } if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { - if (kind > 29) - kind = 29; + if (kind > 30) + kind = 30; jjCheckNAdd(6); } break; @@ -981,8 +995,8 @@ case 6: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; - if (kind > 29) - kind = 29; + if (kind > 30) + kind = 30; jjCheckNAdd(6); break; default : break; @@ -1003,7 +1017,7 @@ } } static final int[] jjnextStates = { - 15, 16, 18, 29, 32, 23, 33, 30, 20, 21, 32, 23, 33, 31, 34, 27, + 17, 18, 20, 31, 34, 25, 35, 32, 22, 23, 34, 25, 35, 33, 36, 29, 2, 4, 5, 0, 1, }; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) @@ -1046,8 +1060,8 @@ /** Token literal values. */ public static final String[] jjstrLiteralImages = { "", null, null, null, null, null, null, null, null, null, null, "\53", "\55", -"\50", "\51", "\72", "\52", "\136", null, null, null, null, null, "\133", "\173", -null, "\124\117", "\135", null, null, "\124\117", "\175", null, null, }; +null, "\50", "\51", "\72", "\52", "\136", null, null, null, null, null, "\133", +"\173", null, "\124\117", "\135", null, null, "\124\117", "\175", null, null, }; /** Lexer state names. */ public static final String[] lexStateNames = { @@ -1059,18 +1073,18 @@ /** Lex State array. */ public static final int[] jjnewLexState = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, - 3, -1, 3, -1, -1, -1, 3, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, + 1, 3, -1, 3, -1, -1, -1, 3, -1, -1, }; static final long[] jjtoToken = { - 0x3ffffff01L, + 0x7ffffff01L, }; static final long[] jjtoSkip = { 0x80L, }; protected CharStream input_stream; -private final int[] jjrounds = new int[36]; -private final int[] jjstateSet = new int[72]; +private final int[] jjrounds = new int[38]; +private final int[] jjstateSet = new int[76]; protected char curChar; /** Constructor. */ public QueryParserTokenManager(CharStream stream){ @@ -1095,7 +1109,7 @@ { int i; jjround = 0x80000001; - for (i = 36; i-- > 0;) + for (i = 38; i-- > 0;) jjrounds[i] = 0x80000000; } Index: lucene/backwards/src/test-framework/java/org/apache/lucene/queryParser/QueryParserTestBase.java =================================================================== --- lucene/backwards/src/test-framework/java/org/apache/lucene/queryParser/QueryParserTestBase.java (revision 1342541) +++ lucene/backwards/src/test-framework/java/org/apache/lucene/queryParser/QueryParserTestBase.java (working copy) @@ -322,14 +322,25 @@ assertQueryEquals("a AND -b", null, "+a -b"); assertQueryEquals("a AND !b", null, "+a -b"); assertQueryEquals("a && b", null, "+a +b"); - assertQueryEquals("a && ! b", null, "+a -b"); +// assertQueryEquals("a && ! b", null, "+a -b"); assertQueryEquals("a OR b", null, "a b"); assertQueryEquals("a || b", null, "a b"); assertQueryEquals("a OR !b", null, "a -b"); - assertQueryEquals("a OR ! b", null, "a -b"); +// assertQueryEquals("a OR ! b", null, "a -b"); assertQueryEquals("a OR -b", null, "a -b"); + // +,-,! should be directly adjacent to operand (i.e. not separated by whitespace) to be treated as an operator + Analyzer a = new Analyzer() { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + } + }; + assertQueryEquals("a - b", a, "a - b"); + assertQueryEquals("a + b", a, "a + b"); + assertQueryEquals("a ! b", a, "a ! b"); + assertQueryEquals("+term -term term", null, "+term -term term"); assertQueryEquals("foo:term AND field:anotherTerm", null, "+foo:term +anotherterm"); Index: lucene/test-framework/src/java/org/apache/lucene/queryParser/QueryParserTestBase.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/queryParser/QueryParserTestBase.java (revision 1342541) +++ lucene/test-framework/src/java/org/apache/lucene/queryParser/QueryParserTestBase.java (working copy) @@ -325,14 +325,25 @@ assertQueryEquals("a AND -b", null, "+a -b"); assertQueryEquals("a AND !b", null, "+a -b"); assertQueryEquals("a && b", null, "+a +b"); - assertQueryEquals("a && ! b", null, "+a -b"); +// assertQueryEquals("a && ! b", null, "+a -b"); assertQueryEquals("a OR b", null, "a b"); assertQueryEquals("a || b", null, "a b"); assertQueryEquals("a OR !b", null, "a -b"); - assertQueryEquals("a OR ! b", null, "a -b"); +// assertQueryEquals("a OR ! b", null, "a -b"); assertQueryEquals("a OR -b", null, "a -b"); + // +,-,! should be directly adjacent to operand (i.e. not separated by whitespace) to be treated as an operator + Analyzer a = new Analyzer() { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + } + }; + assertQueryEquals("a - b", a, "a - b"); + assertQueryEquals("a + b", a, "a + b"); + assertQueryEquals("a ! b", a, "a ! b"); + assertQueryEquals("+term -term term", null, "+term -term term"); assertQueryEquals("foo:term AND field:anotherTerm", null, "+foo:term +anotherterm");