diff --git lucene/queryparser/src/java/org/apache/lucene/queryparser/amend1/FirstAmendmentQueryParser.java lucene/queryparser/src/java/org/apache/lucene/queryparser/amend1/FirstAmendmentQueryParser.java new file mode 100644 index 0000000..eaecdfb --- /dev/null +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/amend1/FirstAmendmentQueryParser.java @@ -0,0 +1,525 @@ +package org.apache.lucene.queryparser.amend1; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.QueryBuilder; + +import java.util.Collections; +import java.util.Map; + +/** + * FirstAmendmentQueryParser is used to parse human readable query syntax. + *

+ * The main idea behind this parser is that a person should be able to type + * whatever they want to represent a query, and this parser will do its best + * to interpret what to search for no matter how poorly composed the request + * may be. + *

+ * Tokens are considered to be any of a term, phrase, or subquery for the + * operations described below. Whitespace including ' ' '\n' '\r' and '\t' + * and certain operators may be used to delimit tokens ( ) + | " . + *

+ * The following operators may be used to create complex queries. + *

+ *

+ * Normal operator precedence will be simple order from left to right. + * For example, the following will evaluate {@code token1 OR token2} first, + * then {@code AND} with {@code token3}: + *

token1 | token2 + token3
+ *

+ * The default operator is {@code OR} if no other operator is specified. + * For example, the following will {@code OR} {@code token1} and {@code token2} together: + *

token1 token2
+ *

+ * An individual term may contain any possible character with certain characters + * requiring escaping using a {@code \}. The following characters will need to be escaped in + * terms and phrases: + * {@code + | " ( ) ' \} + *

+ * The {@code -} operator is a special case. On individual terms (not phrases) the first + * character of a term that is {@code -} must be escaped; however, any {@code -} characters + * beyond the first character in a term may not need to be escaped. + * For example: + *

+ * Note that above examples consider the terms before text processing. + *

+ * The {@code *} operator is a special case. On individual terms (not phrases) the last + * character of a term that is {@code *} must be escaped; however, any {@code *} characters + * before the last character in a term may not need to be escaped: + *

+ * Note that above examples consider the terms before text processing. + *

+ * Any errors in query syntax will be ignored and the parser will attempt + * to decipher what it can; however, this may mean odd or unexpected results. + */ +public class FirstAmendmentQueryParser extends QueryBuilder { + /** Map of fields to query against with their weights */ + protected final Map weights; + /** flags to the parser (to turn features on/off) */ + protected final int flags; + + /** Enables {@code AND} operator (+) */ + public static final int AND_OPERATOR = 1<<0; + /** Enables {@code NOT} operator (-) */ + public static final int NOT_OPERATOR = 1<<1; + /** Enables {@code OR} operator (|) */ + public static final int OR_OPERATOR = 1<<2; + /** Enables {@code PREFIX} operator (*) */ + public static final int PREFIX_OPERATOR = 1<<3; + /** Enables {@code PHRASE} operator (") */ + public static final int PHRASE_OPERATOR = 1<<4; + /** Enables {@code PRECEDENCE} operators: {@code (} and {@code )} */ + public static final int PRECEDENCE_OPERATORS = 1<<5; + /** Enables {@code ESCAPE} operator (\) */ + public static final int ESCAPE_OPERATOR = 1<<6; + /** Enables {@code WHITESPACE} operators: ' ' '\n' '\r' '\t' */ + public static final int WHITESPACE_OPERATOR = 1<<7; + + private BooleanClause.Occur defaultOperator = BooleanClause.Occur.SHOULD; + + /** Creates a new parser searching over a single field with weight 1.0 and every operator enabled. */ + public FirstAmendmentQueryParser(Analyzer analyzer, String field) { + this(analyzer, Collections.singletonMap(field, 1.0F)); + } + + /** Creates a new parser searching over multiple fields with different weights; every operator is enabled. */ + public FirstAmendmentQueryParser(Analyzer analyzer, Map weights) { + this(analyzer, weights, -1); + } + + /** Creates a new parser with custom flags used to enable/disable certain features. 
*/ + public FirstAmendmentQueryParser(Analyzer analyzer, Map weights, int flags) { + super(analyzer); + this.weights = weights; + this.flags = flags; + } + + /** Parses the query text and returns parsed query (or null if empty) */ + public Query parse(String queryText) { + char data[] = queryText.toCharArray(); + char buffer[] = new char[data.length]; + + State state = new State(data, buffer, 0, data.length); + parseSubQuery(state); + return state.top; + } + + private void parseSubQuery(State state) { + while (state.index < state.length) { + if (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0) { + // the beginning of a subquery has been found + consumeSubQuery(state); + } else if (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0) { + // this is an extraneous character so it is ignored + ++state.index; + } else if (state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0) { + // the beginning of a phrase has been found + consumePhrase(state); + } else if (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0) { + // an and operation has been explicitly set + // if an operation has already been set this one is ignored + // if a term (or phrase or subquery) has not been found yet the + // operation is also ignored since there is no previous + // term (or phrase or subquery) to and with + if (state.currentOperation == null && state.top != null) { + state.currentOperation = BooleanClause.Occur.MUST; + } + + ++state.index; + } else if (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0) { + // an or operation has been explicitly set + // if an operation has already been set this one is ignored + // if a term (or phrase or subquery) has not been found yet the + // operation is also ignored since there is no previous + // term (or phrase or subquery) to or with + if (state.currentOperation == null && state.top != null) { + state.currentOperation = BooleanClause.Occur.SHOULD; + } + + ++state.index; + } 
else if (state.data[state.index] == '-' && (flags & NOT_OPERATOR) != 0) { + // a not operator has been found, so increase the not count + // two not operators in a row negate each other + ++state.not; + ++state.index; + + // continue so the not operator is not reset + // before the next character is determined + continue; + } else if ((state.data[state.index] == ' ' + || state.data[state.index] == '\t' + || state.data[state.index] == '\n' + || state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0) { + // ignore any whitespace found as it may have already been + // used as a delimiter across a term (or phrase or subquery) + // or is simply extraneous + ++state.index; + } else { + // the beginning of a token has been found + consumeToken(state); + } + + // reset the not operator as even whitespace is not allowed when + // specifying the not operation for a term (or phrase or subquery) + state.not = 0; + } + } + + private void consumeSubQuery(State state) { + assert (flags & PRECEDENCE_OPERATORS) != 0; + int start = ++state.index; + int precedence = 1; + boolean escaped = false; + + while (state.index < state.length) { + if (!escaped) { + if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) { + // an escape character has been found so + // whatever character is next will become + // part of the subquery unless the escape + // character is the last one in the data + escaped = true; + ++state.index; + + continue; + } else if (state.data[state.index] == '(') { + // increase the precedence as there is a + // subquery in the current subquery + ++precedence; + } else if (state.data[state.index] == ')') { + --precedence; + + if (precedence == 0) { + // this should be the end of the subquery + // all characters found will be used for + // creating the subquery + break; + } + } + } + + escaped = false; + ++state.index; + } + + if (state.index == state.length) { + // a closing parenthesis was never found so the opening + // parenthesis is considered 
extraneous and will be ignored + state.index = start; + } else if (state.index == start) { + // a closing parenthesis was found immediately after the opening + // parenthesis so the current operation is reset since it would + // have been applied to this subquery + state.currentOperation = null; + + ++state.index; + } else { + // a complete subquery has been found and is recursively parsed by + // starting over with a new state object + State subState = new State(state.data, state.buffer, start, state.index); + parseSubQuery(subState); + buildQueryTree(state, subState.top); + + ++state.index; + } + } + + private void consumePhrase(State state) { + assert (flags & PHRASE_OPERATOR) != 0; + int start = ++state.index; + int copied = 0; + boolean escaped = false; + + while (state.index < state.length) { + if (!escaped) { + if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) { + // an escape character has been found so + // whatever character is next will become + // part of the phrase unless the escape + // character is the last one in the data + escaped = true; + ++state.index; + + continue; + } else if (state.data[state.index] == '"') { + // this should be the end of the phrase + // all characters found will be used for + // creating the phrase query + break; + } + } + + escaped = false; + state.buffer[copied++] = state.data[state.index++]; + } + + if (state.index == state.length) { + // a closing double quote was never found so the opening + // double quote is considered extraneous and will be ignored + state.index = start; + } else if (state.index == start) { + // a closing double quote was found immediately after the opening + // double quote so the current operation is reset since it would + // have been applied to this phrase + state.currentOperation = null; + + ++state.index; + } else { + // a complete phrase has been found and is handed to newPhraseQuery, + // which builds a phrase query for each field in the weights map + String phrase = new 
String(state.buffer, 0, copied); 
+ Query branch = newPhraseQuery(phrase); + buildQueryTree(state, branch); + + ++state.index; + } + } + + private void consumeToken(State state) { + int copied = 0; + boolean escaped = false; + boolean prefix = false; + + while (state.index < state.length) { + if (!escaped) { + if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) { + // an escape character has been found so + // whatever character is next will become + // part of the term unless the escape + // character is the last one in the data + escaped = true; + prefix = false; + ++state.index; + + continue; + } else if ((state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0) + || (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0) + || (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0) + || (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0) + || (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0) + || ((state.data[state.index] == ' ' + || state.data[state.index] == '\t' + || state.data[state.index] == '\n' + || state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0)) { + // this should be the end of the term + // all characters found will be used for + // creating the term query + break; + } + + // prefix tracks whether or not the last character + // was a '*' operator that hasn't been escaped + // there must be at least one valid character before + // searching for a prefixed set of terms + prefix = copied > 0 && state.data[state.index] == '*' && (flags & PREFIX_OPERATOR) != 0; + } + + escaped = false; + state.buffer[copied++] = state.data[state.index++]; + } + + if (copied > 0) { + final Query branch; + + if (prefix) { + // if a term is found with a closing '*' it is considered to be a prefix query + // so the trailing '*' is left out of the text handed to newPrefixQuery + String token = new String(state.buffer, 0, copied - 1); + branch = newPrefixQuery(token); + } else { + // a standard term has been 
found so it will be run 
through + // the entire analysis chain from the specified schema field + String token = new String(state.buffer, 0, copied); + branch = newDefaultQuery(token); + } + + buildQueryTree(state, branch); + } + } + + // buildQueryTree should be called after a term, phrase, or subquery + // is consumed to be added to our existing query tree + // this method will only add to the existing tree if the given branch is not null + private void buildQueryTree(State state, Query branch) { + if (branch != null) { + // modify our branch to a BooleanQuery wrapper for not + // this is necessary any time a term, phrase, or subquery is negated + if (state.not % 2 == 1) { + BooleanQuery nq = new BooleanQuery(); + nq.add(branch, BooleanClause.Occur.MUST_NOT); + nq.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); + branch = nq; + } + + // first term (or phrase or subquery) found and will begin our query tree + if (state.top == null) { + state.top = branch; + } else { + // more than one term (or phrase or subquery) found + // set currentOperation to the default if no other operation is explicitly set + if (state.currentOperation == null) { + state.currentOperation = defaultOperator; + } + + // operational change requiring a new parent node + // this occurs if the previous operation is not the same as current operation + // because the previous operation must be evaluated separately to preserve + // the proper precedence and the current operation will take over as the top of the tree + if (state.previousOperation != state.currentOperation) { + BooleanQuery bq = new BooleanQuery(); + bq.add(state.top, state.currentOperation); + state.top = bq; + } + + // add the branch to the top node and remember which operation was used + ((BooleanQuery)state.top).add(branch, state.currentOperation); + state.previousOperation = state.currentOperation; + } + + // reset the current operation now that it has been applied to + // the incoming term (or phrase or subquery); NOTE this is skipped + // 
when branch is null because of an empty result or other 
possible errors + state.currentOperation = null; + } + } + + /** + * Factory method to generate a standard query (no phrase or prefix operators); null when no field yields a query. + */ + protected Query newDefaultQuery(String text) { + BooleanQuery bq = new BooleanQuery(true); + for (Map.Entry entry : weights.entrySet()) { + Query q = createBooleanQuery(entry.getKey(), text, defaultOperator); + if (q != null) { + q.setBoost(entry.getValue()); + bq.add(q, BooleanClause.Occur.SHOULD); + } + } + return simplify(bq); + } + + /** + * Factory method to generate a phrase query; null when no field yields a query. + */ + protected Query newPhraseQuery(String text) { + BooleanQuery bq = new BooleanQuery(true); + for (Map.Entry entry : weights.entrySet()) { + Query q = createPhraseQuery(entry.getKey(), text); + if (q != null) { + q.setBoost(entry.getValue()); + bq.add(q, BooleanClause.Occur.SHOULD); + } + } + return simplify(bq); + } + + /** + * Factory method to generate a prefix query. + */ + protected Query newPrefixQuery(String text) { + BooleanQuery bq = new BooleanQuery(true); + for (Map.Entry entry : weights.entrySet()) { + PrefixQuery prefix = new PrefixQuery(new Term(entry.getKey(), text)); + prefix.setBoost(entry.getValue()); + bq.add(prefix, BooleanClause.Occur.SHOULD); + } + return simplify(bq); + } + + /** + * Helper to simplify boolean queries with 0 or 1 clause: null for none, the lone clause's query for one + */ + protected Query simplify(BooleanQuery bq) { + if (bq.clauses().isEmpty()) { + return null; + } else if (bq.clauses().size() == 1) { + return bq.clauses().get(0).getQuery(); + } else { + return bq; + } + } + + /** + * Returns the implicit operator setting, which will be + * either {@code SHOULD} or {@code MUST}. + */ + public BooleanClause.Occur getDefaultOperator() { + return defaultOperator; + } + + /** + * Sets the implicit operator setting, which must be + * either {@code SHOULD} or {@code MUST}. 
+ */ + public void setDefaultOperator(BooleanClause.Occur operator) { + if (operator != BooleanClause.Occur.SHOULD && operator != BooleanClause.Occur.MUST) { + throw new IllegalArgumentException("invalid operator: only SHOULD or MUST are allowed"); + } + this.defaultOperator = operator; + } + + static class State { + final char[] data; // the characters in the query string + final char[] buffer; // a temporary buffer used to reduce necessary allocations + int index; // position of the next character to examine + int length; // exclusive end of the region of data being parsed + + BooleanClause.Occur currentOperation; // operator pending for the next branch, null if none + BooleanClause.Occur previousOperation; // operator used for the most recently added branch + int not; // number of '-' operators seen directly before the next branch + + Query top; // root of the query tree built so far, null until a branch is added + + State(char[] data, char[] buffer, int index, int length) { + this.data = data; + this.buffer = buffer; + this.index = index; + this.length = length; + } + } +} diff --git lucene/queryparser/src/java/org/apache/lucene/queryparser/amend1/package.html lucene/queryparser/src/java/org/apache/lucene/queryparser/amend1/package.html new file mode 100644 index 0000000..0ea5acf --- /dev/null +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/amend1/package.html @@ -0,0 +1,22 @@ + + + + + A simple query parser for human-entered queries. + + \ No newline at end of file diff --git lucene/queryparser/src/test/org/apache/lucene/queryparser/amend1/TestFirstAmendmentQueryParser.java lucene/queryparser/src/test/org/apache/lucene/queryparser/amend1/TestFirstAmendmentQueryParser.java new file mode 100644 index 0000000..c83b0af --- /dev/null +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/amend1/TestFirstAmendmentQueryParser.java @@ -0,0 +1,544 @@ +package org.apache.lucene.queryparser.amend1; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +import static org.apache.lucene.queryparser.amend1.FirstAmendmentQueryParser.AND_OPERATOR; +import static org.apache.lucene.queryparser.amend1.FirstAmendmentQueryParser.ESCAPE_OPERATOR; +import static org.apache.lucene.queryparser.amend1.FirstAmendmentQueryParser.NOT_OPERATOR; +import static org.apache.lucene.queryparser.amend1.FirstAmendmentQueryParser.OR_OPERATOR; +import static org.apache.lucene.queryparser.amend1.FirstAmendmentQueryParser.PHRASE_OPERATOR; +import static org.apache.lucene.queryparser.amend1.FirstAmendmentQueryParser.PRECEDENCE_OPERATORS; +import static org.apache.lucene.queryparser.amend1.FirstAmendmentQueryParser.PREFIX_OPERATOR; +import static org.apache.lucene.queryparser.amend1.FirstAmendmentQueryParser.WHITESPACE_OPERATOR; + +/** Tests for {@link FirstAmendmentQueryParser} */ +public class TestFirstAmendmentQueryParser extends LuceneTestCase 
{ + + /** + * helper to parse a query with whitespace+lowercase analyzer across "field", + * with default operator of MUST + */ + private Query parse(String text) { + Analyzer analyzer = new MockAnalyzer(random()); + FirstAmendmentQueryParser parser = new FirstAmendmentQueryParser(analyzer, "field"); + parser.setDefaultOperator(Occur.MUST); + return parser.parse(text); + } + + /** test a simple term */ + public void testTerm() throws Exception { + Query expected = new TermQuery(new Term("field", "foobar")); + + assertEquals(expected, parse("foobar")); + } + + /** test a simple phrase */ + public void testPhrase() throws Exception { + PhraseQuery expected = new PhraseQuery(); + expected.add(new Term("field", "foo")); + expected.add(new Term("field", "bar")); + + assertEquals(expected, parse("\"foo bar\"")); + } + + /** test a simple prefix */ + public void testPrefix() throws Exception { + PrefixQuery expected = new PrefixQuery(new Term("field", "foobar")); + + assertEquals(expected, parse("foobar*")); + } + + /** test some AND'd terms using '+' operator */ + public void testAND() throws Exception { + BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("field", "foo")), Occur.MUST); + expected.add(new TermQuery(new Term("field", "bar")), Occur.MUST); + + assertEquals(expected, parse("foo+bar")); + } + + /** test some AND'd phrases using '+' operator */ + public void testANDPhrase() throws Exception { + PhraseQuery phrase1 = new PhraseQuery(); + phrase1.add(new Term("field", "foo")); + phrase1.add(new Term("field", "bar")); + PhraseQuery phrase2 = new PhraseQuery(); + phrase2.add(new Term("field", "star")); + phrase2.add(new Term("field", "wars")); + BooleanQuery expected = new BooleanQuery(); + expected.add(phrase1, Occur.MUST); + expected.add(phrase2, Occur.MUST); + + assertEquals(expected, parse("\"foo bar\"+\"star wars\"")); + } + + /** test some AND'd terms (just using whitespace) */ + public void testANDImplicit() throws Exception 
{ + BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("field", "foo")), Occur.MUST); + expected.add(new TermQuery(new Term("field", "bar")), Occur.MUST); + + assertEquals(expected, parse("foo bar")); + } + + /** test some OR'd terms */ + public void testOR() throws Exception { + BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("field", "foo")), Occur.SHOULD); + expected.add(new TermQuery(new Term("field", "bar")), Occur.SHOULD); + + assertEquals(expected, parse("foo|bar")); + assertEquals(expected, parse("foo||bar")); + } + + /** test some OR'd terms (just using whitespace) */ + public void testORImplicit() throws Exception { + BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("field", "foo")), Occur.SHOULD); + expected.add(new TermQuery(new Term("field", "bar")), Occur.SHOULD); + + FirstAmendmentQueryParser parser = new FirstAmendmentQueryParser(new MockAnalyzer(random()), "field"); + assertEquals(expected, parser.parse("foo bar")); + } + + /** test some OR'd phrases using '|' operator */ + public void testORPhrase() throws Exception { + PhraseQuery phrase1 = new PhraseQuery(); + phrase1.add(new Term("field", "foo")); + phrase1.add(new Term("field", "bar")); + PhraseQuery phrase2 = new PhraseQuery(); + phrase2.add(new Term("field", "star")); + phrase2.add(new Term("field", "wars")); + BooleanQuery expected = new BooleanQuery(); + expected.add(phrase1, Occur.SHOULD); + expected.add(phrase2, Occur.SHOULD); + + assertEquals(expected, parse("\"foo bar\"|\"star wars\"")); + } + + /** test negated term */ + public void testNOT() throws Exception { + BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("field", "foo")), Occur.MUST_NOT); + expected.add(new MatchAllDocsQuery(), Occur.SHOULD); + + assertEquals(expected, parse("-foo")); + assertEquals(expected, parse("-(foo)")); + assertEquals(expected, parse("---foo")); + } + + /** test crazy 
prefixes with multiple asterisks */ + public void testCrazyPrefixes1() throws Exception { + Query expected = new PrefixQuery(new Term("field", "st*ar")); + + assertEquals(expected, parse("st*ar*")); + } + + /** test prefixes with some escaping */ + public void testCrazyPrefixes2() throws Exception { + Query expected = new PrefixQuery(new Term("field", "st*ar\\*")); + + assertEquals(expected, parse("st*ar\\\\**")); + } + + /** not a prefix query! the prefix operator is escaped */ + public void testTermInDisguise() throws Exception { + Query expected = new TermQuery(new Term("field", "st*ar\\*")); + + assertEquals(expected, parse("sT*Ar\\\\\\*")); + } + + // a number of test cases here have garbage/errors in + // the syntax passed in to test that the query can + // still be interpreted as a guess to what the human + // input was trying to be + + public void testGarbageTerm() throws Exception { + Query expected = new TermQuery(new Term("field", "star")); + + assertEquals(expected, parse("star")); + assertEquals(expected, parse("star\n")); + assertEquals(expected, parse("star\r")); + assertEquals(expected, parse("star\t")); + assertEquals(expected, parse("star(")); + assertEquals(expected, parse("star)")); + assertEquals(expected, parse("star\"")); + assertEquals(expected, parse("\t \r\n\nstar \n \r \t ")); + assertEquals(expected, parse("- + \"\" - star \\")); + } + + public void testGarbageEmpty() throws Exception { + assertNull(parse("")); + assertNull(parse(" ")); + assertNull(parse(" ")); + assertNull(parse("\\ ")); + assertNull(parse("\\ \\ ")); + assertNull(parse("\"\"")); + assertNull(parse("\" \"")); + assertNull(parse("\" \"|\" \"")); + assertNull(parse("(\" \"|\" \")")); + assertNull(parse("\" \" \" \"")); + assertNull(parse("(\" \" \" \")")); + } + + public void testGarbageAND() throws Exception { + BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("field", "star")), Occur.MUST); + expected.add(new TermQuery(new Term("field", 
"wars")), Occur.MUST); + + assertEquals(expected, parse("star wars")); + assertEquals(expected, parse("star+wars")); + assertEquals(expected, parse(" star wars ")); + assertEquals(expected, parse(" star + wars ")); + assertEquals(expected, parse(" | star + + | wars ")); + assertEquals(expected, parse(" | star + + | wars \\")); + } + + public void testGarbageOR() throws Exception { + BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("field", "star")), Occur.SHOULD); + expected.add(new TermQuery(new Term("field", "wars")), Occur.SHOULD); + + assertEquals(expected, parse("star|wars")); + assertEquals(expected, parse(" star | wars ")); + assertEquals(expected, parse(" | star | + | wars ")); + assertEquals(expected, parse(" + star | + + wars \\")); + } + + public void testGarbageNOT() throws Exception { + BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("field", "star")), Occur.MUST_NOT); + expected.add(new MatchAllDocsQuery(), Occur.SHOULD); + + assertEquals(expected, parse("-star")); + assertEquals(expected, parse("---star")); + assertEquals(expected, parse("- -star -")); + } + + public void testGarbagePhrase() throws Exception { + PhraseQuery expected = new PhraseQuery(); + expected.add(new Term("field", "star")); + expected.add(new Term("field", "wars")); + + assertEquals(expected, parse("\"star wars\"")); + assertEquals(expected, parse("\"star wars\\ \"")); + assertEquals(expected, parse("\"\" | \"star wars\"")); + assertEquals(expected, parse(" \"star wars\" \"\"\\")); + } + + public void testGarbageSubquery() throws Exception { + Query expected = new TermQuery(new Term("field", "star")); + + assertEquals(expected, parse("(star)")); + assertEquals(expected, parse("(star))")); + assertEquals(expected, parse("((star)")); + assertEquals(expected, parse(" -()(star) \n\n\r ")); + assertEquals(expected, parse("| + - ( + - | star \n ) \n")); + } + + public void testCompoundAnd() throws Exception { + 
BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("field", "star")), Occur.MUST); + expected.add(new TermQuery(new Term("field", "wars")), Occur.MUST); + expected.add(new TermQuery(new Term("field", "empire")), Occur.MUST); + + /* each input below is asserted to parse to the same three-MUST-clause query */ assertEquals(expected, parse("star wars empire")); + assertEquals(expected, parse("star+wars + empire")); + assertEquals(expected, parse(" | --star wars empire \n\\")); + } + + /** test that terms joined by the pipe operator parse to a flat BooleanQuery of SHOULD clauses, including inputs padded with extra operator characters */ public void testCompoundOr() throws Exception { + BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("field", "star")), Occur.SHOULD); + expected.add(new TermQuery(new Term("field", "wars")), Occur.SHOULD); + expected.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD); + + assertEquals(expected, parse("star|wars|empire")); + assertEquals(expected, parse("star|wars | empire")); + assertEquals(expected, parse(" | --star|wars|empire \n\\")); + } + + /** test OR followed by AND: the expected query is (star OR wars) AND empire */ public void testComplex00() throws Exception { + BooleanQuery expected = new BooleanQuery(); + BooleanQuery inner = new BooleanQuery(); + inner.add(new TermQuery(new Term("field", "star")), Occur.SHOULD); + inner.add(new TermQuery(new Term("field", "wars")), Occur.SHOULD); + expected.add(inner, Occur.MUST); + expected.add(new TermQuery(new Term("field", "empire")), Occur.MUST); + + assertEquals(expected, parse("star|wars empire")); + assertEquals(expected, parse("star|wars + empire")); + assertEquals(expected, parse("star| + wars + ----empire |")); + } + + /** test AND followed by OR: the expected query is (star AND wars) OR empire */ public void testComplex01() throws Exception { + BooleanQuery expected = new BooleanQuery(); + BooleanQuery inner = new BooleanQuery(); + inner.add(new TermQuery(new Term("field", "star")), Occur.MUST); + inner.add(new TermQuery(new Term("field", "wars")), Occur.MUST); + expected.add(inner, Occur.SHOULD); + expected.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD); + + assertEquals(expected, parse("star wars | empire")); + assertEquals(expected, parse("star + wars|empire")); + 
assertEquals(expected, parse("star + | wars | ----empire +")); + } + + /** test AND followed by two ORs: the expected query is (star AND wars) OR empire OR strikes, with the three SHOULD clauses in one BooleanQuery */ public void testComplex02() throws Exception { + BooleanQuery expected = new BooleanQuery(); + BooleanQuery inner = new BooleanQuery(); + inner.add(new TermQuery(new Term("field", "star")), Occur.MUST); + inner.add(new TermQuery(new Term("field", "wars")), Occur.MUST); + expected.add(inner, Occur.SHOULD); + expected.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD); + expected.add(new TermQuery(new Term("field", "strikes")), Occur.SHOULD); + + assertEquals(expected, parse("star wars | empire | strikes")); + assertEquals(expected, parse("star + wars|empire | strikes")); + assertEquals(expected, parse("star + | wars | ----empire | + --strikes \\")); + } + + /** test an AND, OR, OR, AND sequence: the expected query is ((star AND wars) OR empire OR strikes) AND back */ public void testComplex03() throws Exception { + BooleanQuery expected = new BooleanQuery(); + BooleanQuery inner = new BooleanQuery(); + BooleanQuery inner2 = new BooleanQuery(); + inner2.add(new TermQuery(new Term("field", "star")), Occur.MUST); + inner2.add(new TermQuery(new Term("field", "wars")), Occur.MUST); + inner.add(inner2, Occur.SHOULD); + inner.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD); + inner.add(new TermQuery(new Term("field", "strikes")), Occur.SHOULD); + expected.add(inner, Occur.MUST); + expected.add(new TermQuery(new Term("field", "back")), Occur.MUST); + + assertEquals(expected, parse("star wars | empire | strikes back")); + assertEquals(expected, parse("star + wars|empire | strikes + back")); + assertEquals(expected, parse("star + | wars | ----empire | + --strikes + | --back \\")); + } + + /** test parenthesized groups: the expected query is (star AND wars) OR empire OR (strikes AND back) */ public void testComplex04() throws Exception { + BooleanQuery expected = new BooleanQuery(); + BooleanQuery inner = new BooleanQuery(); + BooleanQuery inner2 = new BooleanQuery(); + inner.add(new TermQuery(new Term("field", "star")), Occur.MUST); + inner.add(new TermQuery(new Term("field", "wars")), Occur.MUST); + inner2.add(new TermQuery(new Term("field", "strikes")), Occur.MUST); + inner2.add(new 
TermQuery(new Term("field", "back")), Occur.MUST); + expected.add(inner, Occur.SHOULD); + expected.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD); + expected.add(inner2, Occur.SHOULD); + + assertEquals(expected, parse("(star wars) | empire | (strikes back)")); + assertEquals(expected, parse("(star + wars) |empire | (strikes + back)")); + assertEquals(expected, parse("(star + | wars |) | ----empire | + --(strikes + | --back) \\")); + } + + /** test nested groups with a negated term: the expected query is (star AND wars) OR (empire OR (strikes AND back AND NOT jarjar)); the later inputs add empty parentheses, an empty phrase and an unbalanced quote and are asserted to give the same result */ public void testComplex05() throws Exception { + BooleanQuery expected = new BooleanQuery(); + BooleanQuery inner1 = new BooleanQuery(); + BooleanQuery inner2 = new BooleanQuery(); + BooleanQuery inner3 = new BooleanQuery(); + BooleanQuery inner4 = new BooleanQuery(); + + expected.add(inner1, Occur.SHOULD); + expected.add(inner2, Occur.SHOULD); + + inner1.add(new TermQuery(new Term("field", "star")), Occur.MUST); + inner1.add(new TermQuery(new Term("field", "wars")), Occur.MUST); + + inner2.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD); + inner2.add(inner3, Occur.SHOULD); + + inner3.add(new TermQuery(new Term("field", "strikes")), Occur.MUST); + inner3.add(new TermQuery(new Term("field", "back")), Occur.MUST); + inner3.add(inner4, Occur.MUST); + + /* the negated term is expected in its own BooleanQuery, paired with a MatchAllDocsQuery SHOULD clause */ inner4.add(new TermQuery(new Term("field", "jarjar")), Occur.MUST_NOT); + inner4.add(new MatchAllDocsQuery(), Occur.SHOULD); + + assertEquals(expected, parse("(star wars) | (empire | (strikes back -jarjar))")); + assertEquals(expected, parse("(star + wars) |(empire | (strikes + back -jarjar) () )")); + assertEquals(expected, parse("(star + | wars |) | --(--empire | + --(strikes + | --back + -jarjar) \"\" ) \"")); + } + + /** test nested groups with escaped operators: the expected query is star AND (wars OR ((empire OR strikes) AND back AND jar+|jar)), where the backslash-escaped plus and pipe are expected to stay inside a single term */ public void testComplex06() throws Exception { + BooleanQuery expected = new BooleanQuery(); + BooleanQuery inner1 = new BooleanQuery(); + BooleanQuery inner2 = new BooleanQuery(); + BooleanQuery inner3 = new BooleanQuery(); + + expected.add(new TermQuery(new Term("field", "star")), Occur.MUST); + expected.add(inner1, Occur.MUST); + + 
inner1.add(new TermQuery(new Term("field", "wars")), Occur.SHOULD); + inner1.add(inner2, Occur.SHOULD); + + inner2.add(inner3, Occur.MUST); + inner3.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD); + inner3.add(new TermQuery(new Term("field", "strikes")), Occur.SHOULD); + inner2.add(new TermQuery(new Term("field", "back")), Occur.MUST); + inner2.add(new TermQuery(new Term("field", "jar+|jar")), Occur.MUST); + + assertEquals(expected, parse("star (wars | (empire | strikes back jar\\+\\|jar))")); + assertEquals(expected, parse("star + (wars |(empire | strikes + back jar\\+\\|jar) () )")); + assertEquals(expected, parse("star + (| wars | | --(--empire | + --strikes + | --back + jar\\+\\|jar) \"\" ) \"")); + } + + /** test a term with field weights: one SHOULD TermQuery per weighted field is expected, each boosted by the weight given for its field */ + public void testWeightedTerm() throws Exception { + Map weights = new LinkedHashMap<>(); + weights.put("field0", 5f); + weights.put("field1", 10f); + + BooleanQuery expected = new BooleanQuery(true); + Query field0 = new TermQuery(new Term("field0", "foo")); + field0.setBoost(5f); + expected.add(field0, Occur.SHOULD); + Query field1 = new TermQuery(new Term("field1", "foo")); + field1.setBoost(10f); + expected.add(field1, Occur.SHOULD); + + Analyzer analyzer = new MockAnalyzer(random()); + FirstAmendmentQueryParser parser = new FirstAmendmentQueryParser(analyzer, weights); + assertEquals(expected, parser.parse("foo")); + } + + /** test a more complex query with field weights */ + public void testWeightedOR() throws Exception { + Map weights = new LinkedHashMap<>(); + weights.put("field0", 5f); + weights.put("field1", 10f); + + BooleanQuery expected = new BooleanQuery(); + /* each term of the OR is expected to expand into its own per-field BooleanQuery, added as a SHOULD clause */ BooleanQuery foo = new BooleanQuery(true); + Query field0 = new TermQuery(new Term("field0", "foo")); + field0.setBoost(5f); + foo.add(field0, Occur.SHOULD); + Query field1 = new TermQuery(new Term("field1", "foo")); + field1.setBoost(10f); + foo.add(field1, Occur.SHOULD); + expected.add(foo, Occur.SHOULD); + + BooleanQuery bar = new 
BooleanQuery(true); + field0 = new TermQuery(new Term("field0", "bar")); + field0.setBoost(5f); + bar.add(field0, Occur.SHOULD); + field1 = new TermQuery(new Term("field1", "bar")); + field1.setBoost(10f); + bar.add(field1, Occur.SHOULD); + expected.add(bar, Occur.SHOULD); + + Analyzer analyzer = new MockAnalyzer(random()); + FirstAmendmentQueryParser parser = new FirstAmendmentQueryParser(analyzer, weights); + assertEquals(expected, parser.parse("foo|bar")); + } + + /** helper to parse a query with keyword analyzer across "field", passing the given flags to the parser constructor */ + private Query parseKeyword(String text, int flags) { + Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); + FirstAmendmentQueryParser parser = new FirstAmendmentQueryParser(analyzer, + Collections.singletonMap("field", 1f), + flags); + return parser.parse(text); + } + + /** test the ability to enable/disable phrase operator */ + public void testDisablePhrase() { + Query expected = new TermQuery(new Term("field", "\"test\"")); + /* the bitwise complement passes every flag bit except PHRASE_OPERATOR; the quotes are expected to remain part of the term */ assertEquals(expected, parseKeyword("\"test\"", ~PHRASE_OPERATOR)); + } + + /** test the ability to enable/disable prefix operator */ + public void testDisablePrefix() { + Query expected = new TermQuery(new Term("field", "test*")); + assertEquals(expected, parseKeyword("test*", ~PREFIX_OPERATOR)); + } + + /** test the ability to enable/disable AND operator */ + public void testDisableAND() { + Query expected = new TermQuery(new Term("field", "foo+bar")); + assertEquals(expected, parseKeyword("foo+bar", ~AND_OPERATOR)); + expected = new TermQuery(new Term("field", "+foo+bar")); + assertEquals(expected, parseKeyword("+foo+bar", ~AND_OPERATOR)); + } + + /** test the ability to enable/disable OR operator */ + public void testDisableOR() { + Query expected = new TermQuery(new Term("field", "foo|bar")); + assertEquals(expected, parseKeyword("foo|bar", ~OR_OPERATOR)); + expected = new TermQuery(new Term("field", "|foo|bar")); + assertEquals(expected, parseKeyword("|foo|bar", ~OR_OPERATOR)); 
+ } + + /** test the ability to enable/disable NOT operator */ + public void testDisableNOT() { + Query expected = new TermQuery(new Term("field", "-foo")); + assertEquals(expected, parseKeyword("-foo", ~NOT_OPERATOR)); + } + + /** test the ability to enable/disable precedence operators */ + public void testDisablePrecedence() { + Query expected = new TermQuery(new Term("field", "(foo)")); + assertEquals(expected, parseKeyword("(foo)", ~PRECEDENCE_OPERATORS)); + expected = new TermQuery(new Term("field", ")foo(")); + assertEquals(expected, parseKeyword(")foo(", ~PRECEDENCE_OPERATORS)); + } + + /** test the ability to enable/disable escape operators */ + public void testDisableEscape() { + Query expected = new TermQuery(new Term("field", "foo\\bar")); + assertEquals(expected, parseKeyword("foo\\bar", ~ESCAPE_OPERATOR)); + assertEquals(expected, parseKeyword("(foo\\bar)", ~ESCAPE_OPERATOR)); + assertEquals(expected, parseKeyword("\"foo\\bar\"", ~ESCAPE_OPERATOR)); + } + + /** test the ability to enable/disable whitespace as a token delimiter: with it disabled, spaces, tabs and newlines are expected to stay inside the single keyword term */ public void testDisableWhitespace() { + Query expected = new TermQuery(new Term("field", "foo foo")); + assertEquals(expected, parseKeyword("foo foo", ~WHITESPACE_OPERATOR)); + expected = new TermQuery(new Term("field", " foo foo\n ")); + assertEquals(expected, parseKeyword(" foo foo\n ", ~WHITESPACE_OPERATOR)); + expected = new TermQuery(new Term("field", "\t\tfoo foo foo")); + assertEquals(expected, parseKeyword("\t\tfoo foo foo", ~WHITESPACE_OPERATOR)); + } + + // we aren't supposed to barf on any input... + public void testRandomQueries() throws Exception { + for (int i = 0; i < 1000; i++) { + String query = _TestUtil.randomUnicodeString(random()); + parse(query); // no exception + parseKeyword(query, _TestUtil.nextInt(random(), 0, 64)); // no exception + } + } +} \ No newline at end of file