Index: lucene/suggest/build.xml =================================================================== --- lucene/suggest/build.xml (revision 1561346) +++ lucene/suggest/build.xml (working copy) @@ -31,13 +31,10 @@ - - - Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java (revision 1561346) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java (working copy) @@ -1,179 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.text.ParseException; -import java.util.HashMap; -import java.util.List; -import java.util.Set; - -import org.apache.lucene.document.NumericDocValuesField; // javadocs -import org.apache.lucene.expressions.Expression; -import org.apache.lucene.expressions.SimpleBindings; -import org.apache.lucene.expressions.js.JavascriptCompiler; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.index.StoredDocument; -import org.apache.lucene.queries.function.FunctionValues; -import org.apache.lucene.queries.function.ValueSource; -import org.apache.lucene.search.SortField; -import org.apache.lucene.util.BytesRefIterator; - - -/** - *

- * Dictionary with terms and optionally payload information - * taken from stored fields in a Lucene index. Similar to - * {@link DocumentDictionary}, except it computes the weight - * of the terms in a document based on a user-defined expression - * having one or more {@link NumericDocValuesField} in the document. - *

- * NOTE: - *
    - *
  • - * The term and (optionally) payload fields have to be - * stored - *
  • - *
  • - * if the term or (optionally) payload fields supplied - * do not have a value for a document, then the document is - * rejected by the dictionary - *
  • - *
  • - * All the fields used in weightExpression should - * have values for all documents, if any of the fields do not - * have a value for a document, it will default to 0 - *
  • - *
- */ -public class DocumentExpressionDictionary extends DocumentDictionary { - - private final ValueSource weightsValueSource; - - /** - * Creates a new dictionary with the contents of the fields named field - * for the terms and computes the corresponding weights of the term by compiling the - * user-defined weightExpression using the sortFields - * bindings. - */ - public DocumentExpressionDictionary(IndexReader reader, String field, - String weightExpression, Set sortFields) { - this(reader, field, weightExpression, sortFields, null); - } - - /** - * Creates a new dictionary with the contents of the fields named field - * for the terms, payloadField for the corresponding payloads - * and computes the corresponding weights of the term by compiling the - * user-defined weightExpression using the sortFields - * bindings. - */ - public DocumentExpressionDictionary(IndexReader reader, String field, - String weightExpression, Set sortFields, String payload) { - super(reader, field, null, payload); - Expression expression = null; - try { - expression = JavascriptCompiler.compile(weightExpression); - } catch (ParseException e) { - throw new RuntimeException(); - } - SimpleBindings bindings = new SimpleBindings(); - for (SortField sortField: sortFields) { - bindings.add(sortField); - } - - weightsValueSource = expression.getValueSource(bindings); - } - - /** - * Creates a new dictionary with the contents of the fields named field - * for the terms, payloadField for the corresponding payloads - * and uses the weightsValueSource supplied to determine the - * score. - */ - public DocumentExpressionDictionary(IndexReader reader, String field, - ValueSource weightsValueSource, String payload) { - super(reader, field, null, payload); - this.weightsValueSource = weightsValueSource; - } - - /** - * Creates a new dictionary with the contents of the fields named field - * for the terms and uses the weightsValueSource supplied to determine the - * score. - */ - public DocumentExpressionDictionary(IndexReader reader, String field, - ValueSource weightsValueSource) { - super(reader, field, null, null); - this.weightsValueSource = weightsValueSource; - } - - @Override - public BytesRefIterator getWordsIterator() throws IOException { - return new DocumentExpressionInputIterator(payloadField!=null); - } - - final class DocumentExpressionInputIterator extends DocumentDictionary.DocumentInputIterator { - - private FunctionValues currentWeightValues; - /** leaves of the reader */ - private final List leaves; - /** starting docIds of all the leaves */ - private final int[] starts; - /** current leave index */ - private int currentLeafIndex = 0; - - public DocumentExpressionInputIterator(boolean hasPayloads) - throws IOException { - super(hasPayloads); - leaves = reader.leaves(); - starts = new int[leaves.size() + 1]; - for (int i = 0; i < leaves.size(); i++) { - starts[i] = leaves.get(i).docBase; - } - starts[leaves.size()] = reader.maxDoc(); - currentWeightValues = (leaves.size() > 0) - ? weightsValueSource.getValues(new HashMap(), leaves.get(currentLeafIndex)) - : null; - } - - /** - * Returns the weight for the current docId as computed - * by the weightsValueSource - * */ - @Override - protected long getWeight(StoredDocument doc, int docId) { - if (currentWeightValues == null) { - return 0; - } - int subIndex = ReaderUtil.subIndex(docId, starts); - if (subIndex != currentLeafIndex) { - currentLeafIndex = subIndex; - try { - currentWeightValues = weightsValueSource.getValues(new HashMap(), leaves.get(currentLeafIndex)); - } catch (IOException e) { - throw new RuntimeException(); - } - } - return currentWeightValues.longVal(docId - starts[subIndex]); - } - - } -} Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java (working copy) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java (working copy) @@ -18,22 +18,15 @@ */ import java.io.IOException; -import java.text.ParseException; import java.util.HashMap; import java.util.List; -import java.util.Set; -import org.apache.lucene.document.NumericDocValuesField; // javadocs -import org.apache.lucene.expressions.Expression; -import org.apache.lucene.expressions.SimpleBindings; -import org.apache.lucene.expressions.js.JavascriptCompiler; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.StoredDocument; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; -import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRefIterator; @@ -41,9 +34,8 @@ *

* Dictionary with terms and optionally payload information * taken from stored fields in a Lucene index. Similar to - * {@link DocumentDictionary}, except it computes the weight - * of the terms in a document based on a user-defined expression - * having one or more {@link NumericDocValuesField} in the document. + * {@link DocumentDictionary}, except it obtains the weight + * of the terms in a document based on a {@link ValueSource}. *

* NOTE: *
    @@ -56,60 +48,34 @@ * do not have a value for a document, then the document is * rejected by the dictionary * - *
  • - * All the fields used in weightExpression should - * have values for all documents, if any of the fields do not - * have a value for a document, it will default to 0 - *
  • *
+ *

+ * In practice the {@link ValueSource} will likely be obtained + * using the lucene expression module. The following example shows + * how to create a {@link ValueSource} from a simple addition of two + * fields: + * + * Expression expression = JavascriptCompiler.compile("f1 + f2"); + * SimpleBindings bindings = new SimpleBindings(); + * bindings.add(new SortField("f1", SortField.Type.LONG)); + * bindings.add(new SortField("f2", SortField.Type.LONG)); + * ValueSource valueSource = expression.getValueSource(bindings); + * + *

+ * */ -public class DocumentExpressionDictionary extends DocumentDictionary { +public class DocumentValueSourceDictionary extends DocumentDictionary { private final ValueSource weightsValueSource; /** * Creates a new dictionary with the contents of the fields named field - * for the terms and computes the corresponding weights of the term by compiling the - * user-defined weightExpression using the sortFields - * bindings. - */ - public DocumentExpressionDictionary(IndexReader reader, String field, - String weightExpression, Set sortFields) { - this(reader, field, weightExpression, sortFields, null); - } - - /** - * Creates a new dictionary with the contents of the fields named field * for the terms, payloadField for the corresponding payloads - * and computes the corresponding weights of the term by compiling the - * user-defined weightExpression using the sortFields - * bindings. - */ - public DocumentExpressionDictionary(IndexReader reader, String field, - String weightExpression, Set sortFields, String payload) { - super(reader, field, null, payload); - Expression expression = null; - try { - expression = JavascriptCompiler.compile(weightExpression); - } catch (ParseException e) { - throw new RuntimeException(); - } - SimpleBindings bindings = new SimpleBindings(); - for (SortField sortField: sortFields) { - bindings.add(sortField); - } - - weightsValueSource = expression.getValueSource(bindings); - } - - /** - * Creates a new dictionary with the contents of the fields named field - * for the terms, payloadField for the corresponding payloads * and uses the weightsValueSource supplied to determine the * score. */ - public DocumentExpressionDictionary(IndexReader reader, String field, - ValueSource weightsValueSource, String payload) { + public DocumentValueSourceDictionary(IndexReader reader, String field, + ValueSource weightsValueSource, String payload) { super(reader, field, null, payload); this.weightsValueSource = weightsValueSource; } @@ -119,18 +85,18 @@ * for the terms and uses the weightsValueSource supplied to determine the * score. */ - public DocumentExpressionDictionary(IndexReader reader, String field, - ValueSource weightsValueSource) { + public DocumentValueSourceDictionary(IndexReader reader, String field, + ValueSource weightsValueSource) { super(reader, field, null, null); this.weightsValueSource = weightsValueSource; } @Override public BytesRefIterator getWordsIterator() throws IOException { - return new DocumentExpressionInputIterator(payloadField!=null); + return new DocumentValueSourceInputIterator(payloadField!=null); } - final class DocumentExpressionInputIterator extends DocumentDictionary.DocumentInputIterator { + final class DocumentValueSourceInputIterator extends DocumentDictionary.DocumentInputIterator { private FunctionValues currentWeightValues; /** leaves of the reader */ @@ -140,7 +106,7 @@ /** current leave index */ private int currentLeafIndex = 0; - public DocumentExpressionInputIterator(boolean hasPayloads) + public DocumentValueSourceInputIterator(boolean hasPayloads) throws IOException { super(hasPayloads); leaves = reader.leaves(); Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java =================================================================== --- lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java (revision 1561346) +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java (working copy) @@ -1,253 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.Set; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.StoredField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.spell.Dictionary; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LuceneTestCase; -import org.junit.Test; - -public class DocumentExpressionDictionaryTest extends LuceneTestCase { - - static final String FIELD_NAME = "f1"; - static final String WEIGHT_FIELD_NAME_1 = "w1"; - static final String WEIGHT_FIELD_NAME_2 = "w2"; - static final String WEIGHT_FIELD_NAME_3 = "w3"; - static final String PAYLOAD_FIELD_NAME = "p1"; - - private Map generateIndexDocuments(int ndocs) { - Map docs = new HashMap<>(); - for(int i = 0; i < ndocs ; i++) { - Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES); - Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i)); - Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i); - Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i); - Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i); - Document doc = new Document(); - doc.add(field); - doc.add(payload); - doc.add(weight1); - doc.add(weight2); - doc.add(weight3); - docs.put(field.stringValue(), doc); - } - return docs; - } - - @Test - public void testEmptyReader() throws IOException { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - iwc.setMergePolicy(newLogMergePolicy()); - // Make sure the index is created? - RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - writer.commit(); - writer.close(); - IndexReader ir = DirectoryReader.open(dir); - Set sortFields = new HashSet(); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG)); - Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); - - assertNull(inputIterator.next()); - assertEquals(inputIterator.weight(), 0); - assertNull(inputIterator.payload()); - - ir.close(); - dir.close(); - } - - @Test - public void testBasic() throws IOException { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - iwc.setMergePolicy(newLogMergePolicy()); - RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(atLeast(100)); - for(Document doc: docs.values()) { - writer.addDocument(doc); - } - writer.commit(); - writer.close(); - - IndexReader ir = DirectoryReader.open(dir); - Set sortFields = new HashSet(); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG)); - Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); - BytesRef f; - while((f = inputIterator.next())!=null) { - Document doc = docs.remove(f.utf8ToString()); - long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue(); - long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue(); - long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue(); - assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(inputIterator.weight(), (w1 + w2) - w3); - assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); - } - assertTrue(docs.isEmpty()); - ir.close(); - dir.close(); - } - - @Test - public void testWithoutPayload() throws IOException { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - iwc.setMergePolicy(newLogMergePolicy()); - RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(atLeast(100)); - for(Document doc: docs.values()) { - writer.addDocument(doc); - } - writer.commit(); - writer.close(); - - IndexReader ir = DirectoryReader.open(dir); - Set sortFields = new HashSet(); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG)); - Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w1 + (0.2 * w2) - (w3 - w1)/2", sortFields); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); - BytesRef f; - while((f = inputIterator.next())!=null) { - Document doc = docs.remove(f.utf8ToString()); - long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue(); - long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue(); - long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue(); - assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(inputIterator.weight(), (long)(w1 + (0.2 * w2) - (w3 - w1)/2)); - assertEquals(inputIterator.payload(), null); - } - assertTrue(docs.isEmpty()); - ir.close(); - dir.close(); - } - - @Test - public void testWithDeletions() throws IOException { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - iwc.setMergePolicy(newLogMergePolicy()); - RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(atLeast(100)); - Random rand = random(); - List termsToDel = new ArrayList<>(); - for(Document doc : docs.values()) { - if(rand.nextBoolean() && termsToDel.size() < docs.size()-1) { - termsToDel.add(doc.get(FIELD_NAME)); - } - writer.addDocument(doc); - } - writer.commit(); - - Term[] delTerms = new Term[termsToDel.size()]; - for(int i=0; i < termsToDel.size() ; i++) { - delTerms[i] = new Term(FIELD_NAME, termsToDel.get(i)); - } - - for(Term delTerm: delTerms) { - writer.deleteDocuments(delTerm); - } - writer.commit(); - writer.close(); - - for(String termToDel: termsToDel) { - assertTrue(null!=docs.remove(termToDel)); - } - - IndexReader ir = DirectoryReader.open(dir); - assertTrue("NumDocs should be > 0 but was " + ir.numDocs(), ir.numDocs() > 0); - assertEquals(ir.numDocs(), docs.size()); - Set sortFields = new HashSet(); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); - Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w2-w1", sortFields, PAYLOAD_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); - BytesRef f; - while((f = inputIterator.next())!=null) { - Document doc = docs.remove(f.utf8ToString()); - long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue(); - long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue(); - assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(inputIterator.weight(), w2-w1); - assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); - } - assertTrue(docs.isEmpty()); - ir.close(); - dir.close(); - } - - @Test - public void testWithValueSource() throws IOException { - - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - iwc.setMergePolicy(newLogMergePolicy()); - RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); - Map docs = generateIndexDocuments(atLeast(100)); - for(Document doc: docs.values()) { - writer.addDocument(doc); - } - writer.commit(); - writer.close(); - - IndexReader ir = DirectoryReader.open(dir); - Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME); - InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); - BytesRef f; - while((f = inputIterator.next())!=null) { - Document doc = docs.remove(f.utf8ToString()); - assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(inputIterator.weight(), 10); - assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); - } - assertTrue(docs.isEmpty()); - ir.close(); - dir.close(); - } - -} Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java =================================================================== --- lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java (working copy) +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java (working copy) @@ -20,11 +20,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; -import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -37,22 +35,24 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource; -import org.apache.lucene.search.SortField; +import org.apache.lucene.queries.function.valuesource.LongFieldSource; +import org.apache.lucene.queries.function.valuesource.SumFloatFunction; import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; -public class DocumentExpressionDictionaryTest extends LuceneTestCase { +public class DocumentValueSourceDictionaryTest extends LuceneTestCase { static final String FIELD_NAME = "f1"; static final String WEIGHT_FIELD_NAME_1 = "w1"; static final String WEIGHT_FIELD_NAME_2 = "w2"; static final String WEIGHT_FIELD_NAME_3 = "w3"; static final String PAYLOAD_FIELD_NAME = "p1"; - + private Map generateIndexDocuments(int ndocs) { Map docs = new HashMap<>(); for(int i = 0; i < ndocs ; i++) { @@ -82,11 +82,7 @@ writer.commit(); writer.close(); IndexReader ir = DirectoryReader.open(dir); - Set sortFields = new HashSet(); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG)); - Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME); + Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME); InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); assertNull(inputIterator.next()); @@ -111,11 +107,8 @@ writer.close(); IndexReader ir = DirectoryReader.open(dir); - Set sortFields = new HashSet(); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG)); - Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "((w1 + w2) - w3)", sortFields, PAYLOAD_FIELD_NAME); + ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)}; + Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME); InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); BytesRef f; while((f = inputIterator.next())!=null) { @@ -124,7 +117,7 @@ long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue(); long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue(); assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(inputIterator.weight(), (w1 + w2) - w3); + assertEquals(inputIterator.weight(), (w1 + w2 + w3)); assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); } assertTrue(docs.isEmpty()); @@ -146,11 +139,8 @@ writer.close(); IndexReader ir = DirectoryReader.open(dir); - Set sortFields = new HashSet(); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_3, SortField.Type.LONG)); - Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w1 + (0.2 * w2) - (w3 - w1)/2", sortFields); + ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)}; + Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd)); InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); BytesRef f; while((f = inputIterator.next())!=null) { @@ -159,7 +149,7 @@ long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue(); long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue(); assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(inputIterator.weight(), (long)(w1 + (0.2 * w2) - (w3 - w1)/2)); + assertEquals(inputIterator.weight(), (w1 + w2 + w3)); assertEquals(inputIterator.payload(), null); } assertTrue(docs.isEmpty()); @@ -202,10 +192,9 @@ IndexReader ir = DirectoryReader.open(dir); assertTrue("NumDocs should be > 0 but was " + ir.numDocs(), ir.numDocs() > 0); assertEquals(ir.numDocs(), docs.size()); - Set sortFields = new HashSet(); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_1, SortField.Type.LONG)); - sortFields.add(new SortField(WEIGHT_FIELD_NAME_2, SortField.Type.LONG)); - Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, "w2-w1", sortFields, PAYLOAD_FIELD_NAME); + ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2)}; + + Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME); InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); BytesRef f; while((f = inputIterator.next())!=null) { @@ -213,7 +202,7 @@ long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue(); long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue(); assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); - assertEquals(inputIterator.weight(), w2-w1); + assertEquals(inputIterator.weight(), w2+w1); assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); } assertTrue(docs.isEmpty()); @@ -236,7 +225,7 @@ writer.close(); IndexReader ir = DirectoryReader.open(dir); - Dictionary dictionary = new DocumentExpressionDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME); + Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME); InputIterator inputIterator = (InputIterator) dictionary.getWordsIterator(); BytesRef f; while((f = inputIterator.next())!=null) { Index: solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java =================================================================== --- solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java (revision 1561346) +++ solr/core/src/java/org/apache/solr/spelling/suggest/DocumentExpressionDictionaryFactory.java (working copy) @@ -17,12 +17,17 @@ * limitations under the License. */ +import java.text.ParseException; import java.util.HashSet; import java.util.Set; +import org.apache.lucene.expressions.Expression; +import org.apache.lucene.expressions.SimpleBindings; +import org.apache.lucene.expressions.js.JavascriptCompiler; +import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.SortField; import org.apache.lucene.search.spell.Dictionary; -import org.apache.lucene.search.suggest.DocumentExpressionDictionary; +import org.apache.lucene.search.suggest.DocumentValueSourceDictionary; import org.apache.solr.core.SolrCore; import org.apache.solr.schema.DoubleField; import org.apache.solr.schema.FieldType; @@ -36,7 +41,7 @@ import org.apache.solr.search.SolrIndexSearcher; /** - * Factory for {@link DocumentExpressionDictionary} + * Factory for {@link org.apache.lucene.search.suggest.DocumentValueSourceDictionary} */ public class DocumentExpressionDictionaryFactory extends DictionaryFactory { @@ -89,9 +94,23 @@ } } - return new DocumentExpressionDictionary(searcher.getIndexReader(), field, weightExpression, - sortFields, payloadField); + return new DocumentValueSourceDictionary(searcher.getIndexReader(), field, fromExpression(weightExpression, + sortFields), payloadField); } + + public ValueSource fromExpression(String weightExpression, Set sortFields) { + Expression expression = null; + try { + expression = JavascriptCompiler.compile(weightExpression); + } catch (ParseException e) { + throw new RuntimeException(); + } + SimpleBindings bindings = new SimpleBindings(); + for (SortField sortField : sortFields) { + bindings.add(sortField); + } + return expression.getValueSource(bindings); + } private SortField.Type getSortFieldType(SolrCore core, String sortFieldName) { SortField.Type type = null;