Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1205962) +++ lucene/CHANGES.txt (working copy) @@ -651,6 +651,12 @@ prevents this as best as it can by throwing AlreadyClosedException also on clones. (Uwe Schindler, Robert Muir) +New Features + +* LUCENE-3593: Added a FieldValueFilter that accepts all documents that either + have at least one or no value at all in a specific field. (Simon Willnauer, + Uwe Schindler, Robert Muir) + ======================= Lucene 3.5.0 ======================= Changes in backwards compatibility policy Index: lucene/src/test/org/apache/lucene/search/TestFieldValueFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestFieldValueFilter.java (revision 0) +++ lucene/src/test/org/apache/lucene/search/TestFieldValueFilter.java (revision 0) @@ -0,0 +1,117 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * + */ +public class TestFieldValueFilter extends LuceneTestCase { + + public void testFieldValueFilterNoValue() throws IOException { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + int docs = atLeast(10); + int[] docStates = buildIndex(writer, docs); + int numDocsNoValue = 0; + for (int i = 0; i < docStates.length; i++) { + if (docStates[i] == 0) { + numDocsNoValue++; + } + } + + IndexReader reader = IndexReader.open(directory); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs search = searcher.search(new TermQuery(new Term("all", "test")), + new FieldValueFilter("some", true), docs); + assertEquals(search.totalHits, numDocsNoValue); + + ScoreDoc[] scoreDocs = search.scoreDocs; + for (ScoreDoc scoreDoc : scoreDocs) { + assertNull(reader.document(scoreDoc.doc).get("some")); + } + + reader.close(); + searcher.close(); + directory.close(); + } + + public void testFieldValueFilter() throws IOException { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + int docs = atLeast(10); + int[] docStates = buildIndex(writer, docs); + int numDocsWithValue = 0; + for (int i = 0; i < docStates.length; i++) { + if (docStates[i] == 1) { + numDocsWithValue++; + } + } + IndexReader reader = IndexReader.open(directory); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs search = searcher.search(new TermQuery(new Term("all", "test")), + new FieldValueFilter("some"), docs); + assertEquals(search.totalHits, numDocsWithValue); + + ScoreDoc[] scoreDocs = search.scoreDocs; + for (ScoreDoc scoreDoc : scoreDocs) { + assertEquals("value", reader.document(scoreDoc.doc).get("some")); + } + + reader.close(); + searcher.close(); + directory.close(); + } + + private int[] buildIndex(RandomIndexWriter writer, int docs) + throws IOException, CorruptIndexException { + int[] docStates = new int[docs]; + for (int i = 0; i < docs; i++) { + Document doc = new Document(); + if (random.nextBoolean()) { + docStates[i] = 1; + doc.add(newField("some", "value", TextField.TYPE_STORED)); + } + doc.add(newField("all", "test", TextField.TYPE_UNSTORED)); + doc.add(newField("id", "" + i, TextField.TYPE_STORED)); + writer.addDocument(doc); + } + writer.commit(); + int numDeletes = random.nextInt(docs); + for (int i = 0; i < numDeletes; i++) { + int docID = random.nextInt(docs); + writer.deleteDocuments(new Term("id", "" + docID)); + docStates[docID] = 2; + } + writer.close(); + return docStates; + } + +} Property changes on: lucene/src/test/org/apache/lucene/search/TestFieldValueFilter.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/search/FieldValueFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldValueFilter.java (revision 0) +++ lucene/src/java/org/apache/lucene/search/FieldValueFilter.java (revision 0) @@ -0,0 +1,119 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.FieldCacheRangeFilter.FieldCacheDocIdSet; +import org.apache.lucene.util.Bits; + +/** + * A {@link Filter} that accepts all documents that have one or more values in a + * given field. This {@link Filter} request {@link Bits} from the + * {@link FieldCache} and build the bits if not present. + */ +public class FieldValueFilter extends Filter { + private final String field; + private final boolean negate; + + /** + * Creates a new {@link FieldValueFilter} + * + * @param field + * the field to filter + */ + public FieldValueFilter(String field) { + this(field, false); + } + + /** + * Creates a new {@link FieldValueFilter} + * + * @param field + * the field to filter + * @param negate + * iff true all documents with no value in the given + * field are accepted. + * + */ + public FieldValueFilter(String field, boolean negate) { + this.field = field; + this.negate = negate; + } + + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) + throws IOException { + final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField( + context.reader, field); + if (negate) { + return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { + @Override + final boolean matchDoc(int doc) { + return !docsWithField.get(doc); + } + }; + } else { + if (docsWithField instanceof DocIdSet) { + // UweSays: this is always the case for our current impl - but who knows + // :-) + return BitsFilteredDocIdSet.wrap((DocIdSet) docsWithField, acceptDocs); + } + return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { + @Override + final boolean matchDoc(int doc) { + return docsWithField.get(doc); + } + }; + } + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((field == null) ? 0 : field.hashCode()); + result = prime * result + (negate ? 1231 : 1237); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + FieldValueFilter other = (FieldValueFilter) obj; + if (field == null) { + if (other.field != null) + return false; + } else if (!field.equals(other.field)) + return false; + if (negate != other.negate) + return false; + return true; + } + + @Override + public String toString() { + return "NoFieldValueFilter [field=" + field + ", negate=" + negate + "]"; + } + +} Property changes on: lucene/src/java/org/apache/lucene/search/FieldValueFilter.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native