Index: java/org/apache/lucene/search/StoredFieldSortFactory.java
===================================================================
--- java/org/apache/lucene/search/StoredFieldSortFactory.java (revision 0)
+++ java/org/apache/lucene/search/StoredFieldSortFactory.java (revision 0)
@@ -0,0 +1,145 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.FieldSelectorResult;
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+
+/**
+ * Creates {@link Sort} objects that order hits by the value of a stored field.
+ *
+ * It doesn't use FieldCache: a field value is read from the {@link IndexReader} only for the
+ * documents that are actually compared, i.e. those in the search result set. This is much faster
+ * than the FieldCache-utilizing approach when the result set is much smaller than the whole index
+ * and the compared field value is not big.
+ *
+ * The sorted field must be stored. Hits whose document has no stored value for the field sort
+ * before hits that have one.
+ *
+ * @author Artem Vasiliev
+ */
+public class StoredFieldSortFactory implements SortComparatorSource {
+
+  /** The single shared factory instance; the factory holds no state. */
+  public static final SortComparatorSource instance = new StoredFieldSortFactory();
+
+  private StoredFieldSortFactory() {}
+
+  /**
+   * Creates a Sort on the stored values of the given field.
+   *
+   * @param sortFieldName name of the stored field to sort by
+   * @param sortDescending true to reverse the natural String order of the values
+   * @return a Sort backed by this factory's comparator
+   */
+  public static Sort create(String sortFieldName, boolean sortDescending) {
+    return new Sort(new SortField(sortFieldName, instance, sortDescending));
+  }
+
+  /**
+   * Creates a comparator that reads the given field's stored values from the given reader.
+   *
+   * @param reader index reader the stored values are read from
+   * @param fieldname name of the stored field to compare by
+   * @return a comparator over the field's stored values
+   * @throws IOException declared by {@link SortComparatorSource}; not thrown by this implementation
+   */
+  public ScoreDocComparator newComparator(IndexReader reader, String fieldname) throws IOException {
+    return new StoredFieldComparator(reader, fieldname);
+  }
+
+  /**
+   * Compares hits by the stored value of one field. It also acts as the {@link FieldSelector}
+   * that restricts document loading to that field. Declared static because it uses nothing
+   * from the enclosing factory.
+   */
+  private static final class StoredFieldComparator implements ScoreDocComparator, FieldSelector {
+    private final IndexReader indexReader;
+    private final String fieldName;
+
+    /**
+     * @param indexReader reader the stored values are read from. Every comparison reads both
+     *        documents through it, so a reader that caches documents can speed sorting up.
+     * @param fieldName name of the field to compare by
+     */
+    StoredFieldComparator(IndexReader indexReader, String fieldName) {
+      this.indexReader = indexReader;
+      this.fieldName = fieldName;
+    }
+
+    /**
+     * Orders two hits by the natural order of their stored field values. A hit without a stored
+     * value sorts before a hit with one; two hits without a value compare as equal.
+     */
+    public int compare(ScoreDoc scoreDoc1, ScoreDoc scoreDoc2) {
+      Comparable value1 = sortValue(scoreDoc1);
+      Comparable value2 = sortValue(scoreDoc2);
+      if (value1 == null) {
+        return value2 == null ? 0 : -1;
+      }
+      if (value2 == null) {
+        return 1;
+      }
+      return value1.compareTo(value2);
+    }
+
+    /**
+     * Reads the stored value of the sort field for the given hit.
+     *
+     * @param scoreDoc hit whose document is loaded
+     * @return the field's String value, or null if the loaded document has no value for it
+     * @throws RuntimeException wrapping the IOException if the document cannot be read
+     */
+    public Comparable sortValue(ScoreDoc scoreDoc) {
+      try {
+        Document document = indexReader.document(scoreDoc.doc, this);
+        return document.get(fieldName);
+      } catch (IOException e) {
+        throw new RuntimeException(
+            "failed to load field '" + fieldName + "' of document " + scoreDoc.doc, e);
+      }
+    }
+
+    /**
+     * The sort type doesn't matter here.
+     *
+     * @return {@link SortField#CUSTOM}
+     */
+    public int sortType() {
+      return SortField.CUSTOM;
+    }
+
+    /**
+     * Selects only the sort field: it is loaded and loading then stops; every other field is
+     * skipped.
+     *
+     * @param givenFieldName name of the field the reader is about to load
+     * @return LOAD_AND_BREAK for the sort field, NO_LOAD for any other field
+     */
+    public FieldSelectorResult accept(String givenFieldName) {
+      if (fieldName.equals(givenFieldName)) {
+        return FieldSelectorResult.LOAD_AND_BREAK;
+      }
+      return FieldSelectorResult.NO_LOAD;
+    }
+  }
+}
\ No newline at end of file
Index: test/org/apache/lucene/search/TestStoredFieldSortedSearch.java
===================================================================
--- test/org/apache/lucene/search/TestStoredFieldSortedSearch.java (revision 0)
+++ test/org/apache/lucene/search/TestStoredFieldSortedSearch.java (revision 0)
@@ -0,0 +1,182 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import junit.framework.Assert; +import junit.framework.TestCase; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.store.FSDirectory; + +import java.io.File; +import java.io.IOException; + +/** + * Test fixture around a filesystem index whose location comes from the "index.dir" system property. + * Judging by the timings quoted on setUp and by the remaining assertions, it compares a stored-field + * sorted search with a FieldCache-based sorted search. + * NOTE(review): in this copy of the patch the text between the loop header in deleteDir and the end of + * the timing assertion is missing, so the test method itself cannot be documented here. + * + * @author Artem Vasiliev 2007-01-09 + * @version $Id: $ + */ +public class TestStoredFieldSortedSearch extends TestCase { + private static final int DOCS_NUM = 100000; + private static final int FILTERED_DOCS_NUM = 50; //(nearly) the resulting number of hits. The larger this number, the smaller the difference between the sorting approaches + private static final String SORT_FIELD = "sortField"; + private static final String SEARCH_FIELD = "searchField"; + private static final String SEARCH_TERM_PREFIX = "brebfer"; + + /** + * Rebuilds the test index from scratch. + * + * The index creation takes quite long, about 4mins on my machine when DOCS_NUM == 100000, index files + * are about 6.5M. Timings were: + * [junit] 47ms elapsed for StoredFiled sort + * [junit] 266ms elapsed for FieldCache'd sort + * Quite a big filesystem index is necessary for the difference to be significant - for smaller + * indexes the difference between the two approaches is insignificant. + * + * With DOCS_NUM == 1,000,000 it took about 40 mins to create the index. 
Timings were: first - 1687ms and 1922ms + * (for StoredFiled and FieldCache'd sorts respectively); the next times (without index creation) they were about 100ms + * and 2000ms. + * + * With DOCS_NUM == 10,000,000 the StoredFiled sorted search took about 1s with the standard amount of memory + * (-Xmx80m) while the FieldCache'd one failed with OutOfMemoryError even with 1G (-Xmx1000m). The resulting index size was + * 640M, its creation took no less than 7hrs. + * + * @throws IOException propagated from creating or writing the index + */ + public void setUp() throws IOException { + createIndexIfNotExists(); + /* CREATE_INDEX is hard-coded to true, so the index is deleted and rebuilt on every setUp */ + final boolean CREATE_INDEX = true; + if (CREATE_INDEX) { + deleteDir(getIndexDir()); + long start = System.currentTimeMillis(); + IndexWriter indexWriter = null; + try { + indexWriter = new IndexWriter(FSDirectory.getDirectory(getIndexDir(), true), new SimpleAnalyzer()); + + /* i % divBy yields divBy distinct search terms, so each term matches about FILTERED_DOCS_NUM documents */ + int divBy = DOCS_NUM / FILTERED_DOCS_NUM; + for(int i = 0; i < DOCS_NUM; i++) { + Document doc = new Document(); + doc.add(new Field(SORT_FIELD, "lkbhbvgerw" + String.valueOf(Math.random()), + Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field(SEARCH_FIELD, SEARCH_TERM_PREFIX + (i % divBy), Field.Store.NO, Field.Index.UN_TOKENIZED)); + indexWriter.addDocument(doc); + } + } finally { + try { + if (indexWriter != null) { + indexWriter.close(); + } + } catch (IOException e) { + System.err.print("failed to close indexWriter"); + e.printStackTrace(); + } + } + System.out.println((System.currentTimeMillis() - start) + "ms elapsed setting up the index"); + } + } + + /** + * Returns the index directory named by the "index.dir" system property. new File(path) throws + * NullPointerException when the property is not set. + */ + private File getIndexDir() { + String path = System.getProperty("index.dir"); + return new File(path); + } + + /** + * Presumably deletes dir recursively; only the existence check and the directory listing are visible. + * NOTE(review): the text of this patch is damaged from the loop header below up to the end of a timing + * assertion of the test method, so the rest of this method and the start of the test are missing. + */ + public void deleteDir(File dir) throws RuntimeException { + final String METHOD_NAME = "deleteDir"; + if (!dir.exists()) return; + if (dir.isDirectory()) { + String[] children = dir.list(); + for (int i = 0; i storedFiledSortTime * 2); + + Assert.assertEquals("hits number must be the same", hits1.length(), hits2.length()); + + Assert.assertTrue("some hits must be present", hits1.length() >0); + } + + /** + * Closes the reader and then the searcher, skipping null arguments. An IOException is printed to + * System.err and not rethrown; if closing the reader throws, the searcher is not closed. + */ + private 
void close(IndexReader indexReader, IndexSearcher indexSearcher) throws IOException { + try { + if (indexReader != null) indexReader.close(); + if (indexSearcher != null) indexSearcher.close(); + } catch (IOException e) { + System.err.println("couldn't close reader or searcher"); + e.printStackTrace(); + } + } + + /** + * Calls FSDirectory.getDirectory(dir, true) when the index directory does not exist yet. + */ + private void createIndexIfNotExists() throws IOException { + if (!isIndexExist()) { + FSDirectory.getDirectory(getIndexDir(), true); + } + } + + /** + * @return true when the index directory exists on disk + */ + private boolean isIndexExist() { + return getIndexDir().exists(); + } + + /** + * Opens an IndexReader on the index directory. + */ + private IndexReader createIndexReader() throws IOException { + return IndexReader.open(getIndexDir()); + } + + /** + * Wraps the given reader in an IndexSearcher. + */ + private IndexSearcher createIndexSearcher(IndexReader indexReader) { + return new IndexSearcher(indexReader); + } +}