Index: common-build.xml =================================================================== --- common-build.xml (revision 827021) +++ common-build.xml (working copy) @@ -58,8 +58,8 @@ - - + + Index: src/java/org/apache/lucene/util/DocIDPriorityQueue.java =================================================================== --- src/java/org/apache/lucene/util/DocIDPriorityQueue.java (revision 0) +++ src/java/org/apache/lucene/util/DocIDPriorityQueue.java (revision 0) @@ -0,0 +1,163 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** A PriorityQueue maintains a partial ordering of its elements such that the + * least element can always be found in constant time. Put()'s and pop()'s + * require log(size) time. + * + *

NOTE: This class pre-allocates a full array of + * length maxSize+1, in {@link #initialize}. + * +*/ +public abstract class DocIDPriorityQueue { + private int size; + private int maxSize; + final protected int[] heap; + public final int base; + + public DocIDPriorityQueue(int maxSize, int base) { + size = 0; + this.base = base; + int heapSize; + if (0 == maxSize) + // We allocate 1 extra to avoid if statement in top() + heapSize = 2; + else + heapSize = maxSize + 1; + heap = new int[heapSize]; + this.maxSize = maxSize; + } + + /** Determines the ordering of objects in this priority queue. Subclasses + must define this one method. */ + public abstract int compare(int a, int b); + + /** + * Adds an Object to a PriorityQueue in log(size) time. If one tries to add + * more objects than maxSize from initialize an + * {@link ArrayIndexOutOfBoundsException} is thrown. + * + * @return the new 'bottom' element in the queue. + */ + public final int add(int element) { + size++; + heap[size] = element; + upHeap(); + return heap[1]; + } + + public abstract Comparable sortValue(int doc); + + public int replace(int element) { + heap[1] = element; + downHeap(); + return heap[1]; + } + + /** Returns the least element of the PriorityQueue in constant time. */ + public final int top() { + // We don't need to check size here: if maxSize is 0, + // then heap is length 2 array with both entries null. + // If size is 0 then heap[1] is already null. + return heap[1]; + } + + /** Removes and returns the least element of the PriorityQueue in log(size) + time. */ + public final int pop() { + if (size > 0) { + int result = heap[1]; // save first value + heap[1] = heap[size]; // move last to first + heap[size] = -1; // permit GC of objects + size--; + downHeap(); // adjust heap + return result; + } else + return -1; + } + + /** + * Should be called when the Object at top changes values. Still log(n) worst + * case, but it's at least twice as fast to + * + *

+   * pq.top().change();
+   * pq.updateTop();
+   * 
+ * + * instead of + * + *
+   * o = pq.pop();
+   * o.change();
+   * pq.add(o);
+   * 
+ * + * @return the new 'top' element. + */ + public final int updateTop() { + downHeap(); + return heap[1]; + } + + /** Returns the number of elements currently stored in the PriorityQueue. */ + public final int size() { + return size; + } + + /** Removes all entries from the PriorityQueue. */ + public final void clear() { + for (int i = 0; i <= size; i++) { + heap[i] = -1; + } + size = 0; + } + + private final void upHeap() { + int i = size; + int node = heap[i]; // save bottom node + int j = i >>> 1; + while (j > 0 && compare(node, heap[j]) < 0) { + heap[i] = heap[j]; // shift parents down + i = j; + j = j >>> 1; + } + heap[i] = node; // install saved node + } + + private final void downHeap() { + int i = 1; + int node = heap[i]; // save top node + int j = i << 1; // find smaller child + int k = j + 1; + if (k <= size && compare(heap[k], heap[j]) < 0) { + j = k; + } + while (j <= size && compare(heap[j], node) < 0) { + heap[i] = heap[j]; // shift up child + i = j; + j = i << 1; + k = j + 1; + if (k <= size && compare(heap[k], heap[j]) < 0) { + j = k; + } + } + heap[i] = node; // install saved node + } +} Property changes on: src/java/org/apache/lucene/util/DocIDPriorityQueue.java ___________________________________________________________________ Added: svn:eol-style + native Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByIntQueue.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByIntQueue.java (revision 0) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByIntQueue.java (revision 0) @@ -0,0 +1,36 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.ScoreDocComparator; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.DocIDPriorityQueue; +import org.apache.lucene.index.IndexReader; +import 
java.io.IOException; + +public class SortByIntQueue extends DocIDPriorityQueue { + private final int[] values; + + SortByIntQueue(int size, int base, IndexReader reader, String field) throws IOException { + super(size, base); + values = FieldCache.DEFAULT.getInts(reader, field); + } + + @Override + public final int compare(int doc1, int doc2) { + // Cannot simply subtract: could overflow int + final int v1 = values[doc1]; + final int v2 = values[doc2]; + if (v1 > v2) { + return -1; + } else if (v1 < v2) { + return 1; + } else { + return doc2 - doc1; + } + } + + @Override + public Comparable sortValue(int doc) { + return Integer.valueOf(values[doc]); + } +} Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByIntQueue.java ___________________________________________________________________ Added: svn:eol-style + native Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByStringQueue.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByStringQueue.java (revision 0) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByStringQueue.java (revision 0) @@ -0,0 +1,36 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.ScoreDocComparator; +import org.apache.lucene.util.DocIDPriorityQueue; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.FieldCache; +import java.io.IOException; + +public class SortByStringQueue extends DocIDPriorityQueue { + + private final int[] order; + private final String[] values; + + SortByStringQueue(int size, int base, IndexReader reader, String field) throws IOException { + super(size, base); + FieldCache.StringIndex index = FieldCache.DEFAULT.getStringIndex (reader, field); + order = index.order; + values = index.lookup; + } + + @Override + public final 
int compare(int doc1, int doc2) { + final int cmp = order[doc2] - order[doc1]; + if (cmp != 0) { + return cmp; + } else { + return doc2 - doc1; + } + } + + @Override + public Comparable sortValue(int doc) { + return values[order[doc]]; + } +} Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByStringQueue.java ___________________________________________________________________ Added: svn:eol-style + native Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OneSortNoScoreCollector.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OneSortNoScoreCollector.java (revision 0) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OneSortNoScoreCollector.java (revision 0) @@ -0,0 +1,123 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.LinkedList; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.DocIDPriorityQueue; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.ScoreDocComparator; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.SortComparatorSource; + +public class OneSortNoScoreCollector extends Collector { + private final LinkedList _pqList; + private final int _numHits; + private int _totalHits; + private final String _field; + private int _bottom; + private boolean _queueFull; + private DocIDPriorityQueue _currentQueue; + private final boolean stringSort; + + public static class NonScoreDoc extends ScoreDoc { + final DocIDPriorityQueue queue; + + public NonScoreDoc(int docid, DocIDPriorityQueue queue) { + super(docid, 0.0f); + this.queue = queue; + } + } + + public OneSortNoScoreCollector(boolean 
stringSort, String field,int numHits) { + this.stringSort = stringSort; + _pqList = new LinkedList(); + _numHits = numHits; + _field = field; + _totalHits = 0; + _queueFull = false; + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + + @Override + public void collect(int doc) throws IOException { + _totalHits++; + if (_queueFull){ + if (_currentQueue.compare(_bottom,doc) >= 0) { + return; + } + _bottom = _currentQueue.replace(doc); + } + else{ + _bottom = _currentQueue.add(doc); + _queueFull = (_currentQueue.size() >= _numHits); + } + } + + @Override + public void setNextReader(IndexReader reader, int docBase) throws IOException { + if (stringSort) { + _currentQueue = new SortByStringQueue(_numHits, docBase, reader, _field); + } else { + _currentQueue = new SortByIntQueue(_numHits, docBase, reader, _field); + } + _pqList.add(_currentQueue); + _queueFull = false; + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + } + + public int getTotalHits(){ + return _totalHits; + } + + public ArrayList getTop(){ + ArrayList> iterList = new ArrayList>(_pqList.size()); + for (DocIDPriorityQueue pq : _pqList){ + int count = pq.size(); + NonScoreDoc[] resList = new NonScoreDoc[count]; + for (int i = count - 1; i >= 0; i--) { + resList[i] = new NonScoreDoc(pq.pop(), pq); + } + iterList.add(Arrays.asList(resList).iterator()); + } + ArrayList resList = ListMerger.mergeLists(0, _numHits, iterList, new Comparator() { + + public int compare(NonScoreDoc o1, NonScoreDoc o2) { + Comparable s1 = o1.queue.sortValue(o1.doc); + Comparable s2 = o2.queue.sortValue(o2.doc); + if (s1 == null) { + if (s2 == null) { + return 0; + } else { + return -1; + } + } else if (s2 == null) { + return 1; + } + int v = s1.compareTo(s2); + if (v==0){ + return o1.doc + o1.queue.base - o2.doc - o2.queue.base; + } else { + return v; + } + } + }); + + for (NonScoreDoc doc : resList){ + doc.doc += doc.queue.base; + } + return resList; + } +} Property changes 
on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OneSortNoScoreCollector.java ___________________________________________________________________ Added: svn:eol-style + native Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithOldSortTask.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithOldSortTask.java (revision 0) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithOldSortTask.java (revision 0) @@ -0,0 +1,167 @@ +package org.apache.lucene.benchmark.byTask.tasks; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.ScoreDocComparator; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.FieldSortedHitQueue; +import org.apache.lucene.index.IndexReader; + +/** + * Does sort search on specified field. 
+ * + */ +public class SearchWithOldSortTask extends ReadTask { + + private Sort sort; + + public SearchWithOldSortTask(PerfRunData runData) { + super(runData); + } + + /** + * SortFields: field:type,field:type[,noscore][,nomaxscore] + * + * If noscore is present, then we turn off score tracking + * in {@link org.apache.lucene.search.TopFieldCollector}. + * If nomaxscore is present, then we turn off maxScore tracking + * in {@link org.apache.lucene.search.TopFieldCollector}. + * + * name:string,page:int,subject:string + * + */ + public void setParams(String sortField) { + super.setParams(sortField); + String[] fields = sortField.split(","); + SortField[] sortFields = new SortField[fields.length]; + int upto = 0; + for (int i = 0; i < fields.length; i++) { + String field = fields[i]; + SortField sortField0; + if (field.equals("doc")) { + sortField0 = SortField.FIELD_DOC; + } else if (field.equals("score")) { /* "doc" and "score" are exclusive keywords; any other value must be field:type */ + sortField0 = SortField.FIELD_SCORE; + } else { + int index = field.lastIndexOf(":"); + String fieldName; + String typeString; + if (index != -1) { + fieldName = field.substring(0, index); + typeString = field.substring(1+index, field.length()); + } else { + throw new RuntimeException("You must specify the sort type ie page:int,subject:string"); + } + int type = getType(typeString); + sortField0 = new SortField(fieldName, type); + } + sortFields[upto++] = sortField0; + } + + if (upto < sortFields.length) { + SortField[] newSortFields = new SortField[upto]; + System.arraycopy(sortFields, 0, newSortFields, 0, upto); + sortFields = newSortFields; + } + this.sort = new Sort(sortFields); + + if (sortFields.length != 1) { + throw new RuntimeException("only 1 sort field allowed"); + } + } + + private int getType(String typeString) { + int type; + if (typeString.equals("float")) { + type = SortField.FLOAT; + } else if (typeString.equals("double")) { + type = SortField.DOUBLE; + } else if (typeString.equals("byte")) { + type = SortField.BYTE; + } else if
(typeString.equals("short")) { + type = SortField.SHORT; + } else if (typeString.equals("int")) { + type = SortField.INT; + } else if (typeString.equals("long")) { + type = SortField.LONG; + } else if (typeString.equals("string")) { + type = SortField.STRING; + } else if (typeString.equals("string_val")) { + type = SortField.STRING_VAL; + } else { + throw new RuntimeException("Unrecognized sort field type " + typeString); + } + return type; + } + + public boolean supportsParams() { + return true; + } + + public QueryMaker getQueryMaker() { + return getRunData().getQueryMaker(this); + } + + public boolean withRetrieve() { + return false; + } + + public boolean withSearch() { + return true; + } + + public boolean withTraverse() { + return false; + } + + public boolean withWarm() { + return false; + } + + public boolean withScore() { + return false; + } + + public boolean withMaxScore() { + return false; + } + + public Sort getSort() { + if (sort == null) { + throw new IllegalStateException("No sort field was set"); + } + return sort; + } + + public OneSortNoScoreCollector getCollector(int numHits) { + final SortField sortField = sort.getSort()[0]; + final boolean isString; + if (sortField.getType() == SortField.STRING) { + isString = true; + } else if (sortField.getType() == SortField.INT) { + isString = false; + } else { + throw new RuntimeException("SearchWithOldSort supports only string or int sort fields; got SortField type " + sortField.getType()); + } + + return new OneSortNoScoreCollector(isString, sortField.getField(), numHits); + } +} Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithOldSortTask.java ___________________________________________________________________ Added: svn:eol-style + native Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ListMerger.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ListMerger.java (revision 0) +++
contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ListMerger.java (revision 0) @@ -0,0 +1,149 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; + +import org.apache.lucene.util.PriorityQueue; + +/** + * @author ymatsuda + * + */ +public class ListMerger +{ + public static class MergedIterator implements Iterator + { + private class IteratorNode + { + public Iterator _iterator; + public T _curVal; + + public IteratorNode(Iterator iterator) + { + _iterator = iterator; + _curVal = null; + } + + public boolean fetch() + { + if(_iterator.hasNext()) + { + _curVal = _iterator.next(); + return true; + } + _curVal = null; + return false; + } + } + + private final PriorityQueue _queue; + + private MergedIterator(final int length, final Comparator comparator) + { + _queue = new PriorityQueue() + { + { + this.initialize(length); + } + + @SuppressWarnings("unchecked") + @Override + protected boolean lessThan(Object o1, Object o2) + { + T v1 = ((IteratorNode)o1)._curVal; + T v2 = ((IteratorNode)o2)._curVal; + + return (comparator.compare(v1, v2) < 0); + } + }; + } + + public MergedIterator(final List> iterators, final Comparator comparator) + { + this(iterators.size(), comparator); + for(Iterator iterator : iterators) + { + IteratorNode ctx = new IteratorNode(iterator); + if(ctx.fetch()) _queue.insert(ctx); + } + } + + public MergedIterator(final Iterator[] iterators, final Comparator comparator) + { + this(iterators.length, comparator); + for(Iterator iterator : iterators) + { + IteratorNode ctx = new IteratorNode(iterator); + if(ctx.fetch()) _queue.insert(ctx); + } + } + + public boolean hasNext() + { + return _queue.size() > 0; + } + + @SuppressWarnings("unchecked") + public T next() + { + IteratorNode ctx = (IteratorNode)_queue.top(); + T val = ctx._curVal; + if (ctx.fetch()) + { + _queue.adjustTop(); + } + else + { + _queue.pop(); + } + 
return val; + } + + public void remove() + { + throw new UnsupportedOperationException(); + } + } + + private ListMerger() { } + + public static Iterator mergeLists(final Iterator[] iterators, final Comparator comparator) + { + return new MergedIterator(iterators, comparator); + } + + public static Iterator mergeLists(final List> iterators, final Comparator comparator) + { + return new MergedIterator(iterators, comparator); + } + + public static ArrayList mergeLists(int offset, int count, Iterator[] iterators, Comparator comparator) + { + return mergeLists(offset, count, new MergedIterator(iterators, comparator)); + } + + public static ArrayList mergeLists(int offset, int count, List> iterators, Comparator comparator) + { + return mergeLists(offset, count, new MergedIterator(iterators, comparator)); + } + + private static ArrayList mergeLists(int offset, int count, Iterator mergedIter) + { + for (int c = 0; c < offset && mergedIter.hasNext(); c++) + { + mergedIter.next(); + } + + ArrayList mergedList = new ArrayList(); + + for (int c = 0; c < count && mergedIter.hasNext(); c++) + { + mergedList.add(mergedIter.next()); + } + + return mergedList; + } + +} Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ListMerger.java ___________________________________________________________________ Added: svn:eol-style + native Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (revision 827021) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (working copy) @@ -31,6 +31,8 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.SortComparatorSource; +import org.apache.lucene.search.ScoreDocComparator; 
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.ScoreDoc; @@ -59,6 +61,19 @@ public ReadTask(PerfRunData runData) { super(runData); } + + private final SortComparatorSource sortSource = new SortComparatorSource() { + public ScoreDocComparator newComparator(IndexReader reader, String fieldName) throws IOException { + return getScoreDocComparator(reader, fieldName); + } + }; + + public ScoreDocComparator getScoreDocComparator(IndexReader reader, String fieldName) throws IOException { + return null; + } + + private static boolean first = true; + public int doLogic() throws Exception { int res = 0; boolean closeReader = false; @@ -94,20 +109,52 @@ QueryMaker queryMaker = getQueryMaker(); Query q = queryMaker.makeQuery(); Sort sort = getSort(); - TopDocs hits; + TopDocs hits = null; + List hits2 = null; final int numHits = numHits(); + int totalHits = 0; if (numHits > 0) { if (sort != null) { - // TODO: change the following to create TFC with in/out-of order + if (sort.getSort().length != 1) { + throw new RuntimeException("sort length is " + sort.getSort().length); + } + // TODO: change the following to create TFC with in/out-of order // according to whether the query's Scorer. 
- TopFieldCollector collector = TopFieldCollector.create(sort, numHits, - true, withScore(), withMaxScore(), false); - searcher.search(q, collector); - hits = collector.topDocs(); + if (doOldSortAPI) { + OneSortNoScoreCollector c = getCollector(numHits); + searcher.search(q, c); + hits2 = c.getTop(); + totalHits = c.getTotalHits(); + } else { + TopFieldCollector collector = TopFieldCollector.create(sort, numHits, + true, withScore(), withMaxScore(), false); + searcher.search(q, collector); + hits = collector.topDocs(); + totalHits = hits.totalHits; + } } else { hits = searcher.search(q, numHits); + totalHits = hits.totalHits; } - //System.out.println("q=" + q + ":" + hits.totalHits + " total hits"); + if (first) { + first = false; + System.out.println("NUMHITS=" + totalHits); + System.out.println("MAXDOC=" + searcher.getIndexReader().maxDoc()); + System.out.println("NUMDOCS=" + searcher.getIndexReader().numDocs()); + if (hits != null) { + for(int i=0;i compile.log 2>&1') != 0: + raise RuntimeError('compile failed (see compile.log)') + +BASE_SEARCH_ALG = ''' +analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer +directory=FSDirectory +work.dir = $INDEX$ +search.num.hits = $NUM_HITS$ +query.maker=org.apache.lucene.benchmark.byTask.feeds.FileBasedQueryMaker +file.query.maker.file = queries.txt +log.queries=true +log.step=100000 + +OpenReader +{"XSearchWarm" $SEARCH$} +$ROUNDS$ +CloseReader +RepSumByPrefRound XSearch +''' + +BASE_INDEX_ALG = ''' +analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer + +$OTHER$ + +doc.stored = true +doc.term.vector = false +log.step.AddDoc=10000 + +directory=FSDirectory +autocommit=false +compound=false + +work.dir=$WORKDIR$ + +{ "BuildIndex" + - CreateIndex + $INDEX_LINE$ + - CloseIndex +} + +RepSumByPrefRound BuildIndex +''' + +class RunAlgs: + + def __init__(self, resultsPrefix): + self.counter = 0 + self.results = [] + self.fOut = open('%s.txt' % resultsPrefix, 'wb') + + def makeIndex(self, source, numDocs, 
balancedNumSegs=None): + + if source not in ('wiki', 'random'): + raise RuntimeError('source must be wiki or random') + + indexName = 'work.%s.nd%gM' % (source, numDocs/1000000.0) + if balancedNumSegs is not None: + indexName += '_balanced%d' % balancedNumSegs + fullIndexPath = '%s/%s' % (INDEX_DIR_BASE, indexName) + + if os.path.exists(fullIndexPath): + print 'Index %s already exists...' % fullIndexPath + return indexName + + print 'Now create index %s...' % fullIndexPath + + s = BASE_INDEX_ALG + + if source == 'wiki': + other = '''content.source=org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource +docs.file=%s +doc.tokenized = false +''' % WIKI_FILE + else: + other = '''doc.index.props = true +doc.tokenized = false +doc.body.tokenized = false +content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource +''' + if INDEX_NUM_THREADS > 1: + other += 'doc.reuse.fields=false\n' + s = s.replace('$INDEX_LINE$', '[ { "AddDocs" AddDoc > : %s } : %s' % \ + (numDocs/INDEX_NUM_THREADS, INDEX_NUM_THREADS)) + else: + s = s.replace('$INDEX_LINE$', '{ "AddDocs" AddDoc > : %s' % \ + numDocs) + + s = s.replace('$WORKDIR$', fullIndexPath) + + if balancedNumSegs is not None: + other += ''' merge.factor=1000 + max.buffered=%d + ram.flush.mb=2000 + ''' % (numDocs/balancedNumSegs) + else: + if source == 'random': + other += 'ram.flush.mb=1.0\n' + else: + other += 'ram.flush.mb=32.0\n' + + s = s.replace('$OTHER$', other) + + try: + self.runOne(s, 'index_%s' % indexName, isIndex=True) + except: + if os.path.exists(fullIndexPath): + shutil.rmtree(fullIndexPath) + raise + return indexName + + def getLogPrefix(self, **dArgs): + l = dArgs.items() + l.sort() + return '_'.join(['%s=%s' % tup for tup in l]) + + def runOne(self, alg, logFileName, indexNumDocs=None, queries=None, verify=False, isIndex=False): + + if queries is not None: + if type(queries) in types.StringTypes: + queries = [queries] + open('queries.txt', 'wb').write('\n'.join(queries)) + + if DEBUG: + 
algFile = 'tmp.alg' + else: + algFile = 'tmp.%s.alg' % os.getpid() + open(algFile, 'wb').write(alg) + + fullLogFileName = '%s/%s' % (LOG_DIR, logFileName) + print ' log: %s' % fullLogFileName + + command = '%s -classpath ../../build/classes/java:../../build/classes/demo:../../build/contrib/highlighter/classes/java:lib/commons-digester-1.7.jar:lib/commons-collections-3.1.jar:lib/commons-compress-1.0.jar:lib/commons-logging-1.0.4.jar:lib/commons-beanutils-1.7.0.jar:lib/xerces-2.9.0.jar:lib/xml-apis-2.9.0.jar:../../build/contrib/benchmark/classes/java org.apache.lucene.benchmark.byTask.Benchmark %s > %s 2>&1' % (JAVA_COMMAND, algFile, fullLogFileName) + + if DEBUG: + print 'command=%s' % command + + try: + t0 = time.time() + if os.system(command) != 0: + raise RuntimeError('FAILED') + t1 = time.time() + finally: + if not DEBUG: + os.remove(algFile) + + if isIndex: + s = open(fullLogFileName, 'rb').read() + if s.find('Exception in thread "') != -1 or s.find('at org.apache.lucene') != -1: + raise RuntimeError('alg hit exceptions') + return + + else: + + # Parse results: + bestQPS = None + count = 0 + nhits = None + ndocs = None + warmTime = None + r = re.compile('^ ([0-9]+): (.*)$') + topN = [] + + for line in open(fullLogFileName, 'rb').readlines(): + m = r.match(line.rstrip()) + if m is not None: + topN.append(m.group(2)) + if line.startswith('NUMHITS='): + nhits = int(line[8:].strip()) + if line.startswith('NUMDOCS='): + ndocs = int(line[8:].strip()) + if line.startswith('XSearchWarm'): + v = line.strip().split() + warmTime = float(v[5]) + if line.startswith('XSearchReal'): + v = line.strip().split() + # print len(v), v + upto = 0 + i = 0 + qps = None + while i < len(v): + if v[i] == '-': + i += 1 + continue + else: + upto += 1 + i += 1 + if upto == 5: + qps = float(v[i-1].replace(',', '')) + break + + if qps is None: + raise RuntimeError('did not find qps') + + count += 1 + if bestQPS is None or qps > bestQPS: + bestQPS = qps + + if not verify: + if count != 
NUM_ROUND: + raise RuntimeError('did not find %s rounds (got %s)' % (NUM_ROUND, count)) + if warmTime is None: + raise RuntimeError('did not find warm time') + else: + bestQPS = 1.0 + warmTime = None + + if nhits is None: + raise RuntimeError('did not see NUMHITS=line') + + if ndocs is None: + raise RuntimeError('did not see NUMDOCS=line') + + if ndocs != indexNumDocs: + raise RuntimeError('indexNumDocs mismatch: expected %d but got %d' % (indexNumDocs, ndocs)) + + return nhits, warmTime, bestQPS, topN + + def getAlg(self, indexPath, searchTask, numHits, verify=False): + + s = BASE_SEARCH_ALG + + if not verify: + s = s.replace('$ROUNDS$', + ''' + { "Rounds" + { "Run" + { "TestSearchSpeed" + { "XSearchReal" $SEARCH$ > : 3.0s + } + NewRound + } : %d + } + ''' % NUM_ROUND) + else: + s = s.replace('$ROUNDS$', '') + + s = s.replace('$INDEX$', indexPath) + s = s.replace('$SEARCH$', searchTask) + s = s.replace('$NUM_HITS$', str(numHits)) + + return s + + def compare(self, baseline, new, *params): + + if new[0] != baseline[0]: + raise RuntimeError('baseline found %d hits but new found %d hits' % (baseline[0], new[0])) + + qpsOld = baseline[2] + qpsNew = new[2] + pct = 100.0*(qpsNew-qpsOld)/qpsOld + print ' diff: %.1f%%' % pct + self.results.append((qpsOld, qpsNew, params)) + + self.fOut.write('|%s|%.2f|%.2f|%.1f%%|\n' % \ + ('|'.join(str(x) for x in params), + qpsOld, qpsNew, pct)) + self.fOut.flush() + + def save(self, name): + f = open('%s.pk' % name, 'wb') + cPickle.dump(self.results, f) + f.close() + +def verify(r1, r2): + if r1[0] != r2[0]: + raise RuntimeError('different total hits: %s vs %s' % (r1[0], r2[0])) + + h1 = r1[3] + h2 = r2[3] + if len(h1) != len(h2): + raise RuntimeError('different number of results') + else: + for i in range(len(h1)): + s1 = h1[i].replace('score=NaN', 'score=na') + s2 = h2[i].replace('score=NaN', 'score=na') + if s1 != s2: + raise RuntimeError('hit %s differs: %s vs %s' % (i, s1 ,s2)) + +def usage(): + print + print 'Usage: python -u %s 
-run | -report ' % sys.argv[0] + print + print ' -run runs all tests, saving results to file .pk' + print ' -report opens .pk and prints Jira table' + print ' -verify confirm old & new produce identical results' + print + sys.exit(1) + +def main(): + + if not os.path.exists(LOG_DIR): + os.makedirs(LOG_DIR) + + if '-run' in sys.argv: + i = sys.argv.index('-run') + mode = 'run' + if i < len(sys.argv)-1: + name = sys.argv[1+i] + else: + usage() + elif '-report' in sys.argv: + i = sys.argv.index('-report') + mode = 'report' + if i < len(sys.argv)-1: + name = sys.argv[1+i] + else: + usage() + elif '-verify' in sys.argv: + mode = 'verify' + name = None + else: + usage() + + if mode in ('run', 'verify'): + run(mode, name) + else: + report(name) + +def report(name): + + print '||Source||Seg size||Query||Tot hits||Sort||Top N||QPS old||QPS new||Pct change||' + + results = cPickle.load(open('%s.pk' % name)) + for qpsOld, qpsNew, params in results: + pct = 100.0*(qpsNew-qpsOld)/qpsOld + if pct < 0.0: + c = 'red' + else: + c = 'green' + + if not DO_BALANCED and params[1] == 'balanced': + continue + + params = list(params) + sort = params[4] + sort = sort.replace(':string', '') + sort = sort.replace('doctitle', 'title') + sort = sort.replace('sort_field:int', 'rand int') + sort = sort.replace('random_string', 'rand string') + params[4] = sort + + query = params[2] + if query == '*:*': + query = '' + params[2] = query + + pct = '{color:%s}%.1f%%{color}' % (c, pct) + print '|%s|%.2f|%.2f|%s|' % \ + ('|'.join(str(x) for x in params), + qpsOld, qpsNew, pct) + +def run(mode, name): + + r = RunAlgs(name) + + if not os.path.exists(WIKI_FILE): + print + print 'NOTE: wiki source file "%s" does not exist; skipping wikipedia index tests (edit WIKI_FILE in this script & restart if this is wrong)' % WIKI_FILE + print + doWiki = False + else: + doWiki = True + print + + print + print 'JAVA:\n%s' % os.popen('java -version 2>&1').read() + + print + if osName != 'windows': + print 'OS:\n%s' % 
os.popen('uname -a 2>&1').read() + else: + print 'OS:\n%s' % sys.platform + + if DO_BALANCED: + balancedTup = (None, 20) + else: + balancedTup = (None,) + + indexes = {} + for source in ('wiki', 'random'): + if source != 'wiki' or doWiki: + for balanced in balancedTup: + #indexes[(source, balanced)] = r.makeIndex(source, 2000000, balancedNumSegs=balanced) + indexes[(source, balanced)] = r.makeIndex(source, INDEX_NUM_DOCS, balancedNumSegs=balanced) + + doVerify = mode == 'verify' + for balanced in balancedTup: + if doWiki: + sources = ('wiki', 'random') + else: + sources = ('random',) + + for source in sources: + if source == 'random': + queries = ('*:*',) + else: + queries = ('1', '*:*') + + for query in queries: + if source == 'random': + sorts = ( + 'random_string:string', + 'country:string' + 'sort_field:int', + ) + else: + sorts = ('doctitle:string',) + for sort in sorts: + for numHits in (10, 25, 50, 100, 500, 1000): + + if balanced is None: + s = 'log' + else: + s = 'balanced' + + print '\nRUN: balanced=%s source=%s query=%s sort=%s nhits=%d' % \ + (s, source, query, sort, numHits) + + prefix = r.getLogPrefix(balanced=balanced, source=source, query=query, sort=sort, numHits=numHits) + indexPath = '%s/%s' % (INDEX_DIR_BASE, indexes[(source, balanced)]) + + # singlePQ -- baseline (current 2.9.x) + s = r.getAlg(indexPath, + 'SearchWithSort(%s,noscore,nomaxscore)' % sort, + numHits, + verify=doVerify) + singlePQ = r.runOne(s, 'singlePQ_%s' % prefix, INDEX_NUM_DOCS, query, verify=doVerify) + + # multiPQ + s = r.getAlg(indexPath, + 'SearchWithOldSort(%s)' % sort, + numHits, + verify=doVerify) + s = 'old.sort.api=true\n' + s + + multiPQ = r.runOne(s, 'multiPQ_%s' % prefix, INDEX_NUM_DOCS, query, verify=doVerify) + print ' %d hits' % singlePQ[0] + + verify(singlePQ, multiPQ) + + if mode == 'run': + + if balanced is None: + bs = 'log' + else: + bs = 'balanced' + + r.compare(singlePQ, multiPQ, + source, bs, query, singlePQ[0], sort, numHits) + r.save(name) + +def 
cleanScores(l): + for i in range(len(l)): + pos = l[i].find(' score=') + l[i] = l[i][:pos].strip() + +if __name__ == '__main__': + main() Property changes on: contrib/benchmark/sortBench.py ___________________________________________________________________ Added: svn:eol-style + native