Index: common-build.xml
===================================================================
--- common-build.xml (revision 827021)
+++ common-build.xml (working copy)
@@ -58,8 +58,8 @@
-
-
+
+
Index: src/java/org/apache/lucene/util/DocIDPriorityQueue.java
===================================================================
--- src/java/org/apache/lucene/util/DocIDPriorityQueue.java (revision 0)
+++ src/java/org/apache/lucene/util/DocIDPriorityQueue.java (revision 0)
@@ -0,0 +1,163 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** A PriorityQueue maintains a partial ordering of its elements such that the
+ * least element can always be found in constant time. Put()'s and pop()'s
+ * require log(size) time.
+ *
+ *
NOTE: This class pre-allocates a full array of
+ * length maxSize+1, in {@link #initialize}.
+ *
+*/
+public abstract class DocIDPriorityQueue {
+ private int size;
+ private int maxSize;
+ final protected int[] heap;
+ public final int base;
+
+ public DocIDPriorityQueue(int maxSize, int base) {
+ size = 0;
+ this.base = base;
+ int heapSize;
+ if (0 == maxSize)
+ // We allocate 1 extra to avoid if statement in top()
+ heapSize = 2;
+ else
+ heapSize = maxSize + 1;
+ heap = new int[heapSize];
+ this.maxSize = maxSize;
+ }
+
+ /** Determines the ordering of objects in this priority queue. Subclasses
+ must define this one method. */
+ public abstract int compare(int a, int b);
+
+ /**
+ * Adds an Object to a PriorityQueue in log(size) time. If one tries to add
+ * more objects than maxSize from initialize an
+ * {@link ArrayIndexOutOfBoundsException} is thrown.
+ *
+ * @return the new 'bottom' element in the queue.
+ */
+ public final int add(int element) {
+ size++;
+ heap[size] = element;
+ upHeap();
+ return heap[1];
+ }
+
+ public abstract Comparable sortValue(int doc);
+
+ public int replace(int element) {
+ heap[1] = element;
+ downHeap();
+ return heap[1];
+ }
+
+ /** Returns the least element of the PriorityQueue in constant time. */
+ public final int top() {
+ // We don't need to check size here: if maxSize is 0,
+ // then heap is length 2 array with both entries null.
+ // If size is 0 then heap[1] is already null.
+ return heap[1];
+ }
+
+ /** Removes and returns the least element of the PriorityQueue in log(size)
+ time. */
+ public final int pop() {
+ if (size > 0) {
+ int result = heap[1]; // save first value
+ heap[1] = heap[size]; // move last to first
+ heap[size] = -1; // permit GC of objects
+ size--;
+ downHeap(); // adjust heap
+ return result;
+ } else
+ return -1;
+ }
+
+ /**
+ * Should be called when the Object at top changes values. Still log(n) worst
+ * case, but it's at least twice as fast to
+ *
+ *
+ * pq.top().change();
+ * pq.updateTop();
+ *
+ *
+ * instead of
+ *
+ *
+ * o = pq.pop();
+ * o.change();
+ * pq.push(o);
+ *
+ *
+ * @return the new 'top' element.
+ */
+ public final int updateTop() {
+ downHeap();
+ return heap[1];
+ }
+
+ /** Returns the number of elements currently stored in the PriorityQueue. */
+ public final int size() {
+ return size;
+ }
+
+ /** Removes all entries from the PriorityQueue. */
+ public final void clear() {
+ for (int i = 0; i <= size; i++) {
+ heap[i] = -1;
+ }
+ size = 0;
+ }
+
+ private final void upHeap() {
+ int i = size;
+ int node = heap[i]; // save bottom node
+ int j = i >>> 1;
+ while (j > 0 && compare(node, heap[j]) < 0) {
+ heap[i] = heap[j]; // shift parents down
+ i = j;
+ j = j >>> 1;
+ }
+ heap[i] = node; // install saved node
+ }
+
+ private final void downHeap() {
+ int i = 1;
+ int node = heap[i]; // save top node
+ int j = i << 1; // find smaller child
+ int k = j + 1;
+ if (k <= size && compare(heap[k], heap[j]) < 0) {
+ j = k;
+ }
+ while (j <= size && compare(heap[j], node) < 0) {
+ heap[i] = heap[j]; // shift up child
+ i = j;
+ j = i << 1;
+ k = j + 1;
+ if (k <= size && compare(heap[k], heap[j]) < 0) {
+ j = k;
+ }
+ }
+ heap[i] = node; // install saved node
+ }
+}
Property changes on: src/java/org/apache/lucene/util/DocIDPriorityQueue.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByIntQueue.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByIntQueue.java (revision 0)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByIntQueue.java (revision 0)
@@ -0,0 +1,36 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.ScoreDocComparator;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.util.DocIDPriorityQueue;
+import org.apache.lucene.index.IndexReader;
+import java.io.IOException;
+
+public class SortByIntQueue extends DocIDPriorityQueue {
+ private final int[] values;
+
+ SortByIntQueue(int size, int base, IndexReader reader, String field) throws IOException {
+ super(size, base);
+ values = FieldCache.DEFAULT.getInts(reader, field);
+ }
+
+ @Override
+ public final int compare(int doc1, int doc2) {
+ // Cannot simply subtract: could overflow int
+ final int v1 = values[doc1];
+ final int v2 = values[doc2];
+ if (v1 > v2) {
+ return -1;
+ } else if (v1 < v2) {
+ return 1;
+ } else {
+ return doc2 - doc1;
+ }
+ }
+
+ @Override
+ public Comparable sortValue(int doc) {
+ return Integer.valueOf(values[doc]);
+ }
+}
Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByIntQueue.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByStringQueue.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByStringQueue.java (revision 0)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByStringQueue.java (revision 0)
@@ -0,0 +1,36 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.ScoreDocComparator;
+import org.apache.lucene.util.DocIDPriorityQueue;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.FieldCache;
+import java.io.IOException;
+
+public class SortByStringQueue extends DocIDPriorityQueue {
+
+ private final int[] order;
+ private final String[] values;
+
+ SortByStringQueue(int size, int base, IndexReader reader, String field) throws IOException {
+ super(size, base);
+ FieldCache.StringIndex index = FieldCache.DEFAULT.getStringIndex (reader, field);
+ order = index.order;
+ values = index.lookup;
+ }
+
+ @Override
+ public final int compare(int doc1, int doc2) {
+ final int cmp = order[doc2] - order[doc1];
+ if (cmp != 0) {
+ return cmp;
+ } else {
+ return doc2 - doc1;
+ }
+ }
+
+ @Override
+ public Comparable sortValue(int doc) {
+ return values[order[doc]];
+ }
+}
Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SortByStringQueue.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OneSortNoScoreCollector.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OneSortNoScoreCollector.java (revision 0)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OneSortNoScoreCollector.java (revision 0)
@@ -0,0 +1,123 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.LinkedList;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.DocIDPriorityQueue;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.ScoreDocComparator;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.SortComparatorSource;
+
+public class OneSortNoScoreCollector extends Collector {
+ private final LinkedList _pqList;
+ private final int _numHits;
+ private int _totalHits;
+ private final String _field;
+ private int _bottom;
+ private boolean _queueFull;
+ private DocIDPriorityQueue _currentQueue;
+ private final boolean stringSort;
+
+ public static class NonScoreDoc extends ScoreDoc {
+ final DocIDPriorityQueue queue;
+
+ public NonScoreDoc(int docid, DocIDPriorityQueue queue) {
+ super(docid, 0.0f);
+ this.queue = queue;
+ }
+ }
+
+ public OneSortNoScoreCollector(boolean stringSort, String field,int numHits) {
+ this.stringSort = stringSort;
+ _pqList = new LinkedList();
+ _numHits = numHits;
+ _field = field;
+ _totalHits = 0;
+ _queueFull = false;
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return true;
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ _totalHits++;
+ if (_queueFull){
+ if (_currentQueue.compare(_bottom,doc) >= 0) {
+ return;
+ }
+ _bottom = _currentQueue.replace(doc);
+ }
+ else{
+ _bottom = _currentQueue.add(doc);
+ _queueFull = (_currentQueue.size() >= _numHits);
+ }
+ }
+
+ @Override
+ public void setNextReader(IndexReader reader, int docBase) throws IOException {
+ if (stringSort) {
+ _currentQueue = new SortByStringQueue(_numHits, docBase, reader, _field);
+ } else {
+ _currentQueue = new SortByIntQueue(_numHits, docBase, reader, _field);
+ }
+ _pqList.add(_currentQueue);
+ _queueFull = false;
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ }
+
+ public int getTotalHits(){
+ return _totalHits;
+ }
+
+ public ArrayList getTop(){
+ ArrayList> iterList = new ArrayList>(_pqList.size());
+ for (DocIDPriorityQueue pq : _pqList){
+ int count = pq.size();
+ NonScoreDoc[] resList = new NonScoreDoc[count];
+ for (int i = count - 1; i >= 0; i--) {
+ resList[i] = new NonScoreDoc(pq.pop(), pq);
+ }
+ iterList.add(Arrays.asList(resList).iterator());
+ }
+ ArrayList resList = ListMerger.mergeLists(0, _numHits, iterList, new Comparator() {
+
+ public int compare(NonScoreDoc o1, NonScoreDoc o2) {
+ Comparable s1 = o1.queue.sortValue(o1.doc);
+ Comparable s2 = o2.queue.sortValue(o2.doc);
+ if (s1 == null) {
+ if (s2 == null) {
+ return 0;
+ } else {
+ return -1;
+ }
+ } else if (s2 == null) {
+ return 1;
+ }
+ int v = s1.compareTo(s2);
+ if (v==0){
+ return o1.doc + o1.queue.base - o2.doc - o2.queue.base;
+ } else {
+ return v;
+ }
+ }
+ });
+
+ for (NonScoreDoc doc : resList){
+ doc.doc += doc.queue.base;
+ }
+ return resList;
+ }
+}
Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OneSortNoScoreCollector.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithOldSortTask.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithOldSortTask.java (revision 0)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithOldSortTask.java (revision 0)
@@ -0,0 +1,167 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.ScoreDocComparator;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.FieldSortedHitQueue;
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * Does sort search on specified field.
+ *
+ */
+public class SearchWithOldSortTask extends ReadTask {
+
+ private Sort sort;
+
+ public SearchWithOldSortTask(PerfRunData runData) {
+ super(runData);
+ }
+
+ /**
+ * SortFields: field:type,field:type[,noscore][,nomaxscore]
+ *
+ * If noscore is present, then we turn off score tracking
+ * in {@link org.apache.lucene.search.TopFieldCollector}.
+ * If nomaxscore is present, then we turn off maxScore tracking
+ * in {@link org.apache.lucene.search.TopFieldCollector}.
+ *
+ * name:string,page:int,subject:string
+ *
+ */
+ public void setParams(String sortField) {
+ super.setParams(sortField);
+ String[] fields = sortField.split(",");
+ SortField[] sortFields = new SortField[fields.length];
+ int upto = 0;
+ for (int i = 0; i < fields.length; i++) {
+ String field = fields[i];
+ SortField sortField0;
+ if (field.equals("doc")) {
+ sortField0 = SortField.FIELD_DOC;
+ } if (field.equals("score")) {
+ sortField0 = SortField.FIELD_SCORE;
+ } else {
+ int index = field.lastIndexOf(":");
+ String fieldName;
+ String typeString;
+ if (index != -1) {
+ fieldName = field.substring(0, index);
+ typeString = field.substring(1+index, field.length());
+ } else {
+ throw new RuntimeException("You must specify the sort type ie page:int,subject:string");
+ }
+ int type = getType(typeString);
+ sortField0 = new SortField(fieldName, type);
+ }
+ sortFields[upto++] = sortField0;
+ }
+
+ if (upto < sortFields.length) {
+ SortField[] newSortFields = new SortField[upto];
+ System.arraycopy(sortFields, 0, newSortFields, 0, upto);
+ sortFields = newSortFields;
+ }
+ this.sort = new Sort(sortFields);
+
+ if (sortFields.length != 1) {
+ throw new RuntimeException("only 1 sort field allowed");
+ }
+ }
+
+ private int getType(String typeString) {
+ int type;
+ if (typeString.equals("float")) {
+ type = SortField.FLOAT;
+ } else if (typeString.equals("double")) {
+ type = SortField.DOUBLE;
+ } else if (typeString.equals("byte")) {
+ type = SortField.BYTE;
+ } else if (typeString.equals("short")) {
+ type = SortField.SHORT;
+ } else if (typeString.equals("int")) {
+ type = SortField.INT;
+ } else if (typeString.equals("long")) {
+ type = SortField.LONG;
+ } else if (typeString.equals("string")) {
+ type = SortField.STRING;
+ } else if (typeString.equals("string_val")) {
+ type = SortField.STRING_VAL;
+ } else {
+ throw new RuntimeException("Unrecognized sort field type " + typeString);
+ }
+ return type;
+ }
+
+ public boolean supportsParams() {
+ return true;
+ }
+
+ public QueryMaker getQueryMaker() {
+ return getRunData().getQueryMaker(this);
+ }
+
+ public boolean withRetrieve() {
+ return false;
+ }
+
+ public boolean withSearch() {
+ return true;
+ }
+
+ public boolean withTraverse() {
+ return false;
+ }
+
+ public boolean withWarm() {
+ return false;
+ }
+
+ public boolean withScore() {
+ return false;
+ }
+
+ public boolean withMaxScore() {
+ return false;
+ }
+
+ public Sort getSort() {
+ if (sort == null) {
+ throw new IllegalStateException("No sort field was set");
+ }
+ return sort;
+ }
+
+ public OneSortNoScoreCollector getCollector(int numHits) {
+ final SortField sortField = sort.getSort()[0];
+ final boolean isString;
+ if (sortField.getType() == SortField.STRING) {
+ isString = true;
+ } else if (sortField.getType() == SortField.INT) {
+ isString = false;
+ } else {
+ throw new RuntimeException("");
+ }
+
+ return new OneSortNoScoreCollector(isString, sortField.getField(), numHits);
+ }
+}
Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithOldSortTask.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ListMerger.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ListMerger.java (revision 0)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ListMerger.java (revision 0)
@@ -0,0 +1,149 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * @author ymatsuda
+ *
+ */
+public class ListMerger
+{
+ public static class MergedIterator implements Iterator
+ {
+ private class IteratorNode
+ {
+ public Iterator _iterator;
+ public T _curVal;
+
+ public IteratorNode(Iterator iterator)
+ {
+ _iterator = iterator;
+ _curVal = null;
+ }
+
+ public boolean fetch()
+ {
+ if(_iterator.hasNext())
+ {
+ _curVal = _iterator.next();
+ return true;
+ }
+ _curVal = null;
+ return false;
+ }
+ }
+
+ private final PriorityQueue _queue;
+
+ private MergedIterator(final int length, final Comparator comparator)
+ {
+ _queue = new PriorityQueue()
+ {
+ {
+ this.initialize(length);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ protected boolean lessThan(Object o1, Object o2)
+ {
+ T v1 = ((IteratorNode)o1)._curVal;
+ T v2 = ((IteratorNode)o2)._curVal;
+
+ return (comparator.compare(v1, v2) < 0);
+ }
+ };
+ }
+
+ public MergedIterator(final List> iterators, final Comparator comparator)
+ {
+ this(iterators.size(), comparator);
+ for(Iterator iterator : iterators)
+ {
+ IteratorNode ctx = new IteratorNode(iterator);
+ if(ctx.fetch()) _queue.insert(ctx);
+ }
+ }
+
+ public MergedIterator(final Iterator[] iterators, final Comparator comparator)
+ {
+ this(iterators.length, comparator);
+ for(Iterator iterator : iterators)
+ {
+ IteratorNode ctx = new IteratorNode(iterator);
+ if(ctx.fetch()) _queue.insert(ctx);
+ }
+ }
+
+ public boolean hasNext()
+ {
+ return _queue.size() > 0;
+ }
+
+ @SuppressWarnings("unchecked")
+ public T next()
+ {
+ IteratorNode ctx = (IteratorNode)_queue.top();
+ T val = ctx._curVal;
+ if (ctx.fetch())
+ {
+ _queue.adjustTop();
+ }
+ else
+ {
+ _queue.pop();
+ }
+ return val;
+ }
+
+ public void remove()
+ {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ private ListMerger() { }
+
+ public static Iterator mergeLists(final Iterator[] iterators, final Comparator comparator)
+ {
+ return new MergedIterator(iterators, comparator);
+ }
+
+ public static Iterator mergeLists(final List> iterators, final Comparator comparator)
+ {
+ return new MergedIterator(iterators, comparator);
+ }
+
+ public static ArrayList mergeLists(int offset, int count, Iterator[] iterators, Comparator comparator)
+ {
+ return mergeLists(offset, count, new MergedIterator(iterators, comparator));
+ }
+
+ public static ArrayList mergeLists(int offset, int count, List> iterators, Comparator comparator)
+ {
+ return mergeLists(offset, count, new MergedIterator(iterators, comparator));
+ }
+
+ private static ArrayList mergeLists(int offset, int count, Iterator mergedIter)
+ {
+ for (int c = 0; c < offset && mergedIter.hasNext(); c++)
+ {
+ mergedIter.next();
+ }
+
+ ArrayList mergedList = new ArrayList();
+
+ for (int c = 0; c < count && mergedIter.hasNext(); c++)
+ {
+ mergedList.add(mergedIter.next());
+ }
+
+ return mergedList;
+ }
+
+}
Property changes on: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ListMerger.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (revision 827021)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (working copy)
@@ -31,6 +31,8 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.SortComparatorSource;
+import org.apache.lucene.search.ScoreDocComparator;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.ScoreDoc;
@@ -59,6 +61,19 @@
public ReadTask(PerfRunData runData) {
super(runData);
}
+
+ private final SortComparatorSource sortSource = new SortComparatorSource() {
+ public ScoreDocComparator newComparator(IndexReader reader, String fieldName) throws IOException {
+ return getScoreDocComparator(reader, fieldName);
+ }
+ };
+
+ public ScoreDocComparator getScoreDocComparator(IndexReader reader, String fieldName) throws IOException {
+ return null;
+ }
+
+ private static boolean first = true;
+
public int doLogic() throws Exception {
int res = 0;
boolean closeReader = false;
@@ -94,20 +109,52 @@
QueryMaker queryMaker = getQueryMaker();
Query q = queryMaker.makeQuery();
Sort sort = getSort();
- TopDocs hits;
+ TopDocs hits = null;
+ List hits2 = null;
final int numHits = numHits();
+ int totalHits = 0;
if (numHits > 0) {
if (sort != null) {
- // TODO: change the following to create TFC with in/out-of order
+ if (sort.getSort().length != 1) {
+ throw new RuntimeException("sort length is " + sort.getSort().length);
+ }
+ // TODO: change the following to create TFC with in/out-of order
// according to whether the query's Scorer.
- TopFieldCollector collector = TopFieldCollector.create(sort, numHits,
- true, withScore(), withMaxScore(), false);
- searcher.search(q, collector);
- hits = collector.topDocs();
+ if (doOldSortAPI) {
+ OneSortNoScoreCollector c = getCollector(numHits);
+ searcher.search(q, c);
+ hits2 = c.getTop();
+ totalHits = c.getTotalHits();
+ } else {
+ TopFieldCollector collector = TopFieldCollector.create(sort, numHits,
+ true, withScore(), withMaxScore(), false);
+ searcher.search(q, collector);
+ hits = collector.topDocs();
+ totalHits = hits.totalHits;
+ }
} else {
hits = searcher.search(q, numHits);
+ totalHits = hits.totalHits;
}
- //System.out.println("q=" + q + ":" + hits.totalHits + " total hits");
+ if (first) {
+ first = false;
+ System.out.println("NUMHITS=" + totalHits);
+ System.out.println("MAXDOC=" + searcher.getIndexReader().maxDoc());
+ System.out.println("NUMDOCS=" + searcher.getIndexReader().numDocs());
+ if (hits != null) {
+ for(int i=0;i compile.log 2>&1') != 0:
+ raise RuntimeError('compile failed (see compile.log)')
+
+BASE_SEARCH_ALG = '''
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+work.dir = $INDEX$
+search.num.hits = $NUM_HITS$
+query.maker=org.apache.lucene.benchmark.byTask.feeds.FileBasedQueryMaker
+file.query.maker.file = queries.txt
+log.queries=true
+log.step=100000
+
+OpenReader
+{"XSearchWarm" $SEARCH$}
+$ROUNDS$
+CloseReader
+RepSumByPrefRound XSearch
+'''
+
+BASE_INDEX_ALG = '''
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+
+$OTHER$
+
+doc.stored = true
+doc.term.vector = false
+log.step.AddDoc=10000
+
+directory=FSDirectory
+autocommit=false
+compound=false
+
+work.dir=$WORKDIR$
+
+{ "BuildIndex"
+ - CreateIndex
+ $INDEX_LINE$
+ - CloseIndex
+}
+
+RepSumByPrefRound BuildIndex
+'''
+
+class RunAlgs:
+
+ def __init__(self, resultsPrefix):
+ self.counter = 0
+ self.results = []
+ self.fOut = open('%s.txt' % resultsPrefix, 'wb')
+
+ def makeIndex(self, source, numDocs, balancedNumSegs=None):
+
+ if source not in ('wiki', 'random'):
+ raise RuntimeError('source must be wiki or random')
+
+ indexName = 'work.%s.nd%gM' % (source, numDocs/1000000.0)
+ if balancedNumSegs is not None:
+ indexName += '_balanced%d' % balancedNumSegs
+ fullIndexPath = '%s/%s' % (INDEX_DIR_BASE, indexName)
+
+ if os.path.exists(fullIndexPath):
+ print 'Index %s already exists...' % fullIndexPath
+ return indexName
+
+ print 'Now create index %s...' % fullIndexPath
+
+ s = BASE_INDEX_ALG
+
+ if source == 'wiki':
+ other = '''content.source=org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource
+docs.file=%s
+doc.tokenized = false
+''' % WIKI_FILE
+ else:
+ other = '''doc.index.props = true
+doc.tokenized = false
+doc.body.tokenized = false
+content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource
+'''
+ if INDEX_NUM_THREADS > 1:
+ other += 'doc.reuse.fields=false\n'
+ s = s.replace('$INDEX_LINE$', '[ { "AddDocs" AddDoc > : %s } : %s' % \
+ (numDocs/INDEX_NUM_THREADS, INDEX_NUM_THREADS))
+ else:
+ s = s.replace('$INDEX_LINE$', '{ "AddDocs" AddDoc > : %s' % \
+ numDocs)
+
+ s = s.replace('$WORKDIR$', fullIndexPath)
+
+ if balancedNumSegs is not None:
+ other += ''' merge.factor=1000
+ max.buffered=%d
+ ram.flush.mb=2000
+ ''' % (numDocs/balancedNumSegs)
+ else:
+ if source == 'random':
+ other += 'ram.flush.mb=1.0\n'
+ else:
+ other += 'ram.flush.mb=32.0\n'
+
+ s = s.replace('$OTHER$', other)
+
+ try:
+ self.runOne(s, 'index_%s' % indexName, isIndex=True)
+ except:
+ if os.path.exists(fullIndexPath):
+ shutil.rmtree(fullIndexPath)
+ raise
+ return indexName
+
+ def getLogPrefix(self, **dArgs):
+ l = dArgs.items()
+ l.sort()
+ return '_'.join(['%s=%s' % tup for tup in l])
+
+ def runOne(self, alg, logFileName, indexNumDocs=None, queries=None, verify=False, isIndex=False):
+
+ if queries is not None:
+ if type(queries) in types.StringTypes:
+ queries = [queries]
+ open('queries.txt', 'wb').write('\n'.join(queries))
+
+ if DEBUG:
+ algFile = 'tmp.alg'
+ else:
+ algFile = 'tmp.%s.alg' % os.getpid()
+ open(algFile, 'wb').write(alg)
+
+ fullLogFileName = '%s/%s' % (LOG_DIR, logFileName)
+ print ' log: %s' % fullLogFileName
+
+ command = '%s -classpath ../../build/classes/java:../../build/classes/demo:../../build/contrib/highlighter/classes/java:lib/commons-digester-1.7.jar:lib/commons-collections-3.1.jar:lib/commons-compress-1.0.jar:lib/commons-logging-1.0.4.jar:lib/commons-beanutils-1.7.0.jar:lib/xerces-2.9.0.jar:lib/xml-apis-2.9.0.jar:../../build/contrib/benchmark/classes/java org.apache.lucene.benchmark.byTask.Benchmark %s > %s 2>&1' % (JAVA_COMMAND, algFile, fullLogFileName)
+
+ if DEBUG:
+ print 'command=%s' % command
+
+ try:
+ t0 = time.time()
+ if os.system(command) != 0:
+ raise RuntimeError('FAILED')
+ t1 = time.time()
+ finally:
+ if not DEBUG:
+ os.remove(algFile)
+
+ if isIndex:
+ s = open(fullLogFileName, 'rb').read()
+ if s.find('Exception in thread "') != -1 or s.find('at org.apache.lucene') != -1:
+ raise RuntimeError('alg hit exceptions')
+ return
+
+ else:
+
+ # Parse results:
+ bestQPS = None
+ count = 0
+ nhits = None
+ ndocs = None
+ warmTime = None
+ r = re.compile('^ ([0-9]+): (.*)$')
+ topN = []
+
+ for line in open(fullLogFileName, 'rb').readlines():
+ m = r.match(line.rstrip())
+ if m is not None:
+ topN.append(m.group(2))
+ if line.startswith('NUMHITS='):
+ nhits = int(line[8:].strip())
+ if line.startswith('NUMDOCS='):
+ ndocs = int(line[8:].strip())
+ if line.startswith('XSearchWarm'):
+ v = line.strip().split()
+ warmTime = float(v[5])
+ if line.startswith('XSearchReal'):
+ v = line.strip().split()
+ # print len(v), v
+ upto = 0
+ i = 0
+ qps = None
+ while i < len(v):
+ if v[i] == '-':
+ i += 1
+ continue
+ else:
+ upto += 1
+ i += 1
+ if upto == 5:
+ qps = float(v[i-1].replace(',', ''))
+ break
+
+ if qps is None:
+ raise RuntimeError('did not find qps')
+
+ count += 1
+ if bestQPS is None or qps > bestQPS:
+ bestQPS = qps
+
+ if not verify:
+ if count != NUM_ROUND:
+ raise RuntimeError('did not find %s rounds (got %s)' % (NUM_ROUND, count))
+ if warmTime is None:
+ raise RuntimeError('did not find warm time')
+ else:
+ bestQPS = 1.0
+ warmTime = None
+
+ if nhits is None:
+ raise RuntimeError('did not see NUMHITS=line')
+
+ if ndocs is None:
+ raise RuntimeError('did not see NUMDOCS=line')
+
+ if ndocs != indexNumDocs:
+ raise RuntimeError('indexNumDocs mismatch: expected %d but got %d' % (indexNumDocs, ndocs))
+
+ return nhits, warmTime, bestQPS, topN
+
+ def getAlg(self, indexPath, searchTask, numHits, verify=False):
+
+ s = BASE_SEARCH_ALG
+
+ if not verify:
+ s = s.replace('$ROUNDS$',
+ '''
+ { "Rounds"
+ { "Run"
+ { "TestSearchSpeed"
+ { "XSearchReal" $SEARCH$ > : 3.0s
+ }
+ NewRound
+ } : %d
+ }
+ ''' % NUM_ROUND)
+ else:
+ s = s.replace('$ROUNDS$', '')
+
+ s = s.replace('$INDEX$', indexPath)
+ s = s.replace('$SEARCH$', searchTask)
+ s = s.replace('$NUM_HITS$', str(numHits))
+
+ return s
+
+ def compare(self, baseline, new, *params):
+
+ if new[0] != baseline[0]:
+ raise RuntimeError('baseline found %d hits but new found %d hits' % (baseline[0], new[0]))
+
+ qpsOld = baseline[2]
+ qpsNew = new[2]
+ pct = 100.0*(qpsNew-qpsOld)/qpsOld
+ print ' diff: %.1f%%' % pct
+ self.results.append((qpsOld, qpsNew, params))
+
+ self.fOut.write('|%s|%.2f|%.2f|%.1f%%|\n' % \
+ ('|'.join(str(x) for x in params),
+ qpsOld, qpsNew, pct))
+ self.fOut.flush()
+
+ def save(self, name):
+ f = open('%s.pk' % name, 'wb')
+ cPickle.dump(self.results, f)
+ f.close()
+
+def verify(r1, r2):
+ if r1[0] != r2[0]:
+ raise RuntimeError('different total hits: %s vs %s' % (r1[0], r2[0]))
+
+ h1 = r1[3]
+ h2 = r2[3]
+ if len(h1) != len(h2):
+ raise RuntimeError('different number of results')
+ else:
+ for i in range(len(h1)):
+ s1 = h1[i].replace('score=NaN', 'score=na')
+ s2 = h2[i].replace('score=NaN', 'score=na')
+ if s1 != s2:
+ raise RuntimeError('hit %s differs: %s vs %s' % (i, s1 ,s2))
+
+def usage():
+ print
+ print 'Usage: python -u %s -run | -report ' % sys.argv[0]
+ print
+ print ' -run runs all tests, saving results to file .pk'
+ print ' -report opens .pk and prints Jira table'
+ print ' -verify confirm old & new produce identical results'
+ print
+ sys.exit(1)
+
+def main():
+
+ if not os.path.exists(LOG_DIR):
+ os.makedirs(LOG_DIR)
+
+ if '-run' in sys.argv:
+ i = sys.argv.index('-run')
+ mode = 'run'
+ if i < len(sys.argv)-1:
+ name = sys.argv[1+i]
+ else:
+ usage()
+ elif '-report' in sys.argv:
+ i = sys.argv.index('-report')
+ mode = 'report'
+ if i < len(sys.argv)-1:
+ name = sys.argv[1+i]
+ else:
+ usage()
+ elif '-verify' in sys.argv:
+ mode = 'verify'
+ name = None
+ else:
+ usage()
+
+ if mode in ('run', 'verify'):
+ run(mode, name)
+ else:
+ report(name)
+
+def report(name):
+
+ print '||Source||Seg size||Query||Tot hits||Sort||Top N||QPS old||QPS new||Pct change||'
+
+ results = cPickle.load(open('%s.pk' % name))
+ for qpsOld, qpsNew, params in results:
+ pct = 100.0*(qpsNew-qpsOld)/qpsOld
+ if pct < 0.0:
+ c = 'red'
+ else:
+ c = 'green'
+
+ if not DO_BALANCED and params[1] == 'balanced':
+ continue
+
+ params = list(params)
+ sort = params[4]
+ sort = sort.replace(':string', '')
+ sort = sort.replace('doctitle', 'title')
+ sort = sort.replace('sort_field:int', 'rand int')
+ sort = sort.replace('random_string', 'rand string')
+ params[4] = sort
+
+ query = params[2]
+ if query == '*:*':
+ query = ''
+ params[2] = query
+
+ pct = '{color:%s}%.1f%%{color}' % (c, pct)
+ print '|%s|%.2f|%.2f|%s|' % \
+ ('|'.join(str(x) for x in params),
+ qpsOld, qpsNew, pct)
+
+def run(mode, name):
+
+ r = RunAlgs(name)
+
+ if not os.path.exists(WIKI_FILE):
+ print
+ print 'NOTE: wiki source file "%s" does not exist; skipping wikipedia index tests (edit WIKI_FILE in this script & restart if this is wrong)' % WIKI_FILE
+ print
+ doWiki = False
+ else:
+ doWiki = True
+ print
+
+ print
+ print 'JAVA:\n%s' % os.popen('java -version 2>&1').read()
+
+ print
+ if osName != 'windows':
+ print 'OS:\n%s' % os.popen('uname -a 2>&1').read()
+ else:
+ print 'OS:\n%s' % sys.platform
+
+ if DO_BALANCED:
+ balancedTup = (None, 20)
+ else:
+ balancedTup = (None,)
+
+ indexes = {}
+ for source in ('wiki', 'random'):
+ if source != 'wiki' or doWiki:
+ for balanced in balancedTup:
+ #indexes[(source, balanced)] = r.makeIndex(source, 2000000, balancedNumSegs=balanced)
+ indexes[(source, balanced)] = r.makeIndex(source, INDEX_NUM_DOCS, balancedNumSegs=balanced)
+
+ doVerify = mode == 'verify'
+ for balanced in balancedTup:
+ if doWiki:
+ sources = ('wiki', 'random')
+ else:
+ sources = ('random',)
+
+ for source in sources:
+ if source == 'random':
+ queries = ('*:*',)
+ else:
+ queries = ('1', '*:*')
+
+ for query in queries:
+ if source == 'random':
+ sorts = (
+ 'random_string:string',
+ 'country:string'
+ 'sort_field:int',
+ )
+ else:
+ sorts = ('doctitle:string',)
+ for sort in sorts:
+ for numHits in (10, 25, 50, 100, 500, 1000):
+
+ if balanced is None:
+ s = 'log'
+ else:
+ s = 'balanced'
+
+ print '\nRUN: balanced=%s source=%s query=%s sort=%s nhits=%d' % \
+ (s, source, query, sort, numHits)
+
+ prefix = r.getLogPrefix(balanced=balanced, source=source, query=query, sort=sort, numHits=numHits)
+ indexPath = '%s/%s' % (INDEX_DIR_BASE, indexes[(source, balanced)])
+
+ # singlePQ -- baseline (current 2.9.x)
+ s = r.getAlg(indexPath,
+ 'SearchWithSort(%s,noscore,nomaxscore)' % sort,
+ numHits,
+ verify=doVerify)
+ singlePQ = r.runOne(s, 'singlePQ_%s' % prefix, INDEX_NUM_DOCS, query, verify=doVerify)
+
+ # multiPQ
+ s = r.getAlg(indexPath,
+ 'SearchWithOldSort(%s)' % sort,
+ numHits,
+ verify=doVerify)
+ s = 'old.sort.api=true\n' + s
+
+ multiPQ = r.runOne(s, 'multiPQ_%s' % prefix, INDEX_NUM_DOCS, query, verify=doVerify)
+ print ' %d hits' % singlePQ[0]
+
+ verify(singlePQ, multiPQ)
+
+ if mode == 'run':
+
+ if balanced is None:
+ bs = 'log'
+ else:
+ bs = 'balanced'
+
+ r.compare(singlePQ, multiPQ,
+ source, bs, query, singlePQ[0], sort, numHits)
+ r.save(name)
+
+def cleanScores(l):
+ for i in range(len(l)):
+ pos = l[i].find(' score=')
+ l[i] = l[i][:pos].strip()
+
+if __name__ == '__main__':
+ main()
Property changes on: contrib/benchmark/sortBench.py
___________________________________________________________________
Added: svn:eol-style
+ native