Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1026498) +++ lucene/CHANGES.txt (working copy) @@ -316,10 +316,6 @@ Changes in backwards compatibility policy -* LUCENE-1483: Removed utility class oal.util.SorterTemplate; this - class is no longer used by Lucene. (Gunnar Wagenknecht via Mike - McCandless) - * LUCENE-1923: Renamed SegmentInfo & SegmentInfos segString method to toString. These are advanced APIs and subject to change suddenly. (Tim Smith via Mike McCandless) Index: lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (revision 1026498) +++ lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. */ +import java.util.Comparator; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -242,7 +243,7 @@ // sort the subset of fields that have vectors // enabled; we could save [small amount of] CPU // here. 
- quickSort(fields, 0, fieldCount-1); + ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp); for(int i=0;i= hi) - return; - else if (hi == 1+lo) { - if (array[lo].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { - final DocFieldProcessorPerField tmp = array[lo]; - array[lo] = array[hi]; - array[hi] = tmp; - } - return; + + private static final Comparator fieldsComp = new Comparator() { + public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) { + return o1.fieldInfo.name.compareTo(o2.fieldInfo.name); } + }; - int mid = (lo + hi) >>> 1; - - if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { - DocFieldProcessorPerField tmp = array[lo]; - array[lo] = array[mid]; - array[mid] = tmp; - } - - if (array[mid].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { - DocFieldProcessorPerField tmp = array[mid]; - array[mid] = array[hi]; - array[hi] = tmp; - - if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { - DocFieldProcessorPerField tmp2 = array[lo]; - array[lo] = array[mid]; - array[mid] = tmp2; - } - } - - int left = lo + 1; - int right = hi - 1; - - if (left >= right) - return; - - DocFieldProcessorPerField partition = array[mid]; - - for (; ;) { - while (array[right].fieldInfo.name.compareTo(partition.fieldInfo.name) > 0) - --right; - - while (left < right && array[left].fieldInfo.name.compareTo(partition.fieldInfo.name) <= 0) - ++left; - - if (left < right) { - DocFieldProcessorPerField tmp = array[left]; - array[left] = array[right]; - array[right] = tmp; - --right; - } else { - break; - } - } - - quickSort(array, lo, left); - quickSort(array, left + 1, hi); - } - PerDoc[] docFreeList = new PerDoc[1]; int freeCount; int allocCount; Index: lucene/src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 1026498) +++ 
lucene/src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.search.Similarity; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.store.*; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; // for javadocs @@ -1250,7 +1251,7 @@ cfr = new CompoundFileReader(dir, filename); String [] files = cfr.listAll(); - Arrays.sort(files); // sort the array of filename so that the output is more readable + ArrayUtil.quickSort(files); // sort the array of filename so that the output is more readable for (int i = 0; i < files.length; ++i) { long len = cfr.fileLength(files[i]); Index: lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java (revision 1026498) +++ lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java (working copy) @@ -51,8 +51,9 @@ // We don't need to sort the array in any future calls because we know // it will already start off sorted (all scorers on same doc). - // note that this comparator is not consistent with equals! - Arrays.sort(scorers, new Comparator() { // sort the array + // Note that this comparator is not consistent with equals! + // Also we use Arrays.sort() here to be stable! 
+ Arrays.sort(scorers, new Comparator() { // sort the array public int compare(Scorer o1, Scorer o2) { return o1.docID() - o2.docID(); } Index: lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (revision 1026498) +++ lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.PriorityQueue; @@ -213,7 +214,7 @@ // sort by increasing docFreq order if (slop == 0) { - Arrays.sort(postingsFreqs); + ArrayUtil.quickSort(postingsFreqs); } if (slop == 0) { Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 1026498) +++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -507,7 +507,7 @@ final Term placeholderTerm = new Term(query.field); final BooleanQuery bq = new BooleanQuery(true); final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]); - Arrays.sort(scoreTerms, new Comparator() { + ArrayUtil.quickSort(scoreTerms, new Comparator() { public int compare(ScoreTerm st1, ScoreTerm st2) { assert st1.termComp == st2.termComp : "term comparator should not change between segments"; Index: lucene/src/java/org/apache/lucene/search/PhraseQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/PhraseQuery.java (revision 1026498) +++ lucene/src/java/org/apache/lucene/search/PhraseQuery.java (working copy) @@ -27,6 +27,7 @@ import 
org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Explanation.IDFExplanation; import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; /** A Query that matches documents containing a particular sequence of terms. @@ -201,7 +202,7 @@ // sort by increasing docFreq order if (slop == 0) { - Arrays.sort(postingsFreqs); + ArrayUtil.quickSort(postingsFreqs); } if (slop == 0) { // optimize exact case Index: lucene/src/java/org/apache/lucene/search/QueryTermVector.java =================================================================== --- lucene/src/java/org/apache/lucene/search/QueryTermVector.java (revision 1026498) +++ lucene/src/java/org/apache/lucene/search/QueryTermVector.java (working copy) @@ -31,6 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.index.TermFreqVector; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; /** @@ -81,7 +82,7 @@ private void processTerms(BytesRef[] queryTerms) { if (queryTerms != null) { - Arrays.sort(queryTerms); + ArrayUtil.quickSort(queryTerms); Map tmpSet = new HashMap(queryTerms.length); //filter out duplicates List tmpList = new ArrayList(queryTerms.length); Index: lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java =================================================================== --- lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (revision 1026498) +++ lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (working copy) @@ -18,10 +18,10 @@ */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.ArrayUtil; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Comparator; import java.util.HashSet; import java.util.LinkedList; @@ -190,7 +190,7 @@ /** Advance the subSpans 
to the same document */ private boolean toSameDoc() throws IOException { - Arrays.sort(subSpansByDoc, spanDocComparator); + ArrayUtil.quickSort(subSpansByDoc, spanDocComparator); int firstIndex = 0; int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc(); while (subSpansByDoc[firstIndex].doc() != maxDoc) { Index: lucene/src/java/org/apache/lucene/util/ArrayUtil.java =================================================================== --- lucene/src/java/org/apache/lucene/util/ArrayUtil.java (revision 1026500) +++ lucene/src/java/org/apache/lucene/util/ArrayUtil.java (working copy) @@ -18,6 +18,7 @@ */ import java.util.Collection; +import java.util.Comparator; /** * Methods for manipulating arrays. @@ -467,4 +468,75 @@ return result; } -} + + /** SorterTemplate with custom {@link Comparator} */ + private static final SorterTemplate getSorter(final T[] a, final Comparator comp) { + return new SorterTemplate() { + @Override + protected void swap(int i, int j) { + final T o = a[i]; + a[i] = a[j]; + a[j] = o; + } + + @Override + protected int compare(int i, int j) { + return comp.compare(a[i], a[j]); + } + }; + } + + /** Natural SorterTemplate */ + private static final > SorterTemplate getSorter(final T[] a) { + return new SorterTemplate() { + @Override + protected void swap(int i, int j) { + final T o = a[i]; + a[i] = a[j]; + a[j] = o; + } + + @Override + protected int compare(int i, int j) { + return a[i].compareTo(a[j]); + } + }; + } + + // quickSorts (endindex is exclusive!): + + public static void quickSort(T[] a, int fromIndex, int toIndex, Comparator comp) { + getSorter(a, comp).quickSort(fromIndex, toIndex-1); + } + + public static void quickSort(T[] a, Comparator comp) { + quickSort(a, 0, a.length, comp); + } + + public static > void quickSort(T[] a, int fromIndex, int toIndex) { + getSorter(a).quickSort(fromIndex, toIndex-1); + } + + public static > void quickSort(T[] a) { + quickSort(a, 0, a.length); + } + + // mergeSorts: + + public static void 
mergeSort(T[] a, int fromIndex, int toIndex, Comparator comp) { + getSorter(a, comp).mergeSort(fromIndex, toIndex-1); + } + + public static void mergeSort(T[] a, Comparator comp) { + mergeSort(a, 0, a.length, comp); + } + + public static > void mergeSort(T[] a, int fromIndex, int toIndex) { + getSorter(a).mergeSort(fromIndex, toIndex-1); + } + + public static > void mergeSort(T[] a) { + mergeSort(a, 0, a.length); + } + +} \ No newline at end of file Index: lucene/src/java/org/apache/lucene/util/automaton/BasicOperations.java =================================================================== --- lucene/src/java/org/apache/lucene/util/automaton/BasicOperations.java (revision 1026500) +++ lucene/src/java/org/apache/lucene/util/automaton/BasicOperations.java (working copy) @@ -556,7 +556,7 @@ public void sort() { if (count > 1) { - Arrays.sort(points, 0, count); + ArrayUtil.quickSort(points, 0, count); } } Index: lucene/src/java/org/apache/lucene/util/automaton/State.java =================================================================== --- lucene/src/java/org/apache/lucene/util/automaton/State.java (revision 1026500) +++ lucene/src/java/org/apache/lucene/util/automaton/State.java (working copy) @@ -233,7 +233,7 @@ /** Sorts transitions array in-place. 
*/ public void sortTransitions(Comparator comparator) { if (numTransitions > 1) - Arrays.sort(transitionsArray, 0, numTransitions, comparator); + ArrayUtil.quickSort(transitionsArray, 0, numTransitions, comparator); } /** Index: lucene/src/java/org/apache/lucene/util/BytesRefHash.java =================================================================== --- lucene/src/java/org/apache/lucene/util/BytesRefHash.java (revision 1026500) +++ lucene/src/java/org/apache/lucene/util/BytesRefHash.java (working copy) @@ -45,18 +45,24 @@ */ public final class BytesRefHash { - private final ByteBlockPool pool; + public static final int DEFAULT_CAPACITY = 16; + + // the following fields are needed by comparator, + // so package private to prevent access$-methods: + final ByteBlockPool pool; + int[] bytesStart; + final BytesRef scratch1 = new BytesRef(); + final BytesRef scratch2 = new BytesRef(); + private int hashSize; private int hashHalfSize; private int hashMask; private int count; private int lastCount = -1; private int[] ords; - private int[] bytesStart; - public static final int DEFAULT_CAPACITY = 16; private final BytesStartArray bytesStartArray; private AtomicLong bytesUsed; - + /** * Creates a new {@link BytesRefHash} with a {@link ByteBlockPool} using a * {@link DirectAllocator}. 
@@ -152,84 +158,31 @@ * @param comp * the {@link Comparator} used for sorting */ - public int[] sort(Comparator comp) { + public int[] sort(final Comparator comp) { final int[] compact = compact(); - quickSort(comp, compact, 0, count - 1); - return compact; - } - - private void quickSort(Comparator comp, int[] entries, int lo, - int hi) { - if (lo >= hi) - return; - if (hi == 1 + lo) { - if (compare(comp, entries[lo], entries[hi]) > 0) { - final int tmp = entries[lo]; - entries[lo] = entries[hi]; - entries[hi] = tmp; + new SorterTemplate() { + @Override + protected void swap(int i, int j) { + final int o = compact[i]; + compact[i] = compact[j]; + compact[j] = o; } - return; - } - final int mid = (lo + hi) >>> 1; - if (compare(comp, entries[lo], entries[mid]) > 0) { - int tmp = entries[lo]; - entries[lo] = entries[mid]; - entries[mid] = tmp; - } - - if (compare(comp, entries[mid], entries[hi]) > 0) { - int tmp = entries[mid]; - entries[mid] = entries[hi]; - entries[hi] = tmp; - - if (compare(comp, entries[lo], entries[mid]) > 0) { - int tmp2 = entries[lo]; - entries[lo] = entries[mid]; - entries[mid] = tmp2; + + @Override + protected int compare(int i, int j) { + final int ord1 = compact[i], ord2 = compact[j]; + assert bytesStart.length > ord1 && bytesStart.length > ord2; + return comp.compare(pool.setBytesRef(scratch1, bytesStart[ord1]), + pool.setBytesRef(scratch2, bytesStart[ord2])); } - } - int left = lo + 1; - int right = hi - 1; - - if (left >= right) - return; - - final int partition = entries[mid]; - - for (;;) { - while (compare(comp, entries[right], partition) > 0) - --right; - - while (left < right && compare(comp, entries[left], partition) <= 0) - ++left; - - if (left < right) { - final int tmp = entries[left]; - entries[left] = entries[right]; - entries[right] = tmp; - --right; - } else { - break; - } - } - - quickSort(comp, entries, lo, left); - quickSort(comp, entries, left + 1, hi); + }.quickSort(0, count - 1); + return compact; } - private final 
BytesRef scratch1 = new BytesRef(); - private final BytesRef scratch2 = new BytesRef(); - private boolean equals(int ord, BytesRef b) { return pool.setBytesRef(scratch1, bytesStart[ord]).bytesEquals(b); } - private int compare(Comparator comp, int ord1, int ord2) { - assert bytesStart.length > ord1 && bytesStart.length > ord2; - return comp.compare(pool.setBytesRef(scratch1, bytesStart[ord1]), - pool.setBytesRef(scratch2, bytesStart[ord2])); - } - private boolean shrink(int targetSize) { // Cannot use ArrayUtil.shrink because we require power // of 2: Index: lucene/src/java/org/apache/lucene/util/SorterTemplate.java =================================================================== --- lucene/src/java/org/apache/lucene/util/SorterTemplate.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/SorterTemplate.java (revision 0) @@ -0,0 +1,185 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This class was inspired by CGLIB, but provides a better + * QuickSort algorithm without additional InsertionSort + * at the end. + * To use, subclass and override the two abstract methods + * which compare and modify your data. 
+ * Allows custom swap so that two arrays can be sorted + * at the same time. + * @lucene.internal + */ +public abstract class SorterTemplate { + + private static final int MERGESORT_THRESHOLD = 12; + private static final int QUICKSORT_THRESHOLD = 7; + + /** Implement this method, that swaps slots i and j in your data */ + protected abstract void swap(int i, int j); + + /** Compares slots i and j of your data */ + protected abstract int compare(int i, int j); + + /** Sorts via QuickSort algorithm */ + public final void quickSort(int lo, int hi) { + if (hi - lo <= QUICKSORT_THRESHOLD) { + insertionSort(lo, hi); + return; + } + + int mid = (lo + hi) >>> 1; + + if (compare(lo, mid) > 0) { + swap(lo, mid); + } + + if (compare(mid, hi) > 0) { + swap(mid, hi); + if (compare(lo, mid) > 0) { + swap(lo, mid); + } + } + + int left = lo + 1; + int right = hi - 1; + for (;;) { + while (compare(right, mid) > 0) + --right; + + while (left < right && compare(left, mid) <= 0) + ++left; + + if (left < right) { + if (right == mid) { + mid = left; + } + swap(left, right); + --right; + } else { + break; + } + } + + quickSort(lo, left); + quickSort(left + 1, hi); + } + + /** Sorts via MergeSort algorithm */ + public final void mergeSort(int lo, int hi) { + final int diff = hi - lo; + if (diff <= MERGESORT_THRESHOLD) { + insertionSort(lo, hi); + return; + } + final int mid = lo + diff / 2; + mergeSort(lo, mid); + mergeSort(mid, hi); + merge(lo, mid, hi, mid - lo, hi - mid); + } + + private void merge(int lo, int pivot, int hi, int len1, int len2) { + if (len1 == 0 || len2 == 0) { + return; + } + if (len1 + len2 == 2) { + if (compare(pivot, lo) < 0) { + swap(pivot, lo); + } + return; + } + int first_cut, second_cut; + int len11, len22; + if (len1 > len2) { + len11 = len1 / 2; + first_cut = lo + len11; + second_cut = lower(pivot, hi, first_cut); + len22 = second_cut - pivot; + } else { + len22 = len2 / 2; + second_cut = pivot + len22; + first_cut = upper(lo, pivot, second_cut); + len11 = 
first_cut - lo; + } + rotate(first_cut, pivot, second_cut); + final int new_mid = first_cut + len22; + merge(lo, first_cut, new_mid, len11, len22); + merge(new_mid, second_cut, hi, len1 - len11, len2 - len22); + } + + private void rotate(int lo, int mid, int hi) { + int lot = lo; + int hit = mid - 1; + while (lot < hit) { + swap(lot++, hit--); + } + lot = mid; hit = hi - 1; + while (lot < hit) { + swap(lot++, hit--); + } + lot = lo; hit = hi - 1; + while (lot < hit) { + swap(lot++, hit--); + } + } + + private int lower(int lo, int hi, int val) { + int len = hi - lo; + while (len > 0) { + final int half = len / 2, + mid = lo + half; + if (compare(mid, val) < 0) { + lo = mid + 1; + len = len - half -1; + } else { + len = half; + } + } + return lo; + } + + private int upper(int lo, int hi, int val) { + int len = hi - lo; + while (len > 0) { + final int half = len / 2, + mid = lo + half; + if (compare(val, mid) < 0) { + len = half; + } else { + lo = mid + 1; + len = len - half -1; + } + } + return lo; + } + + private void insertionSort(int lo, int hi) { + for (int i = lo + 1 ; i <= hi; i++) { + for (int j = i; j > lo; j--) { + if (compare(j - 1, j) > 0) { + swap(j - 1, j); + } else { + break; + } + } + } + } + +} Property changes on: lucene\src\java\org\apache\lucene\util\SorterTemplate.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native Index: lucene/src/test/org/apache/lucene/util/TestArrayUtil.java =================================================================== --- lucene/src/test/org/apache/lucene/util/TestArrayUtil.java (revision 1026498) +++ lucene/src/test/org/apache/lucene/util/TestArrayUtil.java (working copy) @@ -17,6 +17,9 @@ * limitations under the License. 
*/ +import java.util.Arrays; +import java.util.Comparator; + public class TestArrayUtil extends LuceneTestCase { // Ensure ArrayUtil.getNextSize gives linear amortized cost of realloc/copy @@ -113,4 +116,57 @@ assertFalse(left + " does not equal: " + right, ArrayUtil.equals(leftChars, 25, rightChars, 0, left.length())); assertFalse(left + " does not equal: " + right, ArrayUtil.equals(leftChars, 12, rightChars, 0, left.length())); } + + private Integer[] createRandomArray() { + final Integer[] a = new Integer[random.nextInt(1000) + 1]; + for (int i = 0; i < a.length; i++) { + a[i] = Integer.valueOf(random.nextInt(a.length)); + } + return a; + } + + private static final Comparator inverseComp = new Comparator() { + public int compare(Integer a1, Integer a2) { + return a2.compareTo(a1); // inverse + } + }; + + public void testQuickSort() { + for (int i = 0, c = 1000 * RANDOM_MULTIPLIER; i < c; i++) { + Integer[] a1 = createRandomArray(), a2 = a1.clone(); + ArrayUtil.quickSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + + a1 = createRandomArray(); + a2 = a1.clone(); + ArrayUtil.quickSort(a1, inverseComp); + Arrays.sort(a2, inverseComp); + assertArrayEquals(a2, a1); + } + } + + public void testMergeSort() { + for (int i = 0, c = 1000 * RANDOM_MULTIPLIER; i < c; i++) { + Integer[] a1 = createRandomArray(), a2 = a1.clone(); + ArrayUtil.mergeSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + + a1 = createRandomArray(); + a2 = a1.clone(); + ArrayUtil.mergeSort(a1, inverseComp); + Arrays.sort(a2, inverseComp); + assertArrayEquals(a2, a1); + } + } + + // should produce no exceptions + public void testEmptyArraySort() { + Integer[] a = new Integer[0]; + ArrayUtil.quickSort(a); + ArrayUtil.mergeSort(a); + ArrayUtil.quickSort(a, inverseComp); + ArrayUtil.mergeSort(a, inverseComp); + } }