--- lucene/CHANGES.txt (revision 1027999) +++ lucene/CHANGES.txt (working copy) @@ -4,9 +4,11 @@ Changes in backwards compatibility policy -* LUCENE-1483: Removed utility class oal.util.SorterTemplate; this - class is no longer used by Lucene. (Gunnar Wagenknecht via Mike - McCandless) +* LUCENE-2719: Changed API of internal utility class + org.apache.lucene.util.SorterTemplate to support faster quickSort using + pivot values and also merge sort and insertion sort. If you have used + this class, you have to implement two more methods for handling pivots. + (Uwe Schindler, Robert Muir, Mike McCandless) * LUCENE-1923: Renamed SegmentInfo & SegmentInfos segString method to toString. These are advanced APIs and subject to change suddenly. --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (revision 1027999) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (working copy) @@ -23,7 +23,6 @@ import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; -import java.util.Arrays; import java.util.Comparator; import org.apache.lucene.analysis.Analyzer; @@ -36,6 +35,7 @@ import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.index.TermPositionVector; import org.apache.lucene.index.TermVectorOffsetInfo; +import org.apache.lucene.util.ArrayUtil; /** * Hides implementation issues associated with obtaining a TokenStream for use @@ -230,13 +230,9 @@ if (unsortedTokens != null) { tokensInOriginalOrder = unsortedTokens.toArray(new Token[unsortedTokens .size()]); - Arrays.sort(tokensInOriginalOrder, new Comparator() { + ArrayUtil.quickSort(tokensInOriginalOrder, new Comparator() { public int compare(Token t1, Token t2) { - if (t1.startOffset() > t2.endOffset()) - return 1; - if (t1.startOffset() < t2.startOffset()) - return -1; - return 0; + return t1.startOffset() - t2.endOffset(); } }); } --- 
lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (revision 1027999) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; @@ -30,6 +29,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.index.TermPositionVector; import org.apache.lucene.index.TermVectorOffsetInfo; +import org.apache.lucene.util.CollectionUtil; public final class TokenStreamFromTermPositionVector extends TokenStream { @@ -74,18 +74,7 @@ this.positionedTokens.add(token); } } - final Comparator tokenComparator = new Comparator() { - public int compare(final Token o1, final Token o2) { - if (o1.getPositionIncrement() < o2.getPositionIncrement()) { - return -1; - } - if (o1.getPositionIncrement() > o2.getPositionIncrement()) { - return 1; - } - return 0; - } - }; - Collections.sort(this.positionedTokens, tokenComparator); + CollectionUtil.mergeSort(this.positionedTokens, tokenComparator); int lastPosition = -1; for (final Token token : this.positionedTokens) { int thisPosition = token.getPositionIncrement(); @@ -95,6 +84,12 @@ this.tokensAtCurrentPosition = this.positionedTokens.iterator(); } + private static final Comparator tokenComparator = new Comparator() { + public int compare(final Token o1, final Token o2) { + return o1.getPositionIncrement() - o2.getPositionIncrement(); + } + }; + @Override public boolean incrementToken() throws IOException { if (this.tokensAtCurrentPosition.hasNext()) { --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (revision 1027999) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java 
(working copy) @@ -44,6 +44,8 @@ import org.apache.lucene.index.TermVectorOffsetInfo; import org.apache.lucene.search.Similarity; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.CollectionUtil; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.BitVector; @@ -345,11 +347,7 @@ for (Map.Entry> eField_TermDocInfos : termDocumentInformationsByField.entrySet()) { - Collections.sort(eField_TermDocInfos.getValue(), new Comparator() { - public int compare(InstantiatedTermDocumentInformation instantiatedTermDocumentInformation, InstantiatedTermDocumentInformation instantiatedTermDocumentInformation1) { - return instantiatedTermDocumentInformation.getTerm().getTerm().compareTo(instantiatedTermDocumentInformation1.getTerm().getTerm()); - } - }); + CollectionUtil.quickSort(eField_TermDocInfos.getValue(), tdComp); // add term vector if (documentFieldSettingsByFieldName.get(eField_TermDocInfos.getKey()).storeTermVector) { @@ -366,7 +364,7 @@ // order document informations in dirty terms for (InstantiatedTerm term : dirtyTerms) { // todo optimize, i believe this is useless, that the natural order is document number? 
- Arrays.sort(term.getAssociatedDocuments(), InstantiatedTermDocumentInformation.documentNumberComparator); + ArrayUtil.mergeSort(term.getAssociatedDocuments(), InstantiatedTermDocumentInformation.documentNumberComparator); // // update association class reference for speedy skipTo() // for (int i = 0; i < term.getAssociatedDocuments().length; i++) { @@ -426,6 +424,12 @@ } + private static final Comparator tdComp = new Comparator() { + public int compare(InstantiatedTermDocumentInformation instantiatedTermDocumentInformation, InstantiatedTermDocumentInformation instantiatedTermDocumentInformation1) { + return instantiatedTermDocumentInformation.getTerm().getTerm().compareTo(instantiatedTermDocumentInformation1.getTerm().getTerm()); + } + }; + /** * Adds a document to this index. If the document contains more than * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are --- lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 1027999) +++ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy) @@ -51,6 +51,7 @@ import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.RAMDirectory; // for javadocs +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Constants; // for javadocs /** @@ -514,7 +515,7 @@ entries[i] = iter.next(); } - if (size > 1) Arrays.sort(entries, termComparator); + if (size > 1) ArrayUtil.quickSort(entries, termComparator); return entries; } --- lucene/NOTICE.txt (revision 1027999) +++ lucene/NOTICE.txt (working copy) @@ -58,4 +58,9 @@ Some files (contrib/analyzers/common/src/test/.../WordBreakTestUnicode_*.java and data files under contrib/icu/src/data/) are derived from Unicode data such as the Unicode Character Database. See http://unicode.org/copyright.html for more -details. \ No newline at end of file +details. 
+ +The class org.apache.lucene.util.SorterTemplate was inspired by CGLIB's class with +the same name. The implementation part is mainly done using pre-existing +Lucene sorting code. In-place stable mergesort was borrowed from CGLIB, +which is Apache-licensed. --- lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (working copy) @@ -19,12 +19,12 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.ThreadInterruptedException; +import org.apache.lucene.util.CollectionUtil; import java.io.IOException; import java.util.List; import java.util.ArrayList; import java.util.Comparator; -import java.util.Collections; /** A {@link MergeScheduler} that runs each merge using a * separate thread. @@ -138,7 +138,7 @@ } // Larger merges come first - protected static class CompareByMergeDocCount implements Comparator { + protected static final Comparator compareByMergeDocCount = new Comparator() { public int compare(MergeThread t1, MergeThread t2) { final MergePolicy.OneMerge m1 = t1.getCurrentMerge(); final MergePolicy.OneMerge m2 = t2.getCurrentMerge(); @@ -148,13 +148,13 @@ return c2 - c1; } - } + }; /** Called whenever the running merges have changed, to * pause & unpause threads. */ protected synchronized void updateMergeThreads() { - Collections.sort(mergeThreads, new CompareByMergeDocCount()); + CollectionUtil.mergeSort(mergeThreads, compareByMergeDocCount); final int count = mergeThreads.size(); int pri = mergeThreadPriority; --- lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. 
*/ +import java.util.Comparator; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -241,7 +242,7 @@ // sort the subset of fields that have vectors // enabled; we could save [small amount of] CPU // here. - quickSort(fields, 0, fieldCount-1); + ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp); for(int i=0;i= hi) - return; - else if (hi == 1+lo) { - if (array[lo].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { - final DocFieldProcessorPerField tmp = array[lo]; - array[lo] = array[hi]; - array[hi] = tmp; - } - return; + + private static final Comparator fieldsComp = new Comparator() { + public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) { + return o1.fieldInfo.name.compareTo(o2.fieldInfo.name); } + }; - int mid = (lo + hi) >>> 1; - - if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { - DocFieldProcessorPerField tmp = array[lo]; - array[lo] = array[mid]; - array[mid] = tmp; - } - - if (array[mid].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { - DocFieldProcessorPerField tmp = array[mid]; - array[mid] = array[hi]; - array[hi] = tmp; - - if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { - DocFieldProcessorPerField tmp2 = array[lo]; - array[lo] = array[mid]; - array[mid] = tmp2; - } - } - - int left = lo + 1; - int right = hi - 1; - - if (left >= right) - return; - - DocFieldProcessorPerField partition = array[mid]; - - for (; ;) { - while (array[right].fieldInfo.name.compareTo(partition.fieldInfo.name) > 0) - --right; - - while (left < right && array[left].fieldInfo.name.compareTo(partition.fieldInfo.name) <= 0) - ++left; - - if (left < right) { - DocFieldProcessorPerField tmp = array[left]; - array[left] = array[right]; - array[right] = tmp; - --right; - } else { - break; - } - } - - quickSort(array, lo, left); - quickSort(array, left + 1, hi); - } - PerDoc[] docFreeList = new PerDoc[1]; int freeCount; int allocCount; --- 
lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (working copy) @@ -21,10 +21,10 @@ import java.io.IOException; import java.util.Collection; -import java.util.Collections; import java.util.Map; import java.util.ArrayList; import java.util.List; +import org.apache.lucene.util.CollectionUtil; final class FreqProxTermsWriter extends TermsHashConsumer { @@ -80,7 +80,7 @@ } // Sort by field name - Collections.sort(allFields); + CollectionUtil.quickSort(allFields); final int numAllFields = allFields.size(); // TODO: allow Lucene user to customize this consumer: --- lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) @@ -31,6 +31,7 @@ import java.util.Collection; import org.apache.lucene.store.NoSuchDirectoryException; +import org.apache.lucene.util.CollectionUtil; /* * This class keeps track of each SegmentInfos instance that @@ -231,7 +232,7 @@ } // We keep commits list in sorted order (oldest to newest): - Collections.sort(commits); + CollectionUtil.mergeSort(commits); // Now delete anything with ref count at 0. 
These are // presumably abandoned files eg due to crash of --- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.document.FieldSelector; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.*; +import org.apache.lucene.util.ArrayUtil; import java.io.File; import java.io.FileOutputStream; @@ -1096,7 +1097,7 @@ cfr = new CompoundFileReader(dir, filename); String [] files = cfr.listAll(); - Arrays.sort(files); // sort the array of filename so that the output is more readable + ArrayUtil.quickSort(files); // sort the array of filename so that the output is more readable for (int i = 0; i < files.length; ++i) { long len = cfr.fileLength(files[i]); --- lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.document.Fieldable; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.SorterTemplate; final class TermsHashPerField extends InvertedDocConsumerPerField { @@ -147,7 +148,7 @@ ints[upto+stream]); } - private synchronized void compactPostings() { + private void compactPostings() { int upto = 0; for(int i=0;i= hi) - return; - else if (hi == 1+lo) { - if (comparePostings(termIDs[lo], termIDs[hi]) > 0) { - final int tmp = termIDs[lo]; - termIDs[lo] = termIDs[hi]; - termIDs[hi] = tmp; + final int[] postingsHash = this.postingsHash; + new SorterTemplate() { + @Override + protected void swap(int i, int j) { + final int o = postingsHash[i]; + postingsHash[i] = postingsHash[j]; + postingsHash[j] = o; } - return; - } + + @Override + protected int compare(int i, int j) { + final int term1 = postingsHash[i], term2 = postingsHash[j]; + if (term1 == term2) + return 0; + final int textStart1 = 
postingsArray.textStarts[term1], + textStart2 = postingsArray.textStarts[term2]; + final char[] text1 = charPool.buffers[textStart1 >> DocumentsWriter.CHAR_BLOCK_SHIFT]; + final int pos1 = textStart1 & DocumentsWriter.CHAR_BLOCK_MASK; + final char[] text2 = charPool.buffers[textStart2 >> DocumentsWriter.CHAR_BLOCK_SHIFT]; + final int pos2 = textStart2 & DocumentsWriter.CHAR_BLOCK_MASK; + return comparePostings(text1, pos1, text2, pos2); + } - int mid = (lo + hi) >>> 1; - - if (comparePostings(termIDs[lo], termIDs[mid]) > 0) { - int tmp = termIDs[lo]; - termIDs[lo] = termIDs[mid]; - termIDs[mid] = tmp; - } - - if (comparePostings(termIDs[mid], termIDs[hi]) > 0) { - int tmp = termIDs[mid]; - termIDs[mid] = termIDs[hi]; - termIDs[hi] = tmp; - - if (comparePostings(termIDs[lo], termIDs[mid]) > 0) { - int tmp2 = termIDs[lo]; - termIDs[lo] = termIDs[mid]; - termIDs[mid] = tmp2; + @Override + protected void setPivot(int i) { + pivotTerm = postingsHash[i]; + final int textStart = postingsArray.textStarts[pivotTerm]; + pivotBuf = charPool.buffers[textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; + pivotBufPos = textStart & DocumentsWriter.CHAR_BLOCK_MASK; } - } + + @Override + protected int comparePivot(int j) { + final int term = postingsHash[j]; + if (pivotTerm == term) + return 0; + final int textStart = postingsArray.textStarts[term]; + final char[] text = charPool.buffers[textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; + final int pos = textStart & DocumentsWriter.CHAR_BLOCK_MASK; + return comparePostings(pivotBuf, pivotBufPos, text, pos); + } + + private int pivotTerm, pivotBufPos; + private char[] pivotBuf; - int left = lo + 1; - int right = hi - 1; + /** Compares term text for two Posting instance and + * returns -1 if p1 < p2; 1 if p1 > p2; else 0. 
*/ + private int comparePostings(final char[] text1, int pos1, final char[] text2, int pos2) { + assert text1 != text2 || pos1 != pos2; - if (left >= right) - return; - - int partition = termIDs[mid]; - - for (; ;) { - while (comparePostings(termIDs[right], partition) > 0) - --right; - - while (left < right && comparePostings(termIDs[left], partition) <= 0) - ++left; - - if (left < right) { - int tmp = termIDs[left]; - termIDs[left] = termIDs[right]; - termIDs[right] = tmp; - --right; - } else { - break; + while(true) { + final char c1 = text1[pos1++]; + final char c2 = text2[pos2++]; + if (c1 != c2) { + if (0xffff == c2) + return 1; + else if (0xffff == c1) + return -1; + else + return c1-c2; + } else + // This method should never compare equal postings + // unless p1==p2 + assert c1 != 0xffff; + } } - } - - quickSort(termIDs, lo, left); - quickSort(termIDs, left + 1, hi); + }.quickSort(0, numPostings-1); + return postingsHash; } - /** Compares term text for two Posting instance and - * returns -1 if p1 < p2; 1 if p1 > p2; else 0. */ - int comparePostings(int term1, int term2) { - - if (term1 == term2) - return 0; - - final int textStart1 = postingsArray.textStarts[term1]; - final int textStart2 = postingsArray.textStarts[term2]; - - final char[] text1 = charPool.buffers[textStart1 >> DocumentsWriter.CHAR_BLOCK_SHIFT]; - int pos1 = textStart1 & DocumentsWriter.CHAR_BLOCK_MASK; - final char[] text2 = charPool.buffers[textStart2 >> DocumentsWriter.CHAR_BLOCK_SHIFT]; - int pos2 = textStart2 & DocumentsWriter.CHAR_BLOCK_MASK; - - assert text1 != text2 || pos1 != pos2; - - while(true) { - final char c1 = text1[pos1++]; - final char c2 = text2[pos2++]; - if (c1 != c2) { - if (0xffff == c2) - return 1; - else if (0xffff == c1) - return -1; - else - return c1-c2; - } else - // This method should never compare equal postings - // unless p1==p2 - assert c1 != 0xffff; - } - } - /** Test whether the text for current RawPostingList p equals * current tokenText. 
*/ private boolean postingEquals(final int termID, final char[] tokenText, final int tokenTextLen) { --- lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java (working copy) @@ -17,9 +17,9 @@ * limitations under the License. */ +import org.apache.lucene.util.ArrayUtil; import java.io.IOException; import java.util.Collection; -import java.util.Arrays; import java.util.Comparator; /** Scorer for conjunctions, sets of queries, all of which are required. */ @@ -51,8 +51,10 @@ // We don't need to sort the array in any future calls because we know // it will already start off sorted (all scorers on same doc). - // note that this comparator is not consistent with equals! - Arrays.sort(scorers, new Comparator() { // sort the array + // Note that this comparator is not consistent with equals! + // Also we use mergeSort here to be stable (so order of Scorers that + // match on first document is preserved): + ArrayUtil.mergeSort(scorers, new Comparator() { // sort the array public int compare(Scorer o1, Scorer o2) { return o1.docID() - o2.docID(); } --- lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.MultipleTermPositions; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermPositions; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ToStringUtils; /** @@ -197,7 +198,7 @@ // sort by increasing docFreq order if (slop == 0) { - Arrays.sort(postingsFreqs); + ArrayUtil.quickSort(postingsFreqs); } if (slop == 0) { --- lucene/src/java/org/apache/lucene/search/PhraseQuery.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/search/PhraseQuery.java (working copy) @@ -20,13 +20,13 @@ import java.io.IOException; import java.util.Set; import java.util.ArrayList; -import 
java.util.Arrays; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermPositions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Explanation.IDFExplanation; import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.util.ArrayUtil; /** A Query that matches documents containing a particular sequence of terms. * A PhraseQuery is built by QueryParser for input like "new york". @@ -188,7 +188,7 @@ // sort by increasing docFreq order if (slop == 0) { - Arrays.sort(postingsFreqs); + ArrayUtil.quickSort(postingsFreqs); } if (slop == 0) { // optimize exact case --- lucene/src/java/org/apache/lucene/search/QueryTermVector.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/search/QueryTermVector.java (working copy) @@ -30,6 +30,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.TermFreqVector; +import org.apache.lucene.util.ArrayUtil; /** * @@ -77,7 +78,7 @@ private void processTerms(String[] queryTerms) { if (queryTerms != null) { - Arrays.sort(queryTerms); + ArrayUtil.quickSort(queryTerms); Map tmpSet = new HashMap(queryTerms.length); //filter out duplicates List tmpList = new ArrayList(queryTerms.length); --- lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (working copy) @@ -18,10 +18,10 @@ */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.ArrayUtil; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Comparator; import java.util.HashSet; import java.util.LinkedList; @@ -190,7 +190,7 @@ /** Advance the subSpans to the same document */ private boolean toSameDoc() throws IOException { - Arrays.sort(subSpansByDoc, spanDocComparator); + ArrayUtil.quickSort(subSpansByDoc, spanDocComparator); int firstIndex = 0; int maxDoc = 
subSpansByDoc[subSpansByDoc.length - 1].doc(); while (subSpansByDoc[firstIndex].doc() != maxDoc) { --- lucene/src/java/org/apache/lucene/util/ArrayUtil.java (revision 1027999) +++ lucene/src/java/org/apache/lucene/util/ArrayUtil.java (working copy) @@ -17,6 +17,9 @@ * limitations under the License. */ +import java.util.Collection; +import java.util.Comparator; + /** * Methods for manipulating arrays. * @@ -345,4 +348,177 @@ code = code * 31 + array[i]; return code; } -} + + /** SorterTemplate with custom {@link Comparator} */ + private static SorterTemplate getSorter(final T[] a, final Comparator comp) { + return new SorterTemplate() { + @Override + protected void swap(int i, int j) { + final T o = a[i]; + a[i] = a[j]; + a[j] = o; + } + + @Override + protected int compare(int i, int j) { + return comp.compare(a[i], a[j]); + } + + @Override + protected void setPivot(int i) { + pivot = a[i]; + } + + @Override + protected int comparePivot(int j) { + return comp.compare(pivot, a[j]); + } + + private T pivot; + }; + } + + /** Natural SorterTemplate */ + private static > SorterTemplate getSorter(final T[] a) { + return new SorterTemplate() { + @Override + protected void swap(int i, int j) { + final T o = a[i]; + a[i] = a[j]; + a[j] = o; + } + + @Override + protected int compare(int i, int j) { + return a[i].compareTo(a[j]); + } + + @Override + protected void setPivot(int i) { + pivot = a[i]; + } + + @Override + protected int comparePivot(int j) { + return pivot.compareTo(a[j]); + } + + private T pivot; + }; + } + + // quickSorts (endindex is exclusive!): + + /** + * Sorts the given array slice using the {@link Comparator}. This method uses the quick sort + * algorithm, but falls back to insertion sort for small arrays. 
+ * @param fromIndex start index (inclusive) + * @param toIndex end index (exclusive) + */ + public static void quickSort(T[] a, int fromIndex, int toIndex, Comparator comp) { + getSorter(a, comp).quickSort(fromIndex, toIndex-1); + } + + /** + * Sorts the given array using the {@link Comparator}. This method uses the quick sort + * algorithm, but falls back to insertion sort for small arrays. + */ + public static void quickSort(T[] a, Comparator comp) { + quickSort(a, 0, a.length, comp); + } + + /** + * Sorts the given array slice in natural order. This method uses the quick sort + * algorithm, but falls back to insertion sort for small arrays. + * @param fromIndex start index (inclusive) + * @param toIndex end index (exclusive) + */ + public static > void quickSort(T[] a, int fromIndex, int toIndex) { + getSorter(a).quickSort(fromIndex, toIndex-1); + } + + /** + * Sorts the given array in natural order. This method uses the quick sort + * algorithm, but falls back to insertion sort for small arrays. + */ + public static > void quickSort(T[] a) { + quickSort(a, 0, a.length); + } + + // mergeSorts: + + /** + * Sorts the given array slice using the {@link Comparator}. This method uses the merge sort + * algorithm, but falls back to insertion sort for small arrays. + * @param fromIndex start index (inclusive) + * @param toIndex end index (exclusive) + */ + public static void mergeSort(T[] a, int fromIndex, int toIndex, Comparator comp) { + getSorter(a, comp).mergeSort(fromIndex, toIndex-1); + } + + /** + * Sorts the given array using the {@link Comparator}. This method uses the merge sort + * algorithm, but falls back to insertion sort for small arrays. + */ + public static void mergeSort(T[] a, Comparator comp) { + mergeSort(a, 0, a.length, comp); + } + + /** + * Sorts the given array slice in natural order. This method uses the merge sort + * algorithm, but falls back to insertion sort for small arrays. 
+ * @param fromIndex start index (inclusive) + * @param toIndex end index (exclusive) + */ + public static > void mergeSort(T[] a, int fromIndex, int toIndex) { + getSorter(a).mergeSort(fromIndex, toIndex-1); + } + + /** + * Sorts the given array in natural order. This method uses the merge sort + * algorithm, but falls back to insertion sort for small arrays. + */ + public static > void mergeSort(T[] a) { + mergeSort(a, 0, a.length); + } + + // insertionSorts: + + /** + * Sorts the given array slice using the {@link Comparator}. This method uses the insertion sort + * algorithm. It is only recommended to use this algorithm for partially sorted small arrays! + * @param fromIndex start index (inclusive) + * @param toIndex end index (exclusive) + */ + public static void insertionSort(T[] a, int fromIndex, int toIndex, Comparator comp) { + getSorter(a, comp).insertionSort(fromIndex, toIndex-1); + } + + /** + * Sorts the given array using the {@link Comparator}. This method uses the insertion sort + * algorithm. It is only recommended to use this algorithm for partially sorted small arrays! + */ + public static void insertionSort(T[] a, Comparator comp) { + insertionSort(a, 0, a.length, comp); + } + + /** + * Sorts the given array slice in natural order. This method uses the insertion sort + * algorithm. It is only recommended to use this algorithm for partially sorted small arrays! + * @param fromIndex start index (inclusive) + * @param toIndex end index (exclusive) + */ + public static > void insertionSort(T[] a, int fromIndex, int toIndex) { + getSorter(a).insertionSort(fromIndex, toIndex-1); + } + + /** + * Sorts the given array in natural order. This method uses the insertion sort + * algorithm. It is only recommended to use this algorithm for partially sorted small arrays! 
+ */ + public static > void insertionSort(T[] a) { + insertionSort(a, 0, a.length); + } + +} \ No newline at end of file --- lucene/src/test/org/apache/lucene/util/TestArrayUtil.java (revision 1027999) +++ lucene/src/test/org/apache/lucene/util/TestArrayUtil.java (working copy) @@ -17,6 +17,9 @@ * limitations under the License. */ +import java.util.Arrays; +import java.util.Collections; + public class TestArrayUtil extends LuceneTestCase { // Ensure ArrayUtil.getNextSize gives linear amortized cost of realloc/copy @@ -100,4 +103,127 @@ } + + private Integer[] createRandomArray(int maxSize) { + final Integer[] a = new Integer[random.nextInt(maxSize) + 1]; + for (int i = 0; i < a.length; i++) { + a[i] = Integer.valueOf(random.nextInt(a.length)); + } + return a; + } + + public void testQuickSort() { + for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) { + Integer[] a1 = createRandomArray(1000), a2 = a1.clone(); + ArrayUtil.quickSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + + a1 = createRandomArray(1000); + a2 = a1.clone(); + ArrayUtil.quickSort(a1, Collections.reverseOrder()); + Arrays.sort(a2, Collections.reverseOrder()); + assertArrayEquals(a2, a1); + // reverse back, so we can test that completely backwards sorted array (worst case) is working: + ArrayUtil.quickSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + } + } + + public void testMergeSort() { + for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) { + Integer[] a1 = createRandomArray(1000), a2 = a1.clone(); + ArrayUtil.mergeSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + + a1 = createRandomArray(1000); + a2 = a1.clone(); + ArrayUtil.mergeSort(a1, Collections.reverseOrder()); + Arrays.sort(a2, Collections.reverseOrder()); + assertArrayEquals(a2, a1); + // reverse back, so we can test that completely backwards sorted array (worst case) is working: + ArrayUtil.mergeSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + } + } + + public void testInsertionSort() { 
+ for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) { + Integer[] a1 = createRandomArray(30), a2 = a1.clone(); + ArrayUtil.insertionSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + + a1 = createRandomArray(30); + a2 = a1.clone(); + ArrayUtil.insertionSort(a1, Collections.reverseOrder()); + Arrays.sort(a2, Collections.reverseOrder()); + assertArrayEquals(a2, a1); + // reverse back, so we can test that completely backwards sorted array (worst case) is working: + ArrayUtil.insertionSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + } + } + + static class Item implements Comparable { + final int val, order; + + Item(int val, int order) { + this.val = val; + this.order = order; + } + + public int compareTo(Item other) { + return this.order - other.order; + } + + @Override + public String toString() { + return Integer.toString(val); + } + } + + public void testMergeSortStability() { + Item[] items = new Item[100]; + for (int i = 0; i < items.length; i++) { + // half of the items have value but same order. The values of these items are sorted, + // so they should always be in order after sorting. + // The other half has defined order, but no (-1) value (they should appear after + // all above, when sorted). + final boolean equal = random.nextBoolean(); + items[i] = new Item(equal ? (i+1) : -1, equal ? 
0 : (random.nextInt(1000)+1)); + } + + if (VERBOSE) System.out.println("Before: " + Arrays.toString(items)); + // if you replace this with ArrayUtil.quickSort(), test should fail: + ArrayUtil.mergeSort(items); + if (VERBOSE) System.out.println("Sorted: " + Arrays.toString(items)); + + Item last = items[0]; + for (int i = 1; i < items.length; i++) { + final Item act = items[i]; + if (act.order == 0) { + // order of "equal" items should be not mixed up + assertTrue(act.val > last.val); + } + assertTrue(act.order >= last.order); + last = act; + } + } + + // should produce no exceptions + public void testEmptyArraySort() { + Integer[] a = new Integer[0]; + ArrayUtil.quickSort(a); + ArrayUtil.mergeSort(a); + ArrayUtil.insertionSort(a); + ArrayUtil.quickSort(a, Collections.reverseOrder()); + ArrayUtil.mergeSort(a, Collections.reverseOrder()); + ArrayUtil.insertionSort(a, Collections.reverseOrder()); + } + }