Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 882637) +++ CHANGES.txt (working copy) @@ -17,6 +17,10 @@ Optimizations +* LUCENE-2086: When resolving deleted terms, do so in term sort order + for better locality for the disk heads (Bogdan Ghidireac via Mike + McCandless) + Build ======================= Release 3.0.0 2009-11-25 ======================= Index: src/java/org/apache/lucene/index/BufferedDeletes.java =================================================================== --- src/java/org/apache/lucene/index/BufferedDeletes.java (revision 882637) +++ src/java/org/apache/lucene/index/BufferedDeletes.java (working copy) @@ -18,6 +18,8 @@ */ import java.util.HashMap; +import java.util.Map; +import java.util.TreeMap; import java.util.ArrayList; import java.util.List; import java.util.Map.Entry; @@ -33,11 +35,21 @@ * previously flushed segments. */ class BufferedDeletes { int numTerms; - HashMap terms = new HashMap(); - HashMap queries = new HashMap(); + Map terms; + Map queries = new HashMap(); List docIDs = new ArrayList(); long bytesUsed; + private final boolean doTermSort; + public BufferedDeletes(boolean doTermSort) { + this.doTermSort = doTermSort; + if (doTermSort) { + terms = new TreeMap(); + } else { + terms = new HashMap(); + } + } + // Number of documents a delete term applies to. 
final static class Num { private int num; @@ -104,11 +116,15 @@ MergePolicy.OneMerge merge, int mergeDocCount) { - final HashMap newDeleteTerms; + final Map newDeleteTerms; // Remap delete-by-term if (terms.size() > 0) { - newDeleteTerms = new HashMap(); + if (doTermSort) { + newDeleteTerms = new TreeMap(); + } else { + newDeleteTerms = new HashMap(); + } for(Entry entry : terms.entrySet()) { Num num = entry.getValue(); newDeleteTerms.put(entry.getKey(), Index: src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- src/java/org/apache/lucene/index/DocumentsWriter.java (revision 882637) +++ src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.Map; import java.util.HashSet; import java.util.List; import java.util.Map.Entry; @@ -219,11 +220,11 @@ // Deletes done after the last flush; these are discarded // on abort - private BufferedDeletes deletesInRAM = new BufferedDeletes(); + private BufferedDeletes deletesInRAM = new BufferedDeletes(false); // Deletes done before the last flush; these are still // kept on abort - private BufferedDeletes deletesFlushed = new BufferedDeletes(); + private BufferedDeletes deletesFlushed = new BufferedDeletes(true); // The max number of delete terms that can be buffered before // they must be flushed to disk. @@ -828,7 +829,7 @@ } // for testing - synchronized HashMap getBufferedDeleteTerms() { + synchronized Map getBufferedDeleteTerms() { return deletesInRAM.terms; } @@ -969,7 +970,8 @@ try { for (Entry entry: deletesFlushed.terms.entrySet()) { Term term = entry.getKey(); + // LUCENE-2086: deletesFlushed.terms is a TreeMap, so these seeks happen in term sort order. docs.seek(term); int limit = entry.getValue().getNum(); while (docs.next()) {