Index: src/test/org/apache/lucene/search/TestTermVectors.java =================================================================== --- src/test/org/apache/lucene/search/TestTermVectors.java (revision 619898) +++ src/test/org/apache/lucene/search/TestTermVectors.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.*; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.MockRAMDirectory; import org.apache.lucene.util.English; import java.io.IOException; @@ -34,7 +34,7 @@ public class TestTermVectors extends LuceneTestCase { private IndexSearcher searcher; - private RAMDirectory directory = new RAMDirectory(); + private Directory directory = new MockRAMDirectory(); public TestTermVectors(String s) { super(s); } @@ -91,6 +91,37 @@ } } + public void testTermVectorsFieldOrder() throws IOException { + Directory dir = new MockRAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true); + Document doc = new Document(); + doc.add(new Field("c", "some content here", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(new Field("a", "some content here", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(new Field("b", "some content here", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(new Field("x", "some content here", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + writer.close(); + IndexReader reader = IndexReader.open(dir); + TermFreqVector[] v = reader.getTermFreqVectors(0); + assertEquals(4, v.length); + String[] expectedFields = new String[]{"a", "b", "c", "x"}; + int[] expectedPositions = new int[]{1, 2, 0}; + for(int i=0;i= hi) + return; + + int mid = (lo + hi) >>> 1; + + if (array[lo].compareTo(array[mid]) > 0) { + FieldData tmp = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp; + } + + if (array[mid].compareTo(array[hi]) > 0) { + FieldData tmp = array[mid]; + array[mid] = array[hi]; + array[hi] = tmp; + + if (array[lo].compareTo(array[mid]) > 0) { + FieldData tmp2 = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp2; + } + } + + int left = lo + 1; + int right = hi - 1; + + if (left >= right) + return; + + FieldData partition = array[mid]; + + for (; ;) { + while (array[right].compareTo(partition) > 0) + --right; + + while (left < right && array[left].compareTo(partition) <= 0) + ++left; + + if (left < right) { + FieldData tmp = array[left]; + array[left] = array[right]; + array[right] = tmp; + --right; + } else { + break; + } + } + + quickSort(array, lo, left); + quickSort(array, left + 1, hi); + } + /** If there are fields we've seen but did not see again * in the last run, then free them up. Also reduce * postings hash size. */ @@ -1087,6 +1140,7 @@ throws IOException, AbortException { final int numFields = numFieldData; + assert clearLastVectorFieldName(); assert 0 == fdtLocal.length(); @@ -1097,7 +1151,7 @@ // sort the subset of fields that have vectors // enabled; we could save [small amount of] CPU // here. - Arrays.sort(fieldDataArray, 0, numFields); + quickSort(fieldDataArray, 0, numFields-1); // We process the document one field at a time for(int i=0;i 0.95 * ramBufferSize) + && numBytesUsed >= ramBufferSize) balanceRAM(); } @@ -1284,6 +1338,26 @@ pos[posUpto++] = b; } + String lastVectorFieldName; + + // Called only by assert + final boolean clearLastVectorFieldName() { + lastVectorFieldName = null; + return true; + } + + // Called only by assert + final boolean vectorFieldsInOrder(FieldInfo fi) { + try { + if (lastVectorFieldName != null) + return lastVectorFieldName.compareTo(fi.name) < 0; + else + return true; + } finally { + lastVectorFieldName = fi.name; + } + } + PostingVector[] postingsVectors = new PostingVector[1]; int maxPostingsVectors; @@ -1349,7 +1423,6 @@ postingsHash = new Posting[postingsHashSize]; } - /** So Arrays.sort can sort us. */ public int compareTo(Object o) { return fieldInfo.name.compareTo(((FieldData) o).fieldInfo.name); } @@ -1514,9 +1587,9 @@ /** Only called when term vectors are enabled. This * is called the first time we see a given term for - * each * document, to allocate a PostingVector - * instance that * is used to record data needed to - * write the posting * vectors. */ + * each document, to allocate a PostingVector + * instance that is used to record data needed to + * write the posting vectors. */ private PostingVector addNewVector() { if (postingsVectorsUpto == postingsVectors.length) { @@ -1815,6 +1888,7 @@ void writeVectors(FieldInfo fieldInfo) throws IOException { assert fieldInfo.storeTermVector; + assert vectorFieldsInOrder(fieldInfo); vectorFieldNumbers[numVectorFields] = fieldInfo.number; vectorFieldPointers[numVectorFields] = tvfLocal.getFilePointer(); @@ -2628,7 +2702,7 @@ out.writeByte(b); } - byte[] copyByteBuffer = new byte[4096]; + final byte[] copyByteBuffer = new byte[4096]; /** Copy numBytes from srcIn to destIn */ void copyBytes(IndexInput srcIn, IndexOutput destIn, long numBytes) throws IOException { @@ -3005,7 +3079,7 @@ private int postingsFreeCount; /* Allocate more Postings from shared pool */ - private synchronized void getPostings(Posting[] postings) { + synchronized void getPostings(Posting[] postings) { numBytesUsed += postings.length * POSTING_NUM_BYTE; final int numToCopy; if (postingsFreeCount < postings.length) @@ -3026,7 +3100,7 @@ } } - private synchronized void recyclePostings(Posting[] postings, int numPostings) { + synchronized void recyclePostings(Posting[] postings, int numPostings) { // Move all Postings from this ThreadState back to our // free list if (postingsFreeCount + numPostings > postingsFreeList.length) { @@ -3116,7 +3190,7 @@ * the other two. This method just frees allocations from * the pools once we are over-budget, which balances the * pools to match the current docs. */ - private synchronized void balanceRAM() { + synchronized void balanceRAM() { if (ramBufferSize == IndexWriter.DISABLE_AUTO_FLUSH || bufferIsFull) return;