Index: lucene/misc/src/java/org/apache/lucene/index/sorter/Doc.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/Doc.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/Doc.java (working copy)
@@ -0,0 +1,56 @@
+package org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.Arrays;
+
+/**
+ * This class represents a document that is to be sorted in the index: a
+ * document ID paired with the key it is sorted by. Instances order themselves
+ * by key first and by document ID second.
+ *
+ * @param <T>
+ *          A comparable type according to which the document should be sorted.
+ *
+ * @lucene.experimental
+ */
+public class Doc<T extends Comparable<T>> implements Comparable<Doc<T>> {
+  private final T comparable;
+  private final int id;
+
+  /**
+   * Returns a permutation on the list of documents from their IDs to their
+   * position when sorted according to their keys.
+   * <p>
+   * NOTE: <code>docs</code> is sorted in place. The result is indexed by
+   * document ID, so every ID must lie in <code>[0, docs.length)</code>.
+   *
+   * @param docs the documents to rank; must not contain <code>null</code>
+   * @return an array <code>a</code> where <code>a[id]</code> is the position
+   *         of document <code>id</code> in the sorted order
+   */
+  public static int[] old2new(final Doc<?>[] docs) {
+    Arrays.sort(docs);
+    final int[] oldToNew = new int[docs.length];
+    for (int i = 0; i < docs.length; i++) {
+      oldToNew[docs[i].id] = i;
+    }
+    return oldToNew;
+  }
+
+  /**
+   * @param comparable the key this document is sorted by
+   * @param docId the document's ID
+   */
+  public Doc(final T comparable, final int docId) {
+    this.comparable = comparable;
+    this.id = docId;
+  }
+
+  /** Orders by key; documents with equal keys are ordered by ascending ID. */
+  @Override
+  public int compareTo(final Doc<T> doc) {
+    final int byKey = comparable.compareTo(doc.comparable);
+    if (byKey != 0) {
+      return byKey;
+    }
+    // Explicit comparison rather than "id - doc.id": the subtraction
+    // overflows when the two IDs are far apart and then reports the wrong sign.
+    return id < doc.id ? -1 : (id == doc.id ? 0 : 1);
+  }
+
+}
Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/Doc.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/index/sorter/DocumentSorter.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/DocumentSorter.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/DocumentSorter.java (working copy)
@@ -0,0 +1,48 @@
+package org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.StoredDocument;
+
+/**
+ * A {@link Sorter} which sorts documents according to their stored fields.
+ * The sort key of each document is obtained by handing its stored document
+ * to the converter given at construction time.
+ *
+ * @param <T>
+ *          the comparable key type documents are sorted by
+ *
+ * @lucene.experimental
+ */
+public class DocumentSorter<T extends Comparable<T>> implements Sorter {
+
+  // NOTE(review): the type arguments of To (and of Doc below) were lost from
+  // this patch and To is declared elsewhere; presumably To<StoredDocument, T>,
+  // given the otherwise unused StoredDocument import -- confirm and restore.
+  private final To to;
+
+  /**
+   * @param to converts a stored document into the key it is sorted by
+   */
+  public DocumentSorter(final To to) {
+    this.to = to;
+  }
+
+  /**
+   * Loads the stored document of every doc ID below
+   * <code>reader.maxDoc()</code>, derives its key and ranks the documents by
+   * key (ties broken by doc ID, see {@link Doc}).
+   */
+  @Override
+  public int[] oldToNew(final AtomicReader reader) throws IOException {
+    final int maxDoc = reader.maxDoc();
+    final Doc[] docs = new Doc[maxDoc];
+    for (int i = 0; i < maxDoc; i++) {
+      docs[i] = new Doc(to.from(reader.document(i)), i);
+    }
+    return Doc.old2new(docs);
+  }
+
+}
Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/DocumentSorter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/index/sorter/NumericDocValuesSorter.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/NumericDocValuesSorter.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/NumericDocValuesSorter.java (working copy)
@@ -0,0 +1,50 @@
+package org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.NumericDocValues;
+
+/**
+ * A {@link Sorter} which sorts documents according to their DocValues: the
+ * key of each document is its value in one {@link NumericDocValues} field.
+ *
+ * @lucene.experimental
+ */
+public class NumericDocValuesSorter implements Sorter {
+
+  private final String fieldName;
+
+  /**
+   * @param fieldName name of the numeric doc-values field to sort by
+   */
+  public NumericDocValuesSorter(final String fieldName) {
+    this.fieldName = fieldName;
+  }
+
+  /**
+   * Ranks all documents below <code>reader.maxDoc()</code> by ascending field
+   * value (ties broken by doc ID, see {@link Doc}).
+   *
+   * @throws IllegalArgumentException if the reader has no numeric doc values
+   *         for the configured field
+   */
+  @Override
+  public int[] oldToNew(final AtomicReader reader) throws IOException {
+    final NumericDocValues ndv = reader.getNumericDocValues(fieldName);
+    if (ndv == null) {
+      // Fail with a message naming the field instead of a bare NPE below.
+      throw new IllegalArgumentException("field \"" + fieldName + "\" has no NumericDocValues in this reader");
+    }
+    final int maxDoc = reader.maxDoc();
+    final Doc<?>[] docs = new Doc<?>[maxDoc];
+    for (int i = 0; i < maxDoc; i++) {
+      // Long.valueOf may reuse cached instances; new Long always allocates.
+      docs[i] = new Doc<Long>(Long.valueOf(ndv.get(i)), i);
+    }
+    return Doc.old2new(docs);
+  }
+
+}
Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/NumericDocValuesSorter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/index/sorter/PayloadSorter.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/PayloadSorter.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/PayloadSorter.java (working copy)
@@ -0,0 +1,56 @@
+package
org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A {@link Sorter} which sorts a document according to its payload: the key
+ * of each document is derived from the payload stored at the first position
+ * of one fixed term in that document.
+ *
+ * @param <T>
+ *          the comparable key type documents are sorted by
+ *
+ * @lucene.experimental
+ */
+public class PayloadSorter<T extends Comparable<T>> implements Sorter {
+
+  // NOTE(review): the type arguments of To (and of Doc below) were lost from
+  // this patch and To is declared elsewhere; presumably To<BytesRef, T> --
+  // confirm and restore.
+  private final To to;
+  private final Term term;
+
+  /**
+   * @param term bytes of the term whose payloads carry the sort keys
+   * @param field field the term belongs to
+   * @param to converts a payload into the key the document is sorted by
+   */
+  public PayloadSorter(final BytesRef term, final String field, final To to) {
+    this.to = to;
+    this.term = new Term(field, term);
+  }
+
+  /**
+   * Ranks the documents by the key derived from their payload. A full
+   * permutation can only be produced when every document below
+   * <code>reader.maxDoc()</code> contains the term.
+   *
+   * @throws IllegalArgumentException if the reader exposes no positions for
+   *         the term, or if some document does not contain the term
+   */
+  @Override
+  public int[] oldToNew(final AtomicReader reader) throws IOException {
+    final DocsAndPositionsEnum it = reader.termPositionsEnum(term);
+    if (it == null) {
+      throw new IllegalArgumentException("no positions available for term " + term);
+    }
+    final int maxDoc = reader.maxDoc();
+    final Doc[] docs = new Doc[maxDoc];
+    int count = 0;
+    int docId;
+    while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+      it.nextPosition();
+      // Pair the key with the real doc ID; a running counter is only correct
+      // when the term happens to occur in every document.
+      docs[count++] = new Doc(to.from(it.getPayload()), docId);
+    }
+    if (count != maxDoc) {
+      // Unfilled slots would otherwise surface as an NPE inside Arrays.sort.
+      throw new IllegalArgumentException("term " + term + " occurs in " + count
+          + " documents but the reader has " + maxDoc + "; cannot sort by payload");
+    }
+    return Doc.old2new(docs);
+  }
+
+}
Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/PayloadSorter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/index/sorter/ReverseDocIdSorter.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/ReverseDocIdSorter.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/ReverseDocIdSorter.java (working copy)
@@ -0,0 +1,40 @@
+package org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+
+/**
+ * A {@link Sorter} which sorts documents in the reverse order of their doc
+ * IDs: the first document becomes the last one and vice versa.
+ *
+ * @lucene.experimental
+ */
+public class ReverseDocIdSorter implements Sorter {
+
+  /** Maps doc ID <code>i</code> to <code>maxDoc - 1 - i</code>. */
+  @Override
+  public int[] oldToNew(final AtomicReader reader) throws IOException {
+    final int numDocs = reader.maxDoc();
+    final int[] mapping = new int[numDocs];
+    // Walk the old IDs upwards while handing out new IDs downwards.
+    int newId = numDocs;
+    for (int oldId = 0; oldId < numDocs; oldId++) {
+      mapping[oldId] = --newId;
+    }
+    return mapping;
+  }
+
+}
Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/ReverseDocIdSorter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java (working copy)
@@ -0,0 +1,35 @@
+package org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.store.Directory;
+
+/**
+ * An interface for sorting the documents of an {@link AtomicReader} (for
+ * example one opened over a {@link Directory}).
+ *
+ * @lucene.experimental
+ */
+public interface Sorter {
+
+  /**
+   * Computes the new order of the documents in the given reader.
+   *
+   * @param reader the reader whose documents are to be ordered
+   * @return an array indexed by current (old) document ID whose entries are
+   *         the IDs the documents get in the sorted order
+   * @throws IOException if reading from <code>reader</code> fails
+   */
+  int[] oldToNew(AtomicReader reader) throws IOException;
+
+}
Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SorterUtil.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/SorterUtil.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/SorterUtil.java (working copy)
@@ -0,0 +1,54 @@
+package org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
+
+/**
+ * A utility class for index sorting.
+ *
+ * @lucene.experimental
+ */
+public class SorterUtil {
+
+  /**
+   * Copies the index in <code>in</code> to <code>out</code>, adding the
+   * documents through a {@link SortingAtomicReader} built from
+   * <code>sorter</code>.
+   *
+   * @param in A directory of documents to be sorted
+   * @param out The directory to which the sorted documents will be added
+   * @param sorter An object that knows how to sort the documents.
+   * @throws IOException if reading <code>in</code> or writing <code>out</code> fails
+   */
+  public static void sort(Directory in, Directory out, Sorter sorter) throws IOException {
+    IndexWriter writer = null;
+    DirectoryReader reader = null;
+    SortingAtomicReader sortingReader = null;
+    boolean success = false;
+    try {
+      // No analyzer is needed: documents are only added via addIndexes.
+      writer = new IndexWriter(out, new IndexWriterConfig(Version.LUCENE_50, null));
+      reader = DirectoryReader.open(in);
+      sortingReader = new SortingAtomicReader(reader, sorter);
+      writer.addIndexes(sortingReader);
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(writer, sortingReader, reader);
+      } else {
+        // Something already failed: close quietly so that a secondary
+        // failure while closing does not replace the original exception.
+        IOUtils.closeWhileHandlingException(writer, sortingReader, reader);
+      }
+    }
+  }
+
+}
Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SorterUtil.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java (working copy)
@@ -0,0 +1,129 @@
+package org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SlowCompositeReaderWrapper;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.util.Bits;
+
+/**
+ * An {@link AtomicReader} which supports sorting documents by a given
+ * {@link Sorter}.
+ * <p>
+ * Two maps are kept. Postings are rewritten from old to new doc IDs
+ * (<code>old2new</code>), whereas per-document lookups (stored fields, term
+ * vectors, numeric doc values, norms) receive a <em>new</em> doc ID and must
+ * find the old document behind it (<code>new2old</code>).
+ * <p>
+ * NOTE: currently this reader does not support input {@link IndexReader
+ * readers} with deleted documents.
+ *
+ * @lucene.experimental
+ */
+public class SortingAtomicReader extends AtomicReader {
+
+  private final AtomicReader srcReader;
+  /** old doc ID -> new doc ID, as returned by the {@link Sorter}. */
+  private final int[] old2new;
+  /** new doc ID -> old doc ID, the inverse of {@link #old2new}. */
+  private final int[] new2old;
+
+  /**
+   * @param indexReader the reader to expose in sorted order
+   * @param sorter computes the old-to-new doc ID permutation
+   * @throws UnsupportedOperationException if the reader has deletions
+   * @throws IllegalStateException if the sorter does not return a permutation
+   *         of <code>[0, maxDoc)</code>
+   */
+  protected SortingAtomicReader(final IndexReader indexReader, final Sorter sorter) throws IOException {
+    if (indexReader.hasDeletions()) {
+      throw new UnsupportedOperationException("sorting an index which has deletions is unsupported yet");
+    }
+    srcReader = SlowCompositeReaderWrapper.wrap(indexReader);
+    old2new = sorter.oldToNew(srcReader);
+    new2old = invert(old2new, srcReader.maxDoc());
+  }
+
+  /** Inverts an old-to-new map, verifying that it is a permutation of [0, maxDoc). */
+  private static int[] invert(final int[] old2new, final int maxDoc) {
+    if (old2new.length != maxDoc) {
+      throw new IllegalStateException("sorter returned " + old2new.length + " entries for " + maxDoc + " documents");
+    }
+    final int[] inverse = new int[maxDoc];
+    for (int i = 0; i < maxDoc; i++) {
+      inverse[i] = -1;
+    }
+    for (int oldId = 0; oldId < maxDoc; oldId++) {
+      final int newId = old2new[oldId];
+      if (newId < 0 || newId >= maxDoc || inverse[newId] != -1) {
+        throw new IllegalStateException("sorter did not return a permutation: old doc " + oldId + " -> " + newId);
+      }
+      inverse[newId] = oldId;
+    }
+    return inverse;
+  }
+
+  @Override
+  public void document(final int docID, final StoredFieldVisitor visitor) throws IOException {
+    // docID is a new ID; old2new[docID] is only right for self-inverse
+    // permutations such as a plain reversal.
+    srcReader.document(new2old[docID], visitor);
+  }
+
+  @Override
+  public BinaryDocValues getBinaryDocValues(String field) throws IOException {
+    throw new UnsupportedOperationException("not supported by this reader");
+  }
+
+  @Override
+  public NumericDocValues getNumericDocValues(String field) throws IOException {
+    final NumericDocValues oldDocValues = srcReader.getNumericDocValues(field);
+    if (oldDocValues == null) return null;
+    // The wrapper looks up map[docID] for the (new) docID it is asked about.
+    return new SortingNumericDocValues(oldDocValues, new2old);
+  }
+
+  @Override
+  public Fields fields() throws IOException {
+    final Fields oldFields = srcReader.fields();
+    if (oldFields == null) return null; // source has no postings
+    return new SortingFields(oldFields, old2new);
+  }
+
+  @Override
+  public FieldInfos getFieldInfos() {
+    return srcReader.getFieldInfos();
+  }
+
+  @Override
+  public Bits getLiveDocs() {
+    return null; // default - no deleted docs
+  }
+
+  @Override
+  public Fields getTermVectors(final int docID) throws IOException {
+    return srcReader.getTermVectors(new2old[docID]);
+  }
+
+  @Override
+  public boolean hasDeletions() {
+    return false;
+  }
+
+  @Override
+  public int maxDoc() {
+    return srcReader.maxDoc();
+  }
+
+  @Override
+  public NumericDocValues getNormValues(String field) throws IOException {
+    final NumericDocValues oldDocValues = srcReader.getNormValues(field);
+    if (oldDocValues == null) return null;
+    return new SortingNumericDocValues(oldDocValues, new2old);
+  }
+
+  @Override
+  public int numDocs() {
+    return srcReader.numDocs();
+  }
+
+  @Override
+  protected void doClose() throws IOException {
+    srcReader.close();
+  }
+
+  @Override
+  public SortedDocValues getSortedDocValues(String field) throws IOException {
+    throw new UnsupportedOperationException("not supported by this reader");
+  }
+
+  @Override
+  public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
+    throw new UnsupportedOperationException("not supported by this reader");
+  }
+
+}
Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnum.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnum.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnum.java (working copy)
@@ -0,0 +1,193 @@
+package org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.SorterTemplate;
+
+/**
+ * Allow {@link DocsAndPositionsEnum} to be read according to old2new permutation.
+ * <p>
+ * The wrapped enum is consumed completely by the constructor: each document's
+ * positions, offsets and payloads are written to an in-memory file, and the
+ * remapped doc IDs are sorted together with the file pointers of their
+ * position data. Iteration then seeks to the recorded pointer of each
+ * document in new doc ID order.
+ *
+ * @lucene.experimental
+ */
+public class SortingDocsAndPositionsEnum extends DocsAndPositionsEnum {
+
+  /**
+   * A {@link SorterTemplate} which sorts two parallel arrays of doc IDs and
+   * offsets in one go. Every time a doc ID is 'swapped', its corresponding
+   * offset is swapped too.
+   */
+  private static final class DocOffsetSorterTemplate extends SorterTemplate {
+
+    private final int[] docs;
+    private final long[] offsets;
+
+    private int pivot;
+
+    public DocOffsetSorterTemplate(int[] docs, long[] offsets) {
+      this.docs = docs;
+      this.offsets = offsets;
+    }
+
+    @Override
+    protected int compare(int i, int j) {
+      return docs[i] - docs[j];
+    }
+
+    @Override
+    protected int comparePivot(int j) {
+      return pivot - docs[j];
+    }
+
+    @Override
+    protected void setPivot(int i) {
+      pivot = docs[i];
+    }
+
+    @Override
+    protected void swap(int i, int j) {
+      int tmpDoc = docs[i];
+      docs[i] = docs[j];
+      docs[j] = tmpDoc;
+
+      long tmpOffset = offsets[i];
+      offsets[i] = offsets[j];
+      offsets[j] = tmpOffset;
+    }
+  }
+
+  private static final String TEMP_FILE = "temp";
+
+  /** New doc IDs, sorted ascending; only the first {@link #upto} entries are valid. */
+  private int[] docs;
+  /** offsets[i] is where the position data of docs[i] starts in the temp file. */
+  private long[] offsets;
+  private final int upto;
+
+  private final RAMDirectory tempDir = new RAMDirectory();
+  private final IndexInput in;
+
+  private int docIt = -1;
+  private int pos;
+  private int startOffset;
+  private int endOffset;
+  private final BytesRef payload = new BytesRef(32);
+  /** Whether the current position carries a (non-empty) payload. */
+  private boolean hasPayload;
+  private int currFreq;
+
+  /**
+   * @param oldDocsAndPositions the enum to re-order; fully consumed here
+   * @param old2new maps each old doc ID to its new doc ID
+   * @throws IOException if reading the enum or buffering its data fails
+   */
+  public SortingDocsAndPositionsEnum(final DocsAndPositionsEnum oldDocsAndPositions, final int[] old2new) throws IOException {
+    int i = 0;
+    docs = new int[64];
+    offsets = new long[64];
+    final IndexOutput out = tempDir.createOutput(TEMP_FILE, IOContext.DEFAULT);
+    try {
+      while (oldDocsAndPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+        if (i == docs.length) {
+          docs = ArrayUtil.grow(docs, i + 1);
+          offsets = ArrayUtil.grow(offsets, i + 1);
+        }
+        docs[i] = old2new[oldDocsAndPositions.docID()];
+        offsets[i] = out.getFilePointer();
+        addPositions(out, oldDocsAndPositions);
+        i++;
+      }
+    } finally {
+      // Close (not merely flush) so the file is complete before it is read.
+      out.close();
+    }
+    upto = i;
+    SorterTemplate sorter = new DocOffsetSorterTemplate(docs, offsets);
+    sorter.quickSort(0, upto - 1);
+
+    in = tempDir.openInput(TEMP_FILE, IOContext.READ);
+  }
+
+  /**
+   * Appends the current document of <code>oldDocsAndPositions</code> to
+   * <code>out</code>: its freq, then for each position the position, start
+   * offset, end offset, payload length and payload bytes (length 0 when the
+   * position has no payload).
+   */
+  private void addPositions(final IndexOutput out, final DocsAndPositionsEnum oldDocsAndPositions) throws IOException {
+    final int freq = oldDocsAndPositions.freq();
+    out.writeVInt(freq);
+    for (int i = 0; i < freq; i++) {
+      final int oldPos = oldDocsAndPositions.nextPosition();
+
+      out.writeVInt(oldPos);
+      out.writeVInt(oldDocsAndPositions.startOffset());
+      out.writeVInt(oldDocsAndPositions.endOffset());
+
+      BytesRef payload = oldDocsAndPositions.getPayload();
+      if (payload != null) {
+        out.writeVInt(payload.length);
+        out.writeBytes(payload.bytes, payload.offset, payload.length);
+      } else {
+        out.writeVInt(0);
+      }
+    }
+  }
+
+  /** This operation is not supported. */
+  @Override
+  public int advance(final int target) throws IOException {
+    throw new UnsupportedOperationException("advance is not supported");
+  }
+
+  /**
+   * Returns -1 before the first {@link #nextDoc()}, {@link #NO_MORE_DOCS}
+   * once exhausted, and the current (new) doc ID otherwise.
+   */
+  @Override
+  public int docID() {
+    if (docIt < 0) {
+      return -1;
+    }
+    return docIt < upto ? docs[docIt] : DocIdSetIterator.NO_MORE_DOCS;
+  }
+
+  @Override
+  public int endOffset() throws IOException {
+    return endOffset;
+  }
+
+  @Override
+  public int freq() throws IOException {
+    return currFreq;
+  }
+
+  /**
+   * Returns the payload of the current position, or <code>null</code> if it
+   * has none. The length is all that is stored, so a zero-length payload in
+   * the source is also reported as <code>null</code>. The returned instance
+   * is reused across positions.
+   */
+  @Override
+  public BytesRef getPayload() throws IOException {
+    return hasPayload ? payload : null;
+  }
+
+  @Override
+  public int nextDoc() throws IOException {
+    hasPayload = false;
+    if (docIt + 1 >= upto) {
+      docIt = upto; // stay parked on the exhausted state
+      return DocIdSetIterator.NO_MORE_DOCS;
+    }
+    docIt++;
+    in.seek(offsets[docIt]);
+    currFreq = in.readVInt();
+    return docs[docIt];
+  }
+
+  @Override
+  public int nextPosition() throws IOException {
+    pos = in.readVInt();
+    startOffset = in.readVInt();
+    endOffset = in.readVInt();
+    int length = in.readVInt();
+    if (length >= payload.bytes.length) {
+      payload.grow(length + 1);
+    }
+    in.readBytes(payload.bytes, 0, length);
+    payload.length = length;
+    hasPayload = length > 0;
+    return pos;
+  }
+
+  @Override
+  public int startOffset() throws IOException {
+    return startOffset;
+  }
+
+}
Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnum.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsEnum.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsEnum.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsEnum.java (working copy)
@@ -0,0 +1,82 @@
+package org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.ArrayUtil;
+
+/**
+ * Allow {@link DocsEnum} to be read according to old2new permutation.
+ * <p>
+ * The wrapped enum is consumed completely by the constructor; its doc IDs
+ * are remapped and sorted, and iteration then walks the sorted array. Term
+ * frequencies are not retained.
+ *
+ * @lucene.experimental
+ */
+public class SortingDocsEnum extends DocsEnum {
+
+  /** New doc IDs, sorted ascending; only the first {@link #upto} entries are valid. */
+  private int[] newDocIds = new int[64];
+  private int docIt = -1;
+  private final int upto;
+
+  /**
+   * @param docs the enum to re-order; fully consumed here
+   * @param old2new maps each old doc ID to its new doc ID
+   * @throws IOException if reading the enum fails
+   */
+  public SortingDocsEnum(final DocsEnum docs, final int[] old2new) throws IOException {
+    int i = 0;
+    while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+      if (i == newDocIds.length) {
+        newDocIds = ArrayUtil.grow(newDocIds, i + 1);
+      }
+      newDocIds[i++] = old2new[docs.docID()];
+    }
+    upto = i;
+    Arrays.sort(this.newDocIds, 0, upto);
+  }
+
+  /**
+   * This operation is not supported.
+   */
+  @Override
+  public int advance(final int target) throws IOException {
+    throw new UnsupportedOperationException("advance is not supported");
+  }
+
+  /**
+   * Returns -1 before the first {@link #nextDoc()}, {@link #NO_MORE_DOCS}
+   * once exhausted, and the current (new) doc ID otherwise.
+   */
+  @Override
+  public int docID() {
+    if (docIt < 0) {
+      return -1;
+    }
+    return docIt < upto ? newDocIds[docIt] : NO_MORE_DOCS;
+  }
+
+  /**
+   * This operation is not supported.
+   */
+  @Override
+  public int freq() throws IOException {
+    throw new UnsupportedOperationException("freq is not supported");
+  }
+
+  @Override
+  public int nextDoc() throws IOException {
+    if (docIt + 1 >= upto) {
+      docIt = upto; // stay parked on the exhausted state
+      return NO_MORE_DOCS;
+    }
+    return newDocIds[++docIt];
+  }
+
+}
Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsEnum.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingFields.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingFields.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingFields.java (working copy)
@@ -0,0 +1,55 @@
+package org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.sorter.SortingTerms;
+
+/**
+ *
+ * Allow {@link Fields} to be read according to old2new permutation.
+ * @lucene.experimental
+ */
+public class SortingFields extends Fields {
+
+  private final Fields oldFields;
+  private final int[] old2new;
+
+  /**
+   * @param oldFields the fields to wrap
+   * @param old2new the doc ID map handed on to every {@link SortingTerms}
+   */
+  public SortingFields(final Fields oldFields, final int[] old2new) {
+    this.oldFields = oldFields;
+    this.old2new = old2new;
+  }
+
+  /** Field names are unaffected by the permutation; delegates to the wrapped fields. */
+  @Override
+  public Iterator<String> iterator() {
+    return oldFields.iterator();
+  }
+
+  @Override
+  public int size() {
+    return oldFields.size();
+  }
+
+  /**
+   * Returns the terms of <code>field</code> wrapped in a {@link SortingTerms},
+   * or <code>null</code> if the wrapped fields have no terms for it.
+   */
+  @Override
+  public Terms terms(final String field) throws IOException {
+    final Terms oldTerms = oldFields.terms(field);
+    if (oldTerms == null) {
+      // Pass "no such field" through instead of wrapping null.
+      return null;
+    }
+    return new SortingTerms(oldTerms, old2new);
+  }
+
+}
Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingFields.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingNumericDocValues.java
===================================================================
--- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingNumericDocValues.java (revision 0)
+++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingNumericDocValues.java (working copy)
@@ -0,0 +1,42 @@
+package org.apache.lucene.index.sorter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.NumericDocValues; + +/** + * Allows a {@link NumericDocValues} to be read according to old2new permutation. + * + * @lucene.experimental + */ +public class SortingNumericDocValues extends NumericDocValues { + + private final NumericDocValues oldDocValues; + private final int[] old2new; + + public SortingNumericDocValues(final NumericDocValues oldDocValues, final int[] old2new) { + this.oldDocValues = oldDocValues; + this.old2new = old2new; + } + + @Override + public long get(int docID) { + return oldDocValues.get(old2new[docID]); + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingNumericDocValues.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingTermEnum.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingTermEnum.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingTermEnum.java (working copy) @@ -0,0 +1,98 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; +import java.util.Comparator; + +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; + +/** + * + * Allow {@link TermsEnum} to be read according to old2new permutation. + * @lucene.experimental + */ +public class SortingTermEnum extends TermsEnum { + + private final TermsEnum oldIterator; + private final int[] old2new; + + public SortingTermEnum(final TermsEnum oldIterator, final int[] old2new) { + this.oldIterator = oldIterator; + this.old2new = old2new; + } + + @Override + public int docFreq() throws IOException { + return oldIterator.docFreq(); + } + + @Override + public DocsEnum docs(final Bits liveDocs, final DocsEnum reuse, final int flags) throws IOException { + return new SortingDocsEnum(oldIterator.docs(liveDocs, reuse, flags), old2new); + } + + @Override + public DocsAndPositionsEnum docsAndPositions(final Bits liveDocs, final DocsAndPositionsEnum reuse, final int flags) throws IOException { + return new SortingDocsAndPositionsEnum(oldIterator.docsAndPositions(liveDocs, reuse, flags), old2new); + } + + @Override + public Comparator getComparator() { + return oldIterator.getComparator(); + } + + @Override + public BytesRef next() throws IOException { + return oldIterator.next(); + } + + /*** + * This operation is not supported. 
+ */ + @Override + public long ord() throws IOException { + throw new UnsupportedOperationException("ord is not supported"); + } + + @Override + public SeekStatus seekCeil(final BytesRef text, final boolean useCache) throws IOException { + return oldIterator.seekCeil(text, useCache); + } + + /*** + * This operation is not supported. + */ + @Override + public void seekExact(final long ord) throws IOException { + throw new UnsupportedOperationException("seekExact is not supported"); + } + + @Override + public BytesRef term() throws IOException { + return oldIterator.term(); + } + + @Override + public long totalTermFreq() throws IOException { + return oldIterator.totalTermFreq(); + } +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingTermEnum.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingTerms.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingTerms.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingTerms.java (working copy) @@ -0,0 +1,84 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; +import java.util.Comparator; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.BytesRef; + +/** + * + * Allow {@link Terms} to be read according to old2new permutation. + * @lucene.experimental + */ +public class SortingTerms extends Terms { + + private final Terms oldTerms; + private final int[] old2new; + + public SortingTerms(final Terms oldTerms, final int[] old2new) { + this.oldTerms = oldTerms; + this.old2new = old2new; + } + + @Override + public Comparator getComparator() { + return oldTerms.getComparator(); + } + + @Override + public int getDocCount() throws IOException { + return oldTerms.getDocCount(); + } + + @Override + public long getSumDocFreq() throws IOException { + return oldTerms.getSumDocFreq(); + } + + @Override + public long getSumTotalTermFreq() throws IOException { + return oldTerms.getSumTotalTermFreq(); + } + + @Override + public boolean hasOffsets() { + return oldTerms.hasOffsets(); + } + + @Override + public boolean hasPayloads() { + return oldTerms.hasPayloads(); + } + + @Override + public boolean hasPositions() { + return oldTerms.hasPositions(); + } + + @Override + public TermsEnum iterator(final TermsEnum reuse) throws IOException { + return new SortingTermEnum(oldTerms.iterator(reuse), old2new); + } + + @Override + public long size() throws IOException { + return oldTerms.size(); + } +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingTerms.java 
___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/To.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/To.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/To.java (working copy) @@ -0,0 +1,35 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Interface used by index {@link Sorter} + * + * @param + * Object to be sorted. + * @param + * Field according to which the sorting will be done. + * + * @lucene.experimental + */ +public interface To { + + /** Retrieves from S the field according to which the sorting will be done. 
*/ + public T from(S s); + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/To.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/package.html =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/package.html (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/package.html (working copy) @@ -0,0 +1,91 @@ + + + + + + + +Code to sort indices. +

Table Of Contents

+

+

    +
  1. Sorters API
  2. +
  3. Sorting classes
  4. +
+

+ +

Sorters API

+

+All sorters implement the {@link org.apache.lucene.index.sorter.Sorter} interface. +Each sorter implementation sorts the documents in a directory according to the values of a document component such as +StoredFields, DocValues, Payload etc. The relevant component is evident from the name of the sorter class. +Most sorter classes are generic and take in a parameter T which defines the type of the component +according to which the sorting should be done. This type must be comparable. +For example, we can have an integer DocValues sorter, a double DocValues sorter and so on. +In order to use a generic sorter the user must supply the sorter with a {@link org.apache.lucene.index.sorter.To} object +that can extract an object of a comparable type T according to which the sorting will be done. + +For example, sorting a directory according to an integer DocValues field will be done as follows: +

+  int[] docValuesSort(Directory dir) throws IOException{
+    final DocValuesSorter<Integer> docValSorter = new DocValuesSorter<Integer>("someField", new To<BytesRef, Integer>() {
+
+      @Override
+      public Integer from(final BytesRef bytes) {
+        return DocValuesArraySource.asInt(bytes);
+      }
+    });
+    return docValSorter.oldToNew(dir);    
+  }
+
+Currently the following sorters are implemented: +
    +
  1. {@link org.apache.lucene.index.sorter.DocumentSorter}
  2. +
  3. {@link org.apache.lucene.index.sorter.DocValuesSorter}
  4. +
  5. {@link org.apache.lucene.index.sorter.PayloadSorter}
  6. +
  7. {@link org.apache.lucene.index.sorter.ReverseDocIdSorter}
  8. +
+In addition, once a sorter is defined, the utility class {@link org.apache.lucene.index.sorter.SorterUtil} supplies +a convenient interface to read documents from an input directory, sort them and add them to an output +directory according to their new order. +

+ +

Sorting classes

+

+A sorting class is a class that extends a previously existing index class +and allows the index to be accessed according to a permutation of its original order. +Each sorting class receives in its constructor an instance of its superclass +and a permutation +that defines the new order of the index. A sorting class exposes the same interface as its superclass, +although not all of the superclass's functionality is currently supported. +
+The following sorting classes are available: +

    +
  1. {@link org.apache.lucene.index.sorter.SortingDocsAndPositionsEnum}
  2. +
  3. {@link org.apache.lucene.index.sorter.SortingDocsEnum}
  4. +
  5. {@link org.apache.lucene.index.sorter.SortingDocValues}
  6. +
  7. {@link org.apache.lucene.index.sorter.SortingFields}
  8. +
  9. {@link org.apache.lucene.index.sorter.SortingIndexReader}
  10. +
  11. {@link org.apache.lucene.index.sorter.SortingSource}
  12. +
  13. {@link org.apache.lucene.index.sorter.SortingTermEnum}
  14. +
  15. {@link org.apache.lucene.index.sorter.SortingTerms}
  16. +
+

+ + + Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/package.html ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/DocValuesSorterTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/DocValuesSorterTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/DocValuesSorterTest.java (working copy) @@ -0,0 +1,69 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; +
+/** Checks the permutation computed by {@link NumericDocValuesSorter} on a three-document index. */
+public class DocValuesSorterTest extends LuceneTestCase {
+
+  private static final String DV_FIELD = "dvf";
+  private RAMDirectory dir;
+
+  /** Indexes three documents whose doc-values are 3, 2 and 1, in that order. */
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    dir = new RAMDirectory();
+    // random() is the seeded source of LuceneTestCase, so a failing run can be
+    // replayed from its seed; an unseeded new Random() cannot
+    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    writer.addDocument(doc(3));
+    writer.addDocument(doc(2));
+    writer.addDocument(doc(1));
+    writer.close();
+  }
+
+  /** Values were added in descending order, so sorting must reverse the doc ids. */
+  @Test
+  public void test() throws IOException {
+    final NumericDocValuesSorter docValSorter = new NumericDocValuesSorter(DV_FIELD);
+    DirectoryReader r = DirectoryReader.open(dir);
+    try {
+      assertArrayEquals(new int[] { 2, 1, 0 }, docValSorter.oldToNew(SlowCompositeReaderWrapper.wrap(r)));
+    } finally {
+      r.close();
+    }
+  }
+
+  /** Builds a document carrying <code>val</code> in the numeric doc-values field. */
+  private Document doc(final int val) {
+    final Document doc = new Document();
+    doc.add(new NumericDocValuesField(DV_FIELD, val));
+    return doc;
+  }
+
+}
Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/DocValuesSorterTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/DocumentSorterTest.java =================================================================== ---
lucene/misc/src/test/org/apache/lucene/index/sorter/DocumentSorterTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/DocumentSorterTest.java (working copy) @@ -0,0 +1,78 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.IntField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.StoredDocument; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; +
+/** Checks the permutation computed by {@link DocumentSorter} when sorting on a stored int field. */
+public class DocumentSorterTest extends LuceneTestCase {
+
+  private static final String VAL = "val";
+  private RAMDirectory dir;
+
+  /** Indexes four documents whose stored values are 1, 3, 2 and 1, in that order. */
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    dir = new RAMDirectory();
+    // random() is the seeded source of LuceneTestCase, so a failing run can be
+    // replayed from its seed; an unseeded new Random() cannot
+    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    writer.addDocument(doc(1));
+    writer.addDocument(doc(3));
+    writer.addDocument(doc(2));
+    writer.addDocument(doc(1));
+    writer.close();
+  }
+
+  /** The two documents with value 1 must keep their relative order (ids 0 and 3 map to 0 and 1). */
+  @Test
+  public void test() throws IOException {
+    // NOTE(review): the type arguments were missing here; To<StoredDocument, Integer> follows
+    // from the signature of from(), DocumentSorter<Integer> is assumed by analogy with the
+    // generic sorters described in package.html - confirm against DocumentSorter
+    final DocumentSorter<Integer> documentSorter = new DocumentSorter<Integer>(new To<StoredDocument, Integer>() {
+
+      @Override
+      public Integer from(final StoredDocument doc) {
+        return doc.getField(VAL).numericValue().intValue();
+      }
+    });
+
+    DirectoryReader r = DirectoryReader.open(dir);
+    try {
+      assertArrayEquals(new int[] { 0, 3, 2, 1 }, documentSorter.oldToNew(SlowCompositeReaderWrapper.wrap(r)));
+    } finally {
+      r.close();
+    }
+  }
+
+  /** Builds a document storing <code>val</code> in the int field. */
+  private Document doc(final int val) {
+    final Document doc = new Document();
+    doc.add(new IntField(VAL, val, Store.YES));
+    return doc;
+  }
+
+}
Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/DocumentSorterTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property
Index: lucene/misc/src/test/org/apache/lucene/index/sorter/PayloadSorterTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/PayloadSorterTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/PayloadSorterTest.java (working copy) @@ -0,0 +1,119 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; +
+/** Checks the permutation computed by {@link PayloadSorter} when sorting on a 4-byte payload. */
+public class PayloadSorterTest extends LuceneTestCase{
+
+  private static final String PAYLOAD_FIELD = "payloadField";
+  private static final String TERM = "term";
+  private RAMDirectory dir;
+
+  /** Indexes three documents whose payload values are 3, 2 and 1, in that order. */
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    dir = new RAMDirectory();
+    // random() is the seeded source of LuceneTestCase, so a failing run can be
+    // replayed from its seed; an unseeded new Random() cannot
+    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    writer.addDocument(doc(3));
+    writer.addDocument(doc(2));
+    writer.addDocument(doc(1));
+    writer.close();
+  }
+
+  /** Payloads were added in descending order, so sorting must reverse the doc ids. */
+  @Test
+  public void test() throws IOException {
+    Term term = new Term(PAYLOAD_FIELD, TERM);
+    // NOTE(review): the type arguments were missing here; To<BytesRef, Integer> follows from
+    // the signature of from(), PayloadSorter<Integer> is assumed by analogy with the generic
+    // sorters described in package.html - confirm against PayloadSorter
+    final PayloadSorter<Integer> payloadSorter = new PayloadSorter<Integer>(term.bytes(), term.field(), new To<BytesRef, Integer>() {
+
+      // decodes the big-endian int written by addPayload
+      @Override
+      public Integer from(final BytesRef bytes) {
+        return ((bytes.bytes[bytes.offset] & 0xFF) << 24)
+            | ((bytes.bytes[bytes.offset + 1] & 0xFF) << 16)
+            | ((bytes.bytes[bytes.offset + 2] & 0xFF) << 8)
+            | (bytes.bytes[bytes.offset + 3] & 0xFF);
+      }
+    });
+
+    DirectoryReader r = DirectoryReader.open(dir);
+    try {
+      assertArrayEquals(new int[] { 2, 1, 0 }, payloadSorter.oldToNew(SlowCompositeReaderWrapper.wrap(r)));
+    } finally {
+      r.close();
+    }
+  }
+
+  /**
+   * Adds to <code>doc</code> a field that emits {@link #TERM} twice, each occurrence
+   * carrying <code>id</code> as a 4-byte big-endian payload.
+   */
+  private void addPayload(final Document doc, final Integer id) {
+    final FieldType fieldType = new FieldType();
+    fieldType.setIndexed(true);
+    fieldType.freeze();
+    doc.add(new Field(PAYLOAD_FIELD, new TokenStream() {
+      int positionsCount = 2;
+      private final CharTermAttribute attCharTerm = addAttribute(CharTermAttribute.class);
+      private final PayloadAttribute attPayload = addAttribute(PayloadAttribute.class);
+      private final OffsetAttribute attOffset = addAttribute(OffsetAttribute.class);
+
+      @Override
+      public boolean incrementToken() throws IOException {
+        if (positionsCount-- == 0) return false;
+        attCharTerm.setEmpty().append(TERM);
+        byte[] bytes = new byte[4];
+        bytes[0] = (byte)(id >> 24);
+        bytes[1] = (byte)(id >> 16);
+        bytes[2] = (byte)(id >> 8);
+        bytes[3] = (byte) id.intValue();
+        attPayload.setPayload(new BytesRef(bytes));
+        attOffset.setOffset(id, id);
+        return true;
+      }
+
+    }, fieldType));
+  }
+
+  /** Builds a document holding <code>val</code> as a string field, an int field and a payload. */
+  private Document doc(final Integer val) {
+    final Document doc = new Document();
+    doc.add(new StringField("stringFields", val.toString(), Store.YES));
+    doc.add(new IntField("intField", val, Store.YES));
+    addPayload(doc, val);
+    return doc;
+  }
+
+}
Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/PayloadSorterTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/ReverseDocIdSorterTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/ReverseDocIdSorterTest.java (revision 0) +++
lucene/misc/src/test/org/apache/lucene/index/sorter/ReverseDocIdSorterTest.java (working copy) @@ -0,0 +1,69 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.IntField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; +
+/** Checks the permutation computed by {@link ReverseDocIdSorter} on a three-document index. */
+public class ReverseDocIdSorterTest extends LuceneTestCase {
+
+  private static final String VAL = "val";
+  private RAMDirectory dir;
+
+  /** Indexes three documents. */
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    dir = new RAMDirectory();
+    // random() is the seeded source of LuceneTestCase, so a failing run can be
+    // replayed from its seed; an unseeded new Random() cannot
+    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    writer.addDocument(doc(3));
+    writer.addDocument(doc(2));
+    writer.addDocument(doc(1));
+    writer.close();
+  }
+
+  /** Expects the doc ids 0, 1, 2 to be mapped to 2, 1, 0. */
+  @Test
+  public void test() throws IOException {
+    final ReverseDocIdSorter documentSorter = new ReverseDocIdSorter();
+    DirectoryReader r = DirectoryReader.open(dir);
+    try {
+      assertArrayEquals(new int[] { 2, 1, 0 }, documentSorter.oldToNew(SlowCompositeReaderWrapper.wrap(r)));
+    } finally {
+      r.close();
+    }
+  }
+
+  /** Builds a document storing <code>val</code> in the int field. */
+  private Document doc(final int val) {
+    final Document doc = new Document();
+    doc.add(new IntField(VAL, val, Store.YES));
+    return doc;
+  }
+
+}
Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/ReverseDocIdSorterTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestUtils.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestUtils.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestUtils.java (working copy) @@ -0,0 +1,36 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +import java.io.IOException; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import static org.junit.Assert.*; + +public class SorterTestUtils { + + public static void compareTerms(Terms unsortedTerms, Terms sortingTerms) throws IOException { + assertEquals(unsortedTerms.size(), sortingTerms.size()); + TermsEnum oldTermsIt = unsortedTerms.iterator(null); + TermsEnum sortingTermsIt = sortingTerms.iterator(null); + for (int i = 0; i < unsortedTerms.size(); i++) { + assertEquals(sortingTermsIt.next(), oldTermsIt.next()); + } + assertNull(sortingTermsIt.next()); + assertNull(oldTermsIt.next()); + } +} Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestUtils.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SorterUtilTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SorterUtilTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SorterUtilTest.java (working copy) @@ -0,0 +1,76 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.IntField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.StoredDocument; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; +
+/** Checks that {@link SorterUtil#sort} writes the documents of one directory to another in sorted order. */
+public class SorterUtilTest extends LuceneTestCase{
+
+  private static final String VAL = "val";
+  private RAMDirectory in;
+  private RAMDirectory out;
+
+  /** Indexes three documents whose stored values are 1, 3 and 2, and creates an empty output directory. */
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    in = new RAMDirectory();
+    // random() is the seeded source of LuceneTestCase, so a failing run can be
+    // replayed from its seed; an unseeded new Random() cannot
+    final IndexWriter writer = new IndexWriter(in, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    writer.addDocument(doc(1));
+    writer.addDocument(doc(3));
+    writer.addDocument(doc(2));
+    writer.close();
+    out = new RAMDirectory();
+  }
+
+  /** After sorting, the output index must hold the values in ascending order. */
+  @Test
+  public void test() throws IOException {
+    // NOTE(review): the type arguments were missing here; To<StoredDocument, Integer> follows
+    // from the signature of from(), DocumentSorter<Integer> is assumed by analogy with the
+    // generic sorters described in package.html - confirm against DocumentSorter
+    final DocumentSorter<Integer> documentSorter = new DocumentSorter<Integer>(new To<StoredDocument, Integer>() {
+      @Override
+      public Integer from(final StoredDocument doc) {
+        return doc.getField(VAL).numericValue().intValue();
+      }
+    });
+    SorterUtil.sort(in, out, documentSorter);
+    final int[] expected = { 1, 2, 3 };
+    final DirectoryReader outReader = DirectoryReader.open(out);
+    try {
+      // without this check the loop below passes vacuously on an empty output index
+      assertEquals(expected.length, outReader.maxDoc());
+      for (int i = 0; i < outReader.maxDoc(); i++) {
+        assertEquals(expected[i], outReader.document(i).getField(VAL).numericValue().intValue());
+      }
+    } finally {
+      outReader.close();
+    }
+  }
+
+  /** Builds a document storing <code>val</code> in the int field. */
+  private Document doc(final int val) {
+    final Document doc = new Document();
+    doc.add(new IntField(VAL, val, Store.YES));
+    return doc;
+  }
+
+}
Property changes on:
lucene/misc/src/test/org/apache/lucene/index/sorter/SorterUtilTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnumTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnumTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnumTest.java (working copy) @@ -0,0 +1,235 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; + +public class SortingDocsAndPositionsEnumTest extends LuceneTestCase { + + private DirectoryReader dirReader; + + static final IndexWriterConfig INDEX_WRITER_CONFIG = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random())); + private final int[] oldToNew = { 2, 1, 0 }; + + private static final String PAYLOAD_TEST_CONTENT = "payload"; + + private SortingDocsAndPositionsEnum sortingEnum; + private DocsAndPositionsEnum unSortedEnum; + + private void addPayload(final Document doc, final Integer id) { + final FieldType fieldType = new FieldType(); + fieldType.setIndexed(true); + fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + fieldType.freeze(); + + doc.add(new Field(PAYLOAD_TEST_CONTENT, new TokenStream() { + int positionsCount = id + 1; + private final CharTermAttribute attCharTerm = addAttribute(CharTermAttribute.class); + private final PayloadAttribute 
attPayload = addAttribute(PayloadAttribute.class); + private final OffsetAttribute attOffset = addAttribute(OffsetAttribute.class); + + @Override + public boolean incrementToken() throws IOException { + if (positionsCount-- == 0) return false; + + attCharTerm.setEmpty().append(PAYLOAD_TEST_CONTENT); + attPayload.setPayload(new BytesRef(id.toString())); + attOffset.setOffset(id, id); + return true; + } + + }, fieldType)); + } + + private Document doc(final Integer id) { + final Document doc = new Document(); + addPayload(doc, id); + return doc; + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + Directory dir = new RAMDirectory(); + final IndexWriter indexWriter = new IndexWriter(dir, INDEX_WRITER_CONFIG); + + indexWriter.addDocument(doc(0)); + indexWriter.addDocument(doc(1)); + indexWriter.addDocument(doc(2)); + indexWriter.close(); + + dirReader = DirectoryReader.open(dir); + unSortedEnum = MultiFields.getTermPositionsEnum(dirReader, + null, PAYLOAD_TEST_CONTENT, new BytesRef(PAYLOAD_TEST_CONTENT)); + + DocsAndPositionsEnum oldEnum = MultiFields + .getTermPositionsEnum(dirReader, null, PAYLOAD_TEST_CONTENT, new BytesRef(PAYLOAD_TEST_CONTENT)); + sortingEnum = new SortingDocsAndPositionsEnum(oldEnum, oldToNew); + } + + @Test + public void testNextDoc() throws Exception { + assertEquals(0, sortingEnum.nextDoc()); + assertEquals(1, sortingEnum.nextDoc()); + assertEquals(2, sortingEnum.nextDoc()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, sortingEnum.nextDoc()); + } + + @Test + public void testFreq() throws Exception { + assertEquals(sortingEnum.nextDoc(), 0); + assertEquals(sortingEnum.freq(), 3); + assertEquals(sortingEnum.nextDoc(), 1); + assertEquals(sortingEnum.freq(), 2); + assertEquals(sortingEnum.nextDoc(), 2); + assertEquals(sortingEnum.freq(), 1); + } + + @Test + public void testNextPosition() throws Exception { + int[][] unsortedPositions = new int[3][]; + int[][] sortedPositions = new int[3][]; + for ( int i=0; i < 
3; i++) { + unSortedEnum.nextDoc(); + unsortedPositions[i] = new int[unSortedEnum.freq()]; + for (int j = 0; j < unsortedPositions[i].length; j++) { + unsortedPositions[i][j] = unSortedEnum.nextPosition(); + } + + sortingEnum.nextDoc(); + sortedPositions[i] = new int[sortingEnum.freq()]; + for (int j = 0; j < sortedPositions[i].length; j++) { + sortedPositions[i][j] = sortingEnum.nextPosition(); + } + } + assertEquals(unSortedEnum.nextDoc(), DocIdSetIterator.NO_MORE_DOCS); + assertEquals(sortingEnum.nextDoc(), DocIdSetIterator.NO_MORE_DOCS); + + for (int i=0; i < sortedPositions.length; i++) { + assertArrayEquals(sortedPositions[i], unsortedPositions[2-i]); + } + } + + @Test + public void testStartOffset() throws Exception { + int[][] unsortedOffsets = new int[3][]; + int[][] sortedOffsets = new int[3][]; + + for ( int i=0; i < 3; i++) { + unSortedEnum.nextDoc(); + unsortedOffsets[i] = new int[unSortedEnum.freq()]; + for (int j = 0; j < unsortedOffsets[i].length; j++) { + unSortedEnum.nextPosition(); + unsortedOffsets[i][j] = unSortedEnum.startOffset(); + } + + sortingEnum.nextDoc(); + sortedOffsets[i] = new int[sortingEnum.freq()]; + for (int j = 0; j < sortedOffsets[i].length; j++) { + sortingEnum.nextPosition(); + sortedOffsets[i][j] = sortingEnum.startOffset(); + } + } + assertEquals(unSortedEnum.nextDoc(), DocIdSetIterator.NO_MORE_DOCS); + assertEquals(sortingEnum.nextDoc(), DocIdSetIterator.NO_MORE_DOCS); + + for (int i=0; i < sortedOffsets.length; i++) { + assertArrayEquals(sortedOffsets[i], unsortedOffsets[2-i]); + } + } + + @Test + public void testEndOffset() throws Exception { + int[][] unsortedOffsets = new int[3][]; + int[][] sortedOffsets = new int[3][]; + + for ( int i=0; i < 3; i++) { + unSortedEnum.nextDoc(); + unsortedOffsets[i] = new int[unSortedEnum.freq()]; + for (int j = 0; j < unsortedOffsets[i].length; j++) { + unSortedEnum.nextPosition(); + unsortedOffsets[i][j] = unSortedEnum.endOffset(); + } + + sortingEnum.nextDoc(); + sortedOffsets[i] = 
new int[sortingEnum.freq()]; + for (int j = 0; j < sortedOffsets[i].length; j++) { + sortingEnum.nextPosition(); + sortedOffsets[i][j] = sortingEnum.endOffset(); + } + } + assertEquals(unSortedEnum.nextDoc(), DocIdSetIterator.NO_MORE_DOCS); + assertEquals(sortingEnum.nextDoc(), DocIdSetIterator.NO_MORE_DOCS); + + for (int i=0; i < sortedOffsets.length; i++) { + assertArrayEquals(sortedOffsets[i], unsortedOffsets[2-i]); + } + } + + @Test + public void testGetPayload() throws Exception { + BytesRef[][] unsortedPayloads = new BytesRef[3][]; + BytesRef[][] sortedPayloads = new BytesRef[3][]; + + for ( int i=0; i < 3; i++) { + unSortedEnum.nextDoc(); + unsortedPayloads[i] = new BytesRef[unSortedEnum.freq()]; + for (int j = 0; j < unsortedPayloads[i].length; j++) { + unSortedEnum.nextPosition(); + unsortedPayloads[i][j] = BytesRef.deepCopyOf(unSortedEnum.getPayload()); + } + + sortingEnum.nextDoc(); + sortedPayloads[i] = new BytesRef[sortingEnum.freq()]; + for (int j = 0; j < sortedPayloads[i].length; j++) { + sortingEnum.nextPosition(); + sortedPayloads[i][j] = BytesRef.deepCopyOf(sortingEnum.getPayload()); + } + } + assertEquals(unSortedEnum.nextDoc(), DocIdSetIterator.NO_MORE_DOCS); + assertEquals(sortingEnum.nextDoc(), DocIdSetIterator.NO_MORE_DOCS); + + for (int i=0; i < sortedPayloads.length; i++) { + assertArrayEquals(sortedPayloads[i], unsortedPayloads[2-i]); + } + } + +} Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnumTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingDocsEnumTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SortingDocsEnumTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SortingDocsEnumTest.java (working copy) @@ -0,0 
+1,118 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.IntField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; + +public class 
SortingDocsEnumTest extends LuceneTestCase{ + + private SortingDocsEnum sortingDocsEnum; + + static final IndexWriterConfig INDEX_WRITER_CONFIG = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random())); + private final int[] oldToNew = { 2, 1, 0 }; + + private static final String PAYLOAD_TEST_CONTENT = "payload"; + private static final String VAL = "Val"; + + private void addPayload(final Document doc, final Integer id) { + final FieldType fieldType = new FieldType(); + fieldType.setIndexed(true); + fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + fieldType.freeze(); + + doc.add(new Field(PAYLOAD_TEST_CONTENT, new TokenStream() { + int positionsCount = id+1; + private final CharTermAttribute attCharTerm = addAttribute(CharTermAttribute.class); + private final PayloadAttribute attPayload = addAttribute(PayloadAttribute.class); + private final OffsetAttribute attOffset = addAttribute(OffsetAttribute.class); + + @Override + public boolean incrementToken() throws IOException { + if (positionsCount-- == 0) return false; + + attCharTerm.setEmpty().append(PAYLOAD_TEST_CONTENT); + attPayload.setPayload(new BytesRef(id.toString())); + attOffset.setOffset(id, id); + return true; + } + + }, fieldType)); + } + + private Document doc(final Integer id) { + final Document doc = new Document(); + + doc.add(new IntField(VAL, id, Store.YES)); + addPayload(doc, id); + return doc; + } + +@Override +@Before + public void setUp() throws Exception { + super.setUp(); + Directory dir = new RAMDirectory(); + final IndexWriter indexWriter = new IndexWriter(dir, INDEX_WRITER_CONFIG); + + indexWriter.addDocument(doc(0)); + indexWriter.addDocument(doc(1)); + indexWriter.addDocument(doc(2)); + indexWriter.close(); + + final DirectoryReader dirReader = DirectoryReader.open(dir); + DocsEnum docsEnum = MultiFields.getTermDocsEnum(dirReader, null, PAYLOAD_TEST_CONTENT, new BytesRef(PAYLOAD_TEST_CONTENT)); + sortingDocsEnum = new 
SortingDocsEnum(docsEnum, oldToNew); + } + + + @Test + public void testNextDoc() { + try { + assertEquals(0, sortingDocsEnum.nextDoc()); + assertEquals(1, sortingDocsEnum.nextDoc()); + assertEquals(2, sortingDocsEnum.nextDoc()); + } catch (IOException e) { + fail("Exception"); + } + + } + +} Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingDocsEnumTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingFieldsTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SortingFieldsTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SortingFieldsTest.java (working copy) @@ -0,0 +1,95 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.util.Iterator; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.IntField; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; + +public class SortingFieldsTest extends LuceneTestCase{ + + AtomicReader atomicReader; + + static final IndexWriterConfig INDEX_WRITER_CONFIG = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random())); + private final int[] oldToNew = { 2, 1, 0 }; + + private static final String VAL = "Val"; + private SortingFields sortingFields; + + private Fields unsortedFields; + + private Document doc(final Integer id) { + final Document doc = new Document(); + doc.add(new IntField(VAL, id, Store.YES)); + return doc; + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + Directory dir = new RAMDirectory(); + final IndexWriter indexWriter = new IndexWriter(dir, INDEX_WRITER_CONFIG); + + indexWriter.addDocument(doc(0)); + indexWriter.addDocument(doc(1)); + indexWriter.addDocument(doc(2)); + indexWriter.close(); + + final DirectoryReader dirReader = DirectoryReader.open(dir); + atomicReader = new SlowCompositeReaderWrapper(dirReader); + unsortedFields = atomicReader.fields(); + sortingFields = new SortingFields(unsortedFields, oldToNew); + } + + @Test + public void testSize() { + assertEquals(unsortedFields.size(), sortingFields.size()); + } + + @Test + public void testIterator() { + Iterator i1 = 
unsortedFields.iterator(); + Iterator i2 = sortingFields.iterator(); + while (i1.hasNext()) { + assertEquals(i1.next(), i2.next()); + } + assertFalse(i1.hasNext()); + assertFalse(i2.hasNext()); + } + + @Test + public void testTermsString() throws Exception { + SorterTestUtils.compareTerms(unsortedFields.terms(VAL), sortingFields.terms(VAL)); + } + +} Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingFieldsTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingIndexReaderTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SortingIndexReaderTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SortingIndexReaderTest.java (working copy) @@ -0,0 +1,217 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; +import java.util.Iterator; +import java.util.Map.Entry; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DocumentStoredFieldVisitor; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.FieldType.NumericType; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.CompositeReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.similarities.DefaultSimilarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; + +public class SortingIndexReaderTest extends LuceneTestCase { + + private static final FieldType FIELD_TYPE = new FieldType(); + static { + FIELD_TYPE.setIndexed(true); + FIELD_TYPE.setStored(true); + FIELD_TYPE.setStoreTermVectors(true); + FIELD_TYPE.setOmitNorms(false); + FIELD_TYPE.setNumericType(NumericType.INT); + FIELD_TYPE.freeze(); + } + + private AtomicReader atomicReader; + + private static final String VAL = "val"; + private static final String DV_FIELD = "docVal"; + + private SortingAtomicReader sortingIndexReader; + private CompositeReader unsortedIndexReader; + private CompositeReader oldIndexReader; + + private Document doc(final Integer id) { + final Document doc = new 
Document(); + doc.add(new NumericDocValuesField(DV_FIELD, id)); + // for the norm values test. We want each doc to have a different norm value + // therefore we add a different number of terms to each doc. + for (int i = 0; i <= id; i++) { + doc.add(new IntField(VAL, id, FIELD_TYPE)); + } + return doc; + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + Directory dir = new RAMDirectory(); + new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random())).setSimilarity(new DefaultSimilarity()); + final IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random()))); + + indexWriter.addDocument(doc(0)); + indexWriter.addDocument(doc(1)); + indexWriter.addDocument(doc(2)); + indexWriter.close(); + + final DirectoryReader dirReader = DirectoryReader.open(dir); + atomicReader = new SlowCompositeReaderWrapper(dirReader); + unsortedIndexReader = DirectoryReader.open(dir); + oldIndexReader = DirectoryReader.open(dir); + sortingIndexReader = new SortingAtomicReader(oldIndexReader, new Sorter() { + + @Override + public int[] oldToNew(AtomicReader reader) throws IOException { + return new int[] { 2, 1, 0 }; + } + }); + } + + @Test + public void testNumDocs() { + assertEquals(unsortedIndexReader.numDocs(), sortingIndexReader.numDocs()); + } + + @Test + public void testMaxDoc() { + assertEquals(unsortedIndexReader.maxDoc(), sortingIndexReader.maxDoc()); + } + + @Test + public void testDocumentIntStoredFieldVisitor() throws Exception { + DocumentStoredFieldVisitor sortedVisitor = new DocumentStoredFieldVisitor(); + DocumentStoredFieldVisitor oldVisitor = new DocumentStoredFieldVisitor(); + unsortedIndexReader.document(0, oldVisitor); + sortingIndexReader.document(2, sortedVisitor); + assertEquals(oldVisitor.getDocument().toString(), sortedVisitor.getDocument().toString()); + + unsortedIndexReader.document(1, oldVisitor); + sortingIndexReader.document(1, sortedVisitor); + 
assertEquals(oldVisitor.getDocument().toString(), sortedVisitor.getDocument().toString()); + + unsortedIndexReader.document(2, oldVisitor); + sortingIndexReader.document(0, sortedVisitor); + assertEquals(oldVisitor.getDocument().toString(), sortedVisitor.getDocument().toString()); + } + + @Test + public void testNumericDocValues() throws Exception { + assertNull(sortingIndexReader.getNumericDocValues("")); + final NumericDocValues docValues = sortingIndexReader.getNumericDocValues(DV_FIELD); + assertEquals(2, docValues.get(0)); + assertEquals(1, docValues.get(1)); + assertEquals(0, docValues.get(2)); + } + + @Test + public void testFields() throws Exception { + Fields sortedFields = sortingIndexReader.fields(); + Terms sortedTerms = sortedFields.terms(VAL); + Terms oldTerms = atomicReader.fields().terms(VAL); + TermsEnum sortedTermsIt = sortedTerms.iterator(null); + TermsEnum oldTermsIt = oldTerms.iterator(null); + assertEquals(oldTerms.size(), sortedTerms.size()); + for (int i = 0; i < oldTerms.size(); i++) { + assertEquals(oldTermsIt.next(), sortedTermsIt.next()); + } + } + + @Test + public void testGetLiveDocs() { + assertNull(sortingIndexReader.getLiveDocs()); + } + + @Test + public void testGetFieldInfos() { + FieldInfos oldReaderInfo = atomicReader.getFieldInfos(); + FieldInfos sortingReaderInfo = sortingIndexReader.getFieldInfos(); + Iterator oldInfoIt = oldReaderInfo.iterator(); + Iterator sortingInfoIt = sortingReaderInfo.iterator(); + while (sortingInfoIt.hasNext()) { + FieldInfo oldF = oldInfoIt.next(); + FieldInfo sortedF = sortingInfoIt.next(); + assertEquals(oldF.name, sortedF.name); + assertEquals(oldF.number, sortedF.number); + assertEquals(oldF.hasDocValues(), sortedF.hasDocValues()); + assertEquals(oldF.hasNorms(), sortedF.hasNorms()); + assertEquals(oldF.hasPayloads(), sortedF.hasPayloads()); + assertEquals(oldF.hasVectors(), sortedF.hasVectors()); + assertEquals(oldF.isIndexed(), sortedF.isIndexed()); + assertEquals(oldF.omitsNorms(), 
sortedF.omitsNorms()); + assertEquals(oldF.getDocValuesType(), sortedF.getDocValuesType()); + assertEquals(oldF.getIndexOptions(), sortedF.getIndexOptions()); + assertEquals(oldF.getNormType(), sortedF.getNormType()); + if (sortedF.attributes() == null) { + assertNull(oldF.attributes()); + } else { + Iterator> sortedMap = sortedF.attributes().entrySet().iterator(); + Iterator> oldMap = oldF.attributes().entrySet().iterator(); + + while (sortedMap.hasNext()) { + Entry sortedEntry = sortedMap.next(); + Entry oldEntry = oldMap.next(); + assertEquals(sortedEntry.getKey(), oldEntry.getKey()); + assertEquals(sortedEntry.getValue(), oldEntry.getValue()); + } + assertFalse(oldMap.hasNext()); + } + } + assertFalse(oldInfoIt.hasNext()); + } + + @Test + public void testGetTermVectorsInt() throws Exception { + SorterTestUtils.compareTerms(unsortedIndexReader.getTermVector(2, VAL), sortingIndexReader.getTermVector(0, VAL)); + SorterTestUtils.compareTerms(unsortedIndexReader.getTermVector(1, VAL), sortingIndexReader.getTermVector(1, VAL)); + SorterTestUtils.compareTerms(unsortedIndexReader.getTermVector(0, VAL), sortingIndexReader.getTermVector(2, VAL)); + } + + @Test + public void testNormValuesString() throws Exception { + assertNull(sortingIndexReader.getNormValues("")); + final NumericDocValues sortedNormValues = sortingIndexReader.getNormValues(VAL); + final NumericDocValues oldNormValues = atomicReader.getNormValues(VAL); + assertEquals(oldNormValues.get(2), sortedNormValues.get(0)); + assertEquals(oldNormValues.get(1), sortedNormValues.get(1)); + assertEquals(oldNormValues.get(0), sortedNormValues.get(2)); + } + +} Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingIndexReaderTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingNumericDocValuesTest.java 
=================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SortingNumericDocValuesTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SortingNumericDocValuesTest.java (working copy) @@ -0,0 +1,78 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; + +public class SortingNumericDocValuesTest extends LuceneTestCase{ + + AtomicReader atomicReader; + + static final IndexWriterConfig INDEX_WRITER_CONFIG = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random())); + private final int[] oldToNew = { 2, 1, 0 }; + + private static final String DV_FIELD = "Val"; + private SortingNumericDocValues sortingDocValues; + private NumericDocValues unsortedDocValues; + + private Document doc(final Integer id) { + final Document doc = new Document(); + doc.add(new NumericDocValuesField(DV_FIELD, id)); + return doc; + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + Directory dir = new RAMDirectory(); + final IndexWriter indexWriter = new IndexWriter(dir, INDEX_WRITER_CONFIG); + + indexWriter.addDocument(doc(0)); + indexWriter.addDocument(doc(1)); + indexWriter.addDocument(doc(2)); + indexWriter.close(); + + final DirectoryReader dirReader = DirectoryReader.open(dir); + atomicReader = new SlowCompositeReaderWrapper(dirReader); + unsortedDocValues = atomicReader.getNumericDocValues(DV_FIELD); + sortingDocValues = new SortingNumericDocValues(unsortedDocValues, oldToNew); + } + + @Test + public void testGetDirectSource() throws Exception{ + assertEquals(unsortedDocValues.get(2), sortingDocValues.get(0)); + 
assertEquals(unsortedDocValues.get(1), sortingDocValues.get(1)); + assertEquals(unsortedDocValues.get(0), sortingDocValues.get(2)); + } + +} Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingNumericDocValuesTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingTermEnumTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SortingTermEnumTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SortingTermEnumTest.java (working copy) @@ -0,0 +1,163 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; + +public class SortingTermEnumTest extends LuceneTestCase{ + + static final IndexWriterConfig INDEX_WRITER_CONFIG = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random())); + + private TermsEnum unsortedTermsEnum; + private SortingTermEnum sortingTermEnum; + private final int[] oldToNew = { 2, 1, 0 }; + AtomicReader atomicReader; + + private static final String VAL = "Val"; + + + private Document doc(final Integer id) { + final Document doc = new Document(); + FieldType ft = new FieldType(); + ft.setIndexed(true); + ft.setStored(true); + for ( int i=0; i <= id; i++) { + doc.add(new Field(VAL,Integer.toString(i), ft)); + } + return doc; + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + Directory dir = new RAMDirectory(); + final IndexWriter indexWriter = new IndexWriter(dir, INDEX_WRITER_CONFIG); + + indexWriter.addDocument(doc(0)); + indexWriter.addDocument(doc(1)); + indexWriter.addDocument(doc(2)); + indexWriter.close(); + + final DirectoryReader dirReader = DirectoryReader.open(dir); + atomicReader = new 
SlowCompositeReaderWrapper(dirReader); + unsortedTermsEnum = atomicReader.terms(VAL).iterator(null); + sortingTermEnum = new SortingTermEnum(atomicReader.terms(VAL).iterator(null), oldToNew); + } + + + @Test + public void testDocFreq() throws IOException { + TermsEnum unsortedEnum = atomicReader.fields().terms(VAL).iterator(null); + for (int i = 0; i < 3 ; i++) { + sortingTermEnum.next(); + unsortedEnum.next(); + assertEquals(unsortedEnum.docFreq(), sortingTermEnum.docFreq()); + } + assertNull(sortingTermEnum.next()); + assertNull(unsortedEnum.next()); + } + + @Test + public void testTotalTermFreq() throws Exception { + TermsEnum unsortedTermEnum = atomicReader.terms(VAL).iterator(null); + unsortedTermEnum.next(); + sortingTermEnum.next(); + assertEquals(unsortedTermEnum.totalTermFreq(), sortingTermEnum.totalTermFreq()); + } + + + @Test + public void testDocs() throws Exception { + BytesRef term = sortingTermEnum.next(); + while (term != null) { + DocsEnum sortedDocs = sortingTermEnum.docs(null, null); + for ( int j = 0; j