Index: lucene/misc/src/java/org/apache/lucene/index/sorter/NumericDocValuesSorter.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/NumericDocValuesSorter.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/NumericDocValuesSorter.java (working copy) @@ -0,0 +1,50 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.NumericDocValues; + +/** + * A {@link Sorter} which sorts documents according to their DocValues. 
+ * + * @lucene.experimental + */ +public class NumericDocValuesSorter implements Sorter { + + private final String fieldName; + + public NumericDocValuesSorter(final String fieldName) { + this.fieldName = fieldName; + } + + @Override + public int[] oldToNew(final AtomicReader reader) throws IOException { + NumericDocValues ndv = reader.getNumericDocValues(fieldName); + final int maxDoc = reader.maxDoc(); + final SortDoc[] docs = new SortDoc[maxDoc]; + for (int i = 0; i < maxDoc; i++) { + long val = ndv.get(i); + docs[i] = new SortDoc(new Long(val), i); + } + return SortDoc.old2new(docs); + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/NumericDocValuesSorter.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/PayloadSorter.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/PayloadSorter.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/PayloadSorter.java (working copy) @@ -0,0 +1,60 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; + +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BytesRef; + +/** + * A {@link Sorter} which sorts a document according to its payload. + * + * @lucene.experimental + */ +public abstract class PayloadSorter> implements Sorter { + + private final Term term; + + public PayloadSorter(Term term) { + this.term = term; + } + + /** + * Parse the given {@link BytesRef} into a {@link Comparable} by which + * to sort the documents in the index. + */ + protected abstract T parse(BytesRef payload); + + @Override + public int[] oldToNew(final AtomicReader reader) throws IOException { + final DocsAndPositionsEnum it = reader.termPositionsEnum(term); + final int maxDoc = reader.maxDoc(); + final SortDoc[] docs = new SortDoc[maxDoc]; + int i = 0; + while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + it.nextPosition(); + docs[i] = new SortDoc(parse(it.getPayload()), i); + i++; + } + return SortDoc.old2new(docs); + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/PayloadSorter.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/ReverseDocIdSorter.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/ReverseDocIdSorter.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/ReverseDocIdSorter.java (working copy) @@ -0,0 +1,40 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; + +import org.apache.lucene.index.AtomicReader; + +/** + * A {@link Sorter} which sorts document in a reverse order to their doc id's + * + * @lucene.experimental + */ +public class ReverseDocIdSorter implements Sorter { + + @Override + public int[] oldToNew(final AtomicReader reader) throws IOException { + final int maxDoc = reader.maxDoc(); + int[] reverseDocs = new int[maxDoc]; + for (int i = 0; i < maxDoc; i++) { + reverseDocs[i] = maxDoc - (i + 1); + } + return reverseDocs; + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/ReverseDocIdSorter.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortDoc.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortDoc.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortDoc.java (working copy) @@ -0,0 +1,62 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
import java.util.Arrays;

/**
 * This class represents a document that is to be sorted in the index: a
 * document ID paired with the value the document is sorted by.
 *
 * @param <T> the type of the sort value
 *
 * @lucene.experimental
 */
public class SortDoc<T extends Comparable<T>> implements Comparable<SortDoc<T>> {

  /** The value the document is sorted by. */
  private final T value;
  /** The ID of the document before sorting. */
  private final int id;

  /**
   * Sorts the given documents in place (by value, ties broken by ID) and
   * returns their IDs in the resulting order. Entry {@code i} of the returned
   * array is the original ID of the document that ends up at position
   * {@code i}; in other words, the array is indexed by the new position.
   *
   * @param docs the documents to sort; the array is reordered by this call
   * @return the document IDs in sorted order, same length as {@code docs}
   */
  public static int[] old2new(final SortDoc<?>[] docs) {
    Arrays.sort(docs);
    final int[] sortedIds = new int[docs.length];
    for (int i = 0; i < docs.length; i++) {
      sortedIds[i] = docs[i].id;
    }
    return sortedIds;
  }

  /**
   * @param value the value to sort the document by
   * @param docId the ID of the document
   */
  public SortDoc(final T value, final int docId) {
    this.value = value;
    this.id = docId;
  }

  /**
   * Orders by value first and by document ID second, so that documents with
   * equal values keep their original relative order.
   */
  @Override
  public int compareTo(final SortDoc<T> doc) {
    final int byValue = value.compareTo(doc.value);
    if (byValue != 0) {
      return byValue;
    }
    // break ties by doc ID; explicit comparison cannot overflow
    return id < doc.id ? -1 : (id == doc.id ? 0 : 1);
  }

  @Override
  public String toString() {
    return "id=" + id + ",value=" + value;
  }

}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; + +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.store.Directory; + +/** + * An interface for sorting documents in a {@link Directory}. + * + * @lucene.experimental + */ +public interface Sorter { + + /** Returns a list of document id's of the document in the received directory, + * sorted according to their new order. */ + int[] oldToNew(AtomicReader reader) throws IOException; + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/Sorter.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SorterUtil.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SorterUtil.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SorterUtil.java (working copy) @@ -0,0 +1,55 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; + +/*** + * A utility class for index sorting. + * + * @lucene.experimental + */ +public class SorterUtil { + + /** + * @param in A directory of documents to be sorted + * @param out The directory to which the sorted documents will be added + * @param sorter An object that knows how to sort the documents. + */ + public static void sort(Directory in, Directory out, Sorter sorter) throws IOException { + IndexWriter writer = null; + DirectoryReader reader = null; + SortingAtomicReader sortingReader = null; + try { + writer = new IndexWriter(out, new IndexWriterConfig(Version.LUCENE_50, null)); + reader = DirectoryReader.open(in); + sortingReader = new SortingAtomicReader(SlowCompositeReaderWrapper.wrap(reader), sorter); + writer.addIndexes(sortingReader); + } finally { + IOUtils.close(writer, reader, sortingReader); + } + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SorterUtil.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java (working copy) @@ -0,0 +1,197 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; + +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.FilterAtomicReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.Bits; + +/** + * An {@link AtomicReader} which supports sorting documents by a given + * {@link Sorter}. + *

+ * NOTE: currently this reader does not support input {@link IndexReader + * readers} with deleted documents. + * + * @lucene.experimental + */ +public class SortingAtomicReader extends FilterAtomicReader { + + private static class SortingFields extends FilterFields { + + private final int[] old2new; + + public SortingFields(final Fields in, final int[] old2new) { + super(in); + this.old2new = old2new; + } + + @Override + public Terms terms(final String field) throws IOException { + Terms terms = in.terms(field); + if (terms == null) { + return null; + } else { + return new SortingTerms(terms, old2new); + } + } + + } + + private static class SortingTerms extends FilterTerms { + + private final int[] old2new; + + public SortingTerms(final Terms in, final int[] old2new) { + super(in); + this.old2new = old2new; + } + + @Override + public TermsEnum iterator(final TermsEnum reuse) throws IOException { + return new SortingTermsEnum(in.iterator(reuse), old2new); + } + + } + + private static class SortingTermsEnum extends FilterTermsEnum { + + private final int[] old2new; + + public SortingTermsEnum(final TermsEnum in, final int[] old2new) { + super(in); + this.old2new = old2new; + } + + @Override + public DocsEnum docs(final Bits liveDocs, final DocsEnum reuse, final int flags) throws IOException { + return new SortingDocsEnum(in.docs(liveDocs, reuse, flags), old2new); + } + + @Override + public DocsAndPositionsEnum docsAndPositions(final Bits liveDocs, final DocsAndPositionsEnum reuse, final int flags) throws IOException { + DocsAndPositionsEnum positions = in.docsAndPositions(liveDocs, reuse, flags); + if (positions == null) { + return null; + } else { + return new SortingDocsAndPositionsEnum(positions, old2new); + } + } + + } + + private final int[] old2new; + + public SortingAtomicReader(final AtomicReader in, final Sorter sorter) throws IOException { + super(in); + if (in.hasDeletions()) { + throw new UnsupportedOperationException("sorting an index which has 
deletions is unsupported yet"); + } + old2new = sorter.oldToNew(in); + } + + @Override + public void document(final int docID, final StoredFieldVisitor visitor) throws IOException { + in.document(old2new[docID], visitor); + } + + @Override + public BinaryDocValues getBinaryDocValues(String field) throws IOException { + BinaryDocValues oldDocValues = in.getBinaryDocValues(field); + if (oldDocValues == null) { + return null; + } else { + return new SortingBinaryDocValues(oldDocValues, old2new); + } + } + + @Override + public NumericDocValues getNumericDocValues(String field) throws IOException { + final NumericDocValues oldDocValues = in.getNumericDocValues(field); + if (oldDocValues == null) return null; + return new SortingNumericDocValues(oldDocValues, old2new); + } + + @Override + public Fields fields() throws IOException { + Fields fields = in.fields(); + if (fields == null) { + return null; + } else { + return new SortingFields(fields, old2new); + } + } + + @Override + public Bits getLiveDocs() { + return null; // default - no deleted docs + } + + @Override + public Fields getTermVectors(final int docID) throws IOException { + return in.getTermVectors(old2new[docID]); + } + + @Override + public boolean hasDeletions() { + return false; + } + + @Override + public NumericDocValues getNormValues(String field) throws IOException { + final NumericDocValues norm = in.getNormValues(field); + if (norm == null) { + return null; + } else { + return new SortingNumericDocValues(norm, old2new); + } + } + + @Override + public SortedDocValues getSortedDocValues(String field) throws IOException { + SortedDocValues sortedDV = in.getSortedDocValues(field); + if (sortedDV == null) { + return null; + } else { + return new SortingSortedDocValues(sortedDV, old2new); + } + } + + @Override + public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { + SortedSetDocValues sortedSetDV = in.getSortedSetDocValues(field); + if (sortedSetDV == null) { + return null; + 
} else { + return new SortingSortedSetDocValues(sortedSetDV, old2new); + } + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingBinaryDocValues.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingBinaryDocValues.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingBinaryDocValues.java (working copy) @@ -0,0 +1,39 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.util.BytesRef; + +/** + * Allows a {@link BinaryDocValues} to be read according to old2new permutation. 
+ */ +class SortingBinaryDocValues extends BinaryDocValues { + private final BinaryDocValues in; + private final int old2new[]; + + SortingBinaryDocValues(BinaryDocValues in, int old2new[]) { + this.in = in; + this.old2new = old2new; + } + + @Override + public void get(int docID, BytesRef result) { + in.get(old2new[docID], result); + } +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingBinaryDocValues.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnum.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnum.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnum.java (working copy) @@ -0,0 +1,195 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RAMFile; +import org.apache.lucene.store.RAMInputStream; +import org.apache.lucene.store.RAMOutputStream; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.SorterTemplate; + +/** + * Allow {@link DocsAndPositionsEnum} to be read according to old2new permutation. + */ +class SortingDocsAndPositionsEnum extends DocsAndPositionsEnum { + + /** + * A {@link SorterTemplate} which sorts two parallel arrays of doc IDs and + * offsets in one go. Everytime a doc ID is 'swapped', its correponding offset + * is swapped too. + */ + private static final class DocOffsetSorterTemplate extends SorterTemplate { + + private final int[] docs; + private final long[] offsets; + + private int pivot; + + public DocOffsetSorterTemplate(int[] docs, long[] offsets) { + this.docs = docs; + this.offsets = offsets; + } + + @Override + protected int compare(int i, int j) { + return docs[i] - docs[j]; + } + + @Override + protected int comparePivot(int j) { + return pivot - docs[j]; + } + + @Override + protected void setPivot(int i) { + pivot = docs[i]; + } + + @Override + protected void swap(int i, int j) { + int tmpDoc = docs[i]; + docs[i] = docs[j]; + docs[j] = tmpDoc; + + long tmpOffset = offsets[i]; + offsets[i] = offsets[j]; + offsets[j] = tmpOffset; + } + } + + private int[] docs; + private long[] offsets; + private final int upto; + + private final IndexInput in; + + private int docIt = -1; + private int pos; + private int startOffset; + private int endOffset; + private final BytesRef payload = new BytesRef(32); + private int currFreq; + + public SortingDocsAndPositionsEnum(final DocsAndPositionsEnum in, final int[] old2new) throws IOException { + final RAMFile file = 
new RAMFile(); + final IndexOutput out = new RAMOutputStream(file); + docs = new int[32]; + offsets = new long[32]; + int doc; + int i = 0; + while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (i == docs.length) { + docs = ArrayUtil.grow(docs, i + 1); + offsets = ArrayUtil.grow(offsets, i + 1); + } + docs[i] = old2new[doc]; + offsets[i] = out.getFilePointer(); + addPositions(in, out); + i++; + } + upto = i; + SorterTemplate sorter = new DocOffsetSorterTemplate(docs, offsets); + sorter.quickSort(0, upto - 1); + out.close(); + this.in = new RAMInputStream("", file); + } + + private void addPositions(final DocsAndPositionsEnum oldDocsAndPositions, final IndexOutput out) throws IOException { + out.writeVInt(oldDocsAndPositions.freq()); + for (int i = 0; i < oldDocsAndPositions.freq(); i++) { + final int oldPos = oldDocsAndPositions.nextPosition(); + + out.writeVInt(oldPos); + out.writeVInt(oldDocsAndPositions.startOffset()); + out.writeVInt(oldDocsAndPositions.endOffset()); + + BytesRef payload = oldDocsAndPositions.getPayload(); + if (payload != null) { + out.writeVInt(payload.length); + out.writeBytes(payload.bytes, payload.offset, payload.length); + } else { + out.writeVInt(0); + } + } + } + + @Override + public int advance(final int target) throws IOException { + // need to support it for checkIndex, but in practice it won't be called, so + // don't bother to implement efficiently for now. + while (nextDoc() < target) {} + return docID(); + } + + @Override + public int docID() { + return docIt >= upto ? NO_MORE_DOCS : docs[docIt]; + } + + @Override + public int endOffset() throws IOException { + return endOffset; + } + + @Override + public int freq() throws IOException { + return currFreq; + } + + @Override + public BytesRef getPayload() throws IOException { + return payload.length == 0 ? 
null : payload; + } + + @Override + public int nextDoc() throws IOException { + if (++docIt >= upto) return DocIdSetIterator.NO_MORE_DOCS; + in.seek(offsets[docIt]); + currFreq = in.readVInt(); + return docs[docIt]; + } + + @Override + public int nextPosition() throws IOException { + pos = in.readVInt(); + startOffset = in.readVInt(); + endOffset = in.readVInt(); + int length = in.readVInt(); + if (length > 0) { + if (length >= payload.bytes.length) { + payload.grow(length + 1); + } + in.readBytes(payload.bytes, 0, length); + } + payload.length = length; + return pos; + } + + @Override + public int startOffset() throws IOException { + return startOffset; + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsAndPositionsEnum.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsEnum.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsEnum.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsEnum.java (working copy) @@ -0,0 +1,74 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.ArrayUtil; + +/** + * A {@link DocsEnum} which sorts another enum according to and old2new + * permutation. Assumes that frequencies are not needed. + */ +class SortingDocsEnum extends DocsEnum { + + private int[] docs = new int[64]; + private int docIt = -1; + private final int upto; + + public SortingDocsEnum(final DocsEnum in, final int[] old2new) throws IOException { + int i = 0; + while (in.nextDoc() != DocIdSetIterator.NO_MORE_DOCS){ + if (i >= docs.length) { + docs = ArrayUtil.grow(docs, docs.length + 1); + } + docs[i++] = old2new[in.docID()]; + } + upto = i; + Arrays.sort(this.docs, 0, upto); + } + + /** This operation is not supported. */ + @Override + public int advance(final int target) throws IOException { + // need to support it for checkIndex, but in practice it won't be called, so + // don't bother to implement efficiently for now. + while (nextDoc() < target) {} + return docID(); + } + + @Override + public int docID() { + return docIt >= upto ? NO_MORE_DOCS : docs[docIt]; + } + + /** This operation is not supported. 
*/ + @Override + public int freq() throws IOException { + throw new UnsupportedOperationException("freq is not supported"); + } + + @Override + public int nextDoc() throws IOException { + if (++docIt >= upto) return NO_MORE_DOCS; + return docs[docIt]; + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingDocsEnum.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingNumericDocValues.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingNumericDocValues.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingNumericDocValues.java (working copy) @@ -0,0 +1,40 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.NumericDocValues; + +/** + * Allows a {@link NumericDocValues} to be read according to old2new permutation. 
+ */ +class SortingNumericDocValues extends NumericDocValues { + + private final NumericDocValues oldDocValues; + private final int[] old2new; + + public SortingNumericDocValues(final NumericDocValues oldDocValues, final int[] old2new) { + this.oldDocValues = oldDocValues; + this.old2new = old2new; + } + + @Override + public long get(int docID) { + return oldDocValues.get(old2new[docID]); + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingNumericDocValues.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingSortedDocValues.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingSortedDocValues.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingSortedDocValues.java (working copy) @@ -0,0 +1,59 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.util.BytesRef; + +/** + * Allows a {@link SortedDocValues} to be read according to old2new permutation. + */ +class SortingSortedDocValues extends SortedDocValues { + private final SortedDocValues in; + private final int old2new[]; + + SortingSortedDocValues(SortedDocValues in, int old2new[]) { + this.in = in; + this.old2new = old2new; + } + + @Override + public int getOrd(int docID) { + return in.getOrd(old2new[docID]); + } + + @Override + public void lookupOrd(int ord, BytesRef result) { + in.lookupOrd(ord, result); + } + + @Override + public int getValueCount() { + return in.getValueCount(); + } + + @Override + public void get(int docID, BytesRef result) { + in.get(old2new[docID], result); + } + + @Override + public int lookupTerm(BytesRef key) { + return in.lookupTerm(key); + } +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingSortedDocValues.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingSortedSetDocValues.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/SortingSortedSetDocValues.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/SortingSortedSetDocValues.java (working copy) @@ -0,0 +1,61 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; + +/** + * Allows a {@link SortedSetDocValues} to be read according to old2new permutation. + */ +class SortingSortedSetDocValues extends SortedSetDocValues { + + private final SortedSetDocValues in; + private final int old2new[]; + + SortingSortedSetDocValues(SortedSetDocValues in, int old2new[]) { + this.in = in; + this.old2new = old2new; + } + + @Override + public long nextOrd() { + return in.nextOrd(); + } + + @Override + public void setDocument(int docID) { + in.setDocument(old2new[docID]); + } + + @Override + public void lookupOrd(long ord, BytesRef result) { + in.lookupOrd(ord, result); + } + + @Override + public long getValueCount() { + return in.getValueCount(); + } + + @Override + public long lookupTerm(BytesRef key) { + return in.lookupTerm(key); + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/SortingSortedSetDocValues.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/StoredDocumentSorter.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/StoredDocumentSorter.java (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/StoredDocumentSorter.java (working copy) @@ -0,0 +1,48 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.StoredDocument; + +/** + * A {@link Sorter} which sorts documents according to their stored fields. + * + * @lucene.experimental + */ +public abstract class StoredDocumentSorter> implements Sorter { + + /** + * Parse the given {@link StoredDocument} into a {@link Comparable} by which + * to sort the documents in the index. 
+ */ + protected abstract T parse(StoredDocument doc); + + @Override + public int[] oldToNew(final AtomicReader reader) throws IOException { + final int maxDoc = reader.maxDoc(); + final SortDoc[] docs = new SortDoc[maxDoc]; + for (int i = 0; i < maxDoc; i++) { + docs[i] = new SortDoc(parse(reader.document(i)), i); + } + return SortDoc.old2new(docs); + } + +} Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/StoredDocumentSorter.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/java/org/apache/lucene/index/sorter/package.html =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/sorter/package.html (revision 0) +++ lucene/misc/src/java/org/apache/lucene/index/sorter/package.html (working copy) @@ -0,0 +1,27 @@ + + + + +Provides offline index sorting capablities. The application can use one of the +pre-existing Sorter implementations, e.g. to sort by a stored field, a numeric +DocValues, a payload or reverse the order of the documents. Additionally, the +application can implement the Sorter interface which returns a permutation on +a source Directory's document IDs, to sort the input documents by additional +values. 
+ + Property changes on: lucene/misc/src/java/org/apache/lucene/index/sorter/package.html ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTest.java (working copy) @@ -0,0 +1,194 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LogByteSizeMergePolicy; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.StoredDocument; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class SorterTest extends LuceneTestCase{ + + private static final String ID_FIELD = "id"; + private static final String PAYLOAD_FIELD = "payload"; + private static final String PAYLOAD_TERM = "$all$"; + private static final String NUMERIC_DV_FIELD = "numeric"; + + static final class PayloadTokenStream extends TokenStream { + + private final CharTermAttribute term; + private final PayloadAttribute payload; + + private boolean done = false; + private int id; + + public PayloadTokenStream() { + term = addAttribute(CharTermAttribute.class); + term.append(PAYLOAD_TERM); + payload = addAttribute(PayloadAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + if (done) { + return false; + } + + payload.setPayload(new BytesRef(Integer.toString(id))); + done = true; + return true; + } + + void setId(int id) { + this.id = id; + done 
= false; + } + } + + private static Directory dir; + private static PayloadTokenStream payloads; + private static AtomicReader reader; + private static int[] expOldToNew; + + private static Document doc(final int id) { + final Document doc = new Document(); + doc.add(new StoredField(ID_FIELD, id)); + payloads.setId(id); + doc.add(new TextField(PAYLOAD_FIELD, payloads)); + doc.add(new NumericDocValuesField(NUMERIC_DV_FIELD, id)); + return doc; + } + + @BeforeClass + public static void beforeClassSorterTest() throws Exception { + payloads = new PayloadTokenStream(); + dir = newDirectory(); + int numDocs = atLeast(20); + List ids = new ArrayList(); + for (int i = 0; i < numDocs; i++) { + ids.add(i); + } + Collections.shuffle(ids, random()); + expOldToNew = new int[numDocs]; + int idx = 0; + for (Integer id : ids) { + expOldToNew[id.intValue()] = idx++; + } + // use LogMergePolicy because we rely on docs order + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) + .setMaxBufferedDocs(4).setMergePolicy(new LogByteSizeMergePolicy())); + for (Integer id : ids) { + writer.addDocument(doc(id)); + } + writer.close(); + + reader = new SlowCompositeReaderWrapper(DirectoryReader.open(dir)); + } + + @AfterClass + public static void afterClassSorterTest() throws Exception { + reader.close(); + dir.close(); + } + + @Test + public void testPayload() throws IOException { + Term term = new Term(PAYLOAD_FIELD, PAYLOAD_TERM); + final PayloadSorter payloadSorter = new PayloadSorter(term) { + @Override + protected Integer parse(final BytesRef bytes) { + return Integer.valueOf(bytes.utf8ToString()); + } + }; + + DirectoryReader r = DirectoryReader.open(dir); + try { + assertArrayEquals(expOldToNew, payloadSorter.oldToNew(SlowCompositeReaderWrapper.wrap(r))); + } finally { + r.close(); + } + } + + @Test + public void testNumericDocValues() throws IOException { + final NumericDocValuesSorter docValSorter = 
new NumericDocValuesSorter(NUMERIC_DV_FIELD); + DirectoryReader r = DirectoryReader.open(dir); + try { + assertArrayEquals(expOldToNew, docValSorter.oldToNew(SlowCompositeReaderWrapper.wrap(r))); + } finally { + r.close(); + } + } + + @Test + public void testStoredDocument() throws IOException { + final StoredDocumentSorter documentSorter = new StoredDocumentSorter() { + + @Override + protected Integer parse(final StoredDocument doc) { + return Integer.valueOf(doc.getField(ID_FIELD).numericValue().intValue()); + } + }; + + DirectoryReader r = DirectoryReader.open(dir); + try { + assertArrayEquals(expOldToNew, documentSorter.oldToNew(SlowCompositeReaderWrapper.wrap(r))); + } finally { + r.close(); + } + } + + @Test + public void testReverseDocId() throws IOException { + final ReverseDocIdSorter documentSorter = new ReverseDocIdSorter(); + DirectoryReader r = DirectoryReader.open(dir); + try { + int[] expReverse = new int[expOldToNew.length]; + for (int i = 0; i < expReverse.length; i++) { + expReverse[expReverse.length - i - 1] = i; + } + assertArrayEquals(expReverse, documentSorter.oldToNew(SlowCompositeReaderWrapper.wrap(r))); + } finally { + r.close(); + } + } + +} Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SorterUtilTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SorterUtilTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SorterUtilTest.java (working copy) @@ -0,0 +1,124 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LogByteSizeMergePolicy; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.StoredDocument; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.sorter.SorterTest.PayloadTokenStream; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.junit.Test; + +public class SorterUtilTest extends LuceneTestCase{ + + private static final String ID_FIELD = "id"; + private static final String PAYLOAD_FIELD = "payload"; + private static final String PAYLOAD_TERM = "$all$"; + private static final String NUMERIC_DV_FIELD = "numeric"; + + private static final Sorter[] SORTERS = new Sorter[3]; + static { + SORTERS[0] = new StoredDocumentSorter() 
{ + @Override + protected Integer parse(final StoredDocument doc) { + return Integer.valueOf(doc.getField(ID_FIELD).numericValue().intValue()); + } + }; + SORTERS[1] = new NumericDocValuesSorter(NUMERIC_DV_FIELD); + SORTERS[2] = new PayloadSorter(new Term(PAYLOAD_FIELD, PAYLOAD_TERM)) { + @Override + protected Integer parse(final BytesRef bytes) { + return Integer.valueOf(bytes.utf8ToString()); + } + }; + } + + private static Document doc(final int id, PayloadTokenStream payloads) { + final Document doc = new Document(); + doc.add(new StoredField(ID_FIELD, id)); + payloads.setId(id); + doc.add(new TextField(PAYLOAD_FIELD, payloads)); + doc.add(new NumericDocValuesField(NUMERIC_DV_FIELD, id)); + return doc; + } + + private int[] createIndex(Directory dir) throws IOException { + PayloadTokenStream payloads = new PayloadTokenStream(); + int numDocs = atLeast(20); + List ids = new ArrayList(); + for (int i = 0; i < numDocs; i++) { + ids.add(i); + } + Collections.shuffle(ids, random()); + int[] oldToNew = new int[numDocs]; + int idx = 0; + for (Integer id : ids) { + oldToNew[id.intValue()] = idx++; + } + // use LogMergePolicy because we rely on docs order + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) + .setMaxBufferedDocs(4).setMergePolicy(new LogByteSizeMergePolicy())); + for (Integer id : ids) { + writer.addDocument(doc(id, payloads)); + } + writer.close(); + return oldToNew; + } + + @Test + public void test() throws IOException { + Directory src = newDirectory(); + Directory target = newDirectory(); + + createIndex(src); + Sorter sorter = SORTERS[random().nextInt(SORTERS.length)]; + SorterUtil.sort(src, target, sorter); + + DirectoryReader r = DirectoryReader.open(target); + try { + int maxDoc = r.maxDoc(); + for (int i = 0; i < maxDoc; i++) { + assertEquals(i, r.document(i).getField(ID_FIELD).numericValue().intValue()); + } + } finally { + r.close(); + } + + // checkIndex 
too + _TestUtil.checkIndex(target, true); + IOUtils.close(src, target); + } + +} Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/SorterUtilTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java (revision 0) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java (working copy) @@ -0,0 +1,394 @@ +package org.apache.lucene.index.sorter; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.search.CollectionStatistics; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.store.Directory; +import 
org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util._TestUtil;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests that a {@link SortingAtomicReader} exposes postings, doc values, norms
+ * and term vectors in the order dictated by a {@link Sorter}.
+ * <p>
+ * The index is built once per class. Every document carries its own id in each
+ * tested structure, and the sorter under test returns a shuffled permutation
+ * ({@link #oldToNew}) of the document ids.
+ * <p>
+ * NOTE(review): {@link #testDocsAndPositionsEnum()} places original doc
+ * {@code d} at sorted position {@code oldToNew[d]}, whereas the random-access
+ * tests (numeric, norms, term vectors, binary, sorted, sorted-set) read the
+ * expected value for sorted doc {@code i} from original doc
+ * {@code oldToNew[i]}. Those two readings agree only when the permutation is
+ * its own inverse -- confirm against the SortingAtomicReader implementation
+ * which direction each API is supposed to apply.
+ */
+@SuppressCodecs({"Lucene40", "Lucene41"})
+public class SortingAtomicReaderTest extends LuceneTestCase {
+
+  private static final String ID_FIELD = "id";
+  private static final String DOCS_ENUM_FIELD = "docs";
+  private static final String DOCS_ENUM_TERM = "$all$";
+  private static final String DOC_POSITIONS_FIELD = "positions";
+  private static final String DOC_POSITIONS_TERM = "$all$";
+  private static final String NUMERIC_DV_FIELD = "numeric";
+  private static final String NORMS_FIELD = "norm";
+  private static final String BINARY_DV_FIELD = "binary";
+  private static final String SORTED_DV_FIELD = "sorted";
+  private static final String SORTED_SET_DV_FIELD = "sorted_set";
+  private static final String TERM_VECTORS_FIELD = "term_vectors";
+
+  /** Non-stored text field type with term vectors enabled. */
+  private static final FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
+  static {
+    TERM_VECTORS_TYPE.setStoreTermVectors(true);
+    TERM_VECTORS_TYPE.freeze();
+  }
+
+  /**
+   * Token stream that emits {@code id + 1} tokens for the document id passed
+   * to {@link #setId(int)}. Each token carries the current countdown value
+   * both as its payload (decimal string) and as its start/end offset.
+   */
+  private static class PositionsTokenStream extends TokenStream {
+
+    private final CharTermAttribute term;
+    private final PayloadAttribute payload;
+    private final OffsetAttribute offset;
+
+    /** Number of tokens still to emit; also the value written into the next token. */
+    private int pos;
+
+    public PositionsTokenStream() {
+      term = addAttribute(CharTermAttribute.class);
+      // The term text is appended once here; incrementToken() never touches it.
+      term.append(DOC_POSITIONS_TERM);
+      payload = addAttribute(PayloadAttribute.class);
+      offset = addAttribute(OffsetAttribute.class);
+    }
+
+    /**
+     * Emits the next token, counting {@link #pos} down by one.
+     *
+     * @return {@code false} once the countdown has reached zero
+     */
+    @Override
+    public boolean incrementToken() throws IOException {
+      if (pos == 0) {
+        return false;
+      }
+
+      payload.setPayload(new BytesRef(Integer.toString(pos)));
+      offset.setOffset(pos, pos);
+      --pos;
+      return true;
+    }
+
+    /** Primes the stream so that it emits {@code id + 1} tokens. */
+    void setId(int id) {
+      pos = id + 1;
+    }
+  }
+
+  /**
+   * Similarity wrapper that, for {@link #NORMS_FIELD} only, stores the raw int
+   * bits of the field boost as the norm. {@link #doc(int)} sets that boost to
+   * {@code Float.intBitsToFloat(id)}, so the norm is meant to round-trip to the
+   * document id. Everything else is delegated to the wrapped similarity.
+   */
+  private static class NormsSimilarity extends Similarity {
+
+    private final Similarity in;
+
+    public NormsSimilarity(Similarity in) {
+      this.in = in;
+    }
+
+    @Override
+    public long computeNorm(FieldInvertState state) {
+      if (state.getName().equals(NORMS_FIELD)) {
+        return Float.floatToIntBits(state.getBoost());
+      } else {
+        return in.computeNorm(state);
+      }
+    }
+
+    @Override
+    public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+      return in.computeWeight(queryBoost, collectionStats, termStats);
+    }
+
+    @Override
+    public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
+      return in.exactSimScorer(weight, context);
+    }
+
+    @Override
+    public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
+      return in.sloppySimScorer(weight, context);
+    }
+
+  }
+
+  private static Directory dir;
+  /** Shared token stream, re-primed by {@link #doc(int)} for every document. */
+  private static PositionsTokenStream positions;
+  private static AtomicReader reader, sortingReader;
+  /** Shuffled permutation of the document ids, returned by the sorter under test. */
+  private static int[] oldToNew;
+
+  /**
+   * Builds the document for {@code id}. The id is written into every structure
+   * the tests inspect: stored int field, postings, positions/payloads/offsets,
+   * numeric/binary/sorted/sorted-set doc values, norms and term vectors.
+   *
+   * @param id the document id to encode
+   * @return the document to index
+   */
+  private static Document doc(final int id) {
+    final Document doc = new Document();
+    doc.add(new IntField(ID_FIELD, id, Store.YES));
+    doc.add(new StringField(DOCS_ENUM_FIELD, DOCS_ENUM_TERM, Store.NO));
+    // The stream is shared between documents, so it is primed for this id
+    // before being handed to the field.
+    positions.setId(id);
+    doc.add(new TextField(DOC_POSITIONS_FIELD, positions));
+    doc.add(new NumericDocValuesField(NUMERIC_DV_FIELD, id));
+    TextField norms = new TextField(NORMS_FIELD, Integer.toString(id), Store.NO);
+    // Smuggle the id through the boost; NormsSimilarity turns it back into bits.
+    norms.setBoost(Float.intBitsToFloat(id));
+    doc.add(norms);
+    doc.add(new BinaryDocValuesField(BINARY_DV_FIELD, new BytesRef(Integer.toString(id))));
+    doc.add(new SortedDocValuesField(SORTED_DV_FIELD, new BytesRef(Integer.toString(id))));
+    // Two distinct values per document for the sorted-set field.
+    doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id))));
+    doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id + 1))));
+    doc.add(new Field(TERM_VECTORS_FIELD, Integer.toString(id), TERM_VECTORS_TYPE));
+    return doc;
+  }
+
+  /**
+   * Indexes at least 20 documents, builds a shuffled {@link #oldToNew}
+   * permutation and wraps the index in a {@link SortingAtomicReader} whose
+   * sorter returns that permutation.
+   */
+  @BeforeClass
+  public static void beforeClassSortingAtomicReaderTest() throws Exception {
+    positions = new PositionsTokenStream();
+    dir = newDirectory();
+    int numDocs = atLeast(20);
+    // Parameterized so that the enhanced-for over Integer below compiles.
+    List<Integer> ids = new ArrayList<Integer>();
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    conf.setMaxBufferedDocs(4); // create some segments
+    conf.setSimilarity(new NormsSimilarity(conf.getSimilarity())); // for testing norms field
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
+    for (int i = 0; i < numDocs; i++) {
+      writer.addDocument(doc(i));
+      ids.add(i);
+    }
+    writer.close();
+
+    // crappy way to shuffle .. if only Arrays had shuffle
+    Collections.shuffle(ids, random());
+    oldToNew = new int[numDocs];
+    int idx = 0;
+    for (Integer id : ids) {
+      oldToNew[idx++] = id.intValue();
+    }
+    reader = new SlowCompositeReaderWrapper(DirectoryReader.open(dir));
+    sortingReader = new SortingAtomicReader(reader, new Sorter() {
+      @SuppressWarnings("synthetic-access")
+      @Override
+      public int[] oldToNew(AtomicReader reader) throws IOException {
+        return oldToNew;
+      }
+    });
+    // Checking the reader without crossCheckTermVectors because it doesn't
+    // work with the sorted index. What happens is that when you ask for tv(0),
+    // you may get tv(2) (if 0 maps to 2). The terms that are associated with
+    // doc 2 are mapped to whatever doc 2 is mapped to, which may not be 0 at
+    // all. For example, if you hardcode the test to oldToNew=[2,0,1] and turn
+    // cross checking on, the test fails because 0 maps to 2, but 2 is mapped
+    // to doc 1.
+    _TestUtil.checkReader(sortingReader, false);
+  }
+
+  /** Closes the reader and directory and drops all static references. */
+  @AfterClass
+  public static void afterClassSortingAtomicReaderTest() throws Exception {
+    reader.close();
+    dir.close();
+    // Release static state so it does not outlive this test class.
+    reader = null;
+    sortingReader = null;
+    dir = null;
+    positions = null;
+    oldToNew = null;
+  }
+
+  /**
+   * The all-documents term must enumerate doc ids {@code 0..n-1} in increasing
+   * order through the sorting reader, followed by {@code NO_MORE_DOCS}.
+   */
+  @Test
+  public void testDocsEnum() throws Exception {
+    Term term = new Term(DOCS_ENUM_FIELD, DOCS_ENUM_TERM);
+    DocsEnum docs = sortingReader.termDocsEnum(term);
+    for (int i = 0; i < oldToNew.length; i++) {
+      assertEquals(i, docs.nextDoc());
+    }
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc());
+  }
+
+  /**
+   * Records freq, positions, offsets and payloads of every original document
+   * under its mapped id ({@code oldToNew[doc]}) and checks that the sorting
+   * reader reports exactly those values, for the same number of documents.
+   */
+  @Test
+  public void testDocsAndPositionsEnum() throws Exception {
+    Term term = new Term(DOC_POSITIONS_FIELD, DOC_POSITIONS_TERM);
+    DocsAndPositionsEnum unsortedPositions = reader.termPositionsEnum(term);
+    DocsAndPositionsEnum sortedPositions = sortingReader.termPositionsEnum(term);
+
+    int[] expFreqs = new int[oldToNew.length];
+    int[][] expPositions = new int[oldToNew.length][];
+    int[][] expStartOffsets = new int[oldToNew.length][];
+    int[][] expEndOffsets = new int[oldToNew.length][];
+    String[][] expPayloads = new String[oldToNew.length][];
+
+    int numUnsortedDocs = 0;
+    int doc;
+    while ((doc = unsortedPositions.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+      int idx = oldToNew[doc];
+      int freq = unsortedPositions.freq();
+      expFreqs[idx] = freq;
+      expPositions[idx] = new int[freq];
+      expStartOffsets[idx] = new int[freq];
+      expEndOffsets[idx] = new int[freq];
+      expPayloads[idx] = new String[freq];
+      for (int i = 0; i < freq; i++) {
+        expPositions[idx][i] = unsortedPositions.nextPosition();
+        expStartOffsets[idx][i] = unsortedPositions.startOffset();
+        expEndOffsets[idx][i] = unsortedPositions.endOffset();
+        expPayloads[idx][i] = unsortedPositions.getPayload().utf8ToString();
+      }
+      numUnsortedDocs++;
+    }
+
+    int numSortedDocs = 0;
+    while ((doc = sortedPositions.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+      int freq = sortedPositions.freq();
+      assertEquals("incorrect freq for doc=" + doc, expFreqs[doc], freq);
+      for (int i = 0; i < freq; i++) {
+        assertEquals("incorrect position for doc=" + doc, expPositions[doc][i], sortedPositions.nextPosition());
+        assertEquals("incorrect startOffset for doc=" + doc, expStartOffsets[doc][i], sortedPositions.startOffset());
+        assertEquals("incorrect endOffset for doc=" + doc, expEndOffsets[doc][i], sortedPositions.endOffset());
+        assertEquals("incorrect payload for doc=" + doc, expPayloads[doc][i], sortedPositions.getPayload().utf8ToString());
+      }
+      numSortedDocs++;
+    }
+    // Without this check an empty (or truncated) sorted enum would pass.
+    assertEquals("incorrect number of docs visited by the sorted enum", numUnsortedDocs, numSortedDocs);
+  }
+
+  /**
+   * Sorted doc {@code i} must expose the numeric doc value that the original
+   * reader holds for doc {@code oldToNew[i]}.
+   */
+  @Test
+  public void testNumericDocValuesField() throws Exception {
+    NumericDocValues dv = reader.getNumericDocValues(NUMERIC_DV_FIELD);
+    NumericDocValues sdv = sortingReader.getNumericDocValues(NUMERIC_DV_FIELD);
+
+    long[] expValues = new long[oldToNew.length];
+    int maxDoc = reader.maxDoc();
+    for (int i = 0; i < maxDoc; i++) {
+      expValues[i] = dv.get(oldToNew[i]);
+    }
+
+    for (int i = 0; i < maxDoc; i++) {
+      assertEquals("incorrect numeric DocValues for doc " + i, expValues[i], sdv.get(i));
+    }
+  }
+
+  /**
+   * Sorted doc {@code i} must expose the norm that the original reader holds
+   * for doc {@code oldToNew[i]}.
+   */
+  @Test
+  public void testNormValues() throws Exception {
+    NumericDocValues dv = reader.getNormValues(NORMS_FIELD);
+    NumericDocValues sdv = sortingReader.getNormValues(NORMS_FIELD);
+
+    long[] expValues = new long[oldToNew.length];
+    int maxDoc = reader.maxDoc();
+    for (int i = 0; i < maxDoc; i++) {
+      expValues[i] = dv.get(oldToNew[i]);
+    }
+
+    for (int i = 0; i < maxDoc; i++) {
+      assertEquals("incorrect norm value for doc " + i, expValues[i], sdv.get(i));
+    }
+  }
+
+  /**
+   * The first term of sorted doc {@code i}'s term vector must equal the first
+   * term of the original reader's term vector for doc {@code oldToNew[i]}.
+   */
+  @Test
+  public void testTermVectors() throws Exception {
+    int maxDoc = reader.maxDoc();
+    String[] expTerms = new String[maxDoc];
+    for (int i = 0; i < maxDoc; i++) {
+      Terms terms = reader.getTermVector(oldToNew[i], TERM_VECTORS_FIELD);
+      assertNotNull("term vectors not found for doc " + oldToNew[i] + " field [" + TERM_VECTORS_FIELD + "]", terms);
+      expTerms[i] = terms.iterator(null).next().utf8ToString();
+    }
+
+    for (int i = 0; i < maxDoc; i++) {
+      Terms terms = sortingReader.getTermVector(i, TERM_VECTORS_FIELD);
+      assertNotNull("term vectors not found for doc " + i + " field [" + TERM_VECTORS_FIELD + "]", terms);
+      assertEquals("incorrect term vector for doc " + i, expTerms[i], terms.iterator(null).next().utf8ToString());
+    }
+  }
+
+  /**
+   * Sorted doc {@code i} must expose the binary doc value that the original
+   * reader holds for doc {@code oldToNew[i]}.
+   */
+  @Test
+  public void testBinaryDocValuesField() throws Exception {
+    BinaryDocValues dv = reader.getBinaryDocValues(BINARY_DV_FIELD);
+    BinaryDocValues sdv = sortingReader.getBinaryDocValues(BINARY_DV_FIELD);
+
+    String[] expValues = new String[oldToNew.length];
+    int maxDoc = reader.maxDoc();
+    BytesRef bytes = new BytesRef();
+    for (int i = 0; i < maxDoc; i++) {
+      dv.get(oldToNew[i], bytes);
+      expValues[i] = bytes.utf8ToString();
+    }
+
+    for (int i = 0; i < maxDoc; i++) {
+      sdv.get(i, bytes);
+      assertEquals("incorrect binary DocValues for doc " + i, expValues[i], bytes.utf8ToString());
+    }
+  }
+
+  /**
+   * Sorted doc {@code i} must expose the sorted doc value that the original
+   * reader holds for doc {@code oldToNew[i]}.
+   */
+  @Test
+  public void testSortedDocValuesField() throws Exception {
+    SortedDocValues dv = reader.getSortedDocValues(SORTED_DV_FIELD);
+    SortedDocValues sdv = sortingReader.getSortedDocValues(SORTED_DV_FIELD);
+
+    String[] expValues = new String[oldToNew.length];
+    int maxDoc = reader.maxDoc();
+    BytesRef bytes = new BytesRef();
+    for (int i = 0; i < maxDoc; i++) {
+      dv.get(oldToNew[i], bytes);
+      expValues[i] = bytes.utf8ToString();
+    }
+
+    for (int i = 0; i < maxDoc; i++) {
+      sdv.get(i, bytes);
+      assertEquals("incorrect sorted DocValues for doc " + i, expValues[i], bytes.utf8ToString());
+    }
+  }
+
+  /**
+   * Sorted doc {@code i} must expose the same sorted-set values, in the same
+   * order and in the same quantity, that the original reader holds for doc
+   * {@code oldToNew[i]}.
+   */
+  @Test
+  public void testSortedSetDocValuesField() throws Exception {
+    SortedSetDocValues dv = reader.getSortedSetDocValues(SORTED_SET_DV_FIELD);
+    SortedSetDocValues sdv = sortingReader.getSortedSetDocValues(SORTED_SET_DV_FIELD);
+
+    // doc(int) adds exactly two values per document, hence the fixed width.
+    String[][] expValues = new String[oldToNew.length][2];
+    int[] expCounts = new int[oldToNew.length];
+    int maxDoc = reader.maxDoc();
+    BytesRef bytes = new BytesRef();
+    for (int i = 0; i < maxDoc; i++) {
+      dv.setDocument(oldToNew[i]);
+      long ord;
+      int idx = 0;
+      while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+        dv.lookupOrd(ord, bytes);
+        expValues[i][idx++] = bytes.utf8ToString();
+      }
+      expCounts[i] = idx;
+    }
+
+    for (int i = 0; i < maxDoc; i++) {
+      sdv.setDocument(i);
+      long ord;
+      int idx = 0;
+      while ((ord = sdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+        sdv.lookupOrd(ord, bytes);
+        assertEquals("incorrect sorted-set DocValues for doc " + i, expValues[i][idx++], bytes.utf8ToString());
+      }
+      // Without this check a sorted view that returns fewer ords would pass.
+      assertEquals("incorrect number of sorted-set DocValues for doc " + i, expCounts[i], idx);
+    }
+  }
+
+}

Property changes on: lucene/misc/src/test/org/apache/lucene/index/sorter/SortingAtomicReaderTest.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property