diff -r e4226f9efcdc lucene/contrib/benchmark/conf/createLineFile.alg --- a/lucene/contrib/benchmark/conf/createLineFile.alg Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/contrib/benchmark/conf/createLineFile.alg Tue Nov 02 18:19:28 2010 -0400 @@ -29,10 +29,11 @@ # # Where to get documents from: -content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource +content.source=org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource # Where to write the line file output: -line.file.out=work/reuters.lines.txt +line.file.out=/x/lucene/enwiki-20100302-lines.txt +docs.file = /x/lucene/enwiki-20100302-pages-articles.xml.bz2 # Stop after processing the document feed once: content.source.forever=false diff -r e4226f9efcdc lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java Tue Nov 02 18:19:28 2010 -0400 @@ -20,10 +20,13 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.util.Properties; import java.util.HashMap; import java.util.Map; +import java.util.Random; import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.benchmark.byTask.tasks.GatherFieldValuesTask; import org.apache.lucene.util.ThreadInterruptedException; import org.xml.sax.Attributes; import org.xml.sax.InputSource; @@ -275,6 +278,10 @@ } } } + + private static String[] COUNTRIES = SortableSingleDocSource.COUNTRIES; + private Random r = new Random(17); + private int counter; @Override public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { @@ -284,9 +291,34 @@ docData.setBody(tuple[BODY]); docData.setDate(tuple[DATE]); docData.setTitle(tuple[TITLE]); + + // random country + System.out.println("CHECK"); + if (GatherFieldValuesTask.values != null) { + Properties props = new Properties(); + docData.setProps(props); + + System.out.println("COUNTRIES = " + COUNTRIES.length); + props.put("country", COUNTRIES[r.nextInt(COUNTRIES.length)]); + docData.setProps(props); + + addRandom(props, 10); + addRandom(props, 100); + addRandom(props, 1000); + addRandom(props, 10000); + addRandom(props, 100000); + addRandom(props, 1000000); + counter++; + } + + return docData; } + private void addRandom(Properties props, int n) { + props.put("unique" + n, GatherFieldValuesTask.values.get(counter % (Math.min(n, GatherFieldValuesTask.values.size())))); + } + @Override public void resetInputs() throws IOException { super.resetInputs(); diff -r e4226f9efcdc lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java Tue Nov 02 18:19:28 2010 -0400 @@ -22,9 +22,12 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.util.Properties; +import java.util.Random; import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask; import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.benchmark.byTask.tasks.GatherFieldValuesTask; /** * A {@link ContentSource} reading one line at a time as a @@ -68,6 +71,10 @@ reader = null; 
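The getNextDocData() change above decorates every Wikipedia document with synthetic sortable fields: a country drawn from SortableSingleDocSource.COUNTRIES plus a family of uniqueN fields whose cardinality is capped at min(N, values.size()). GatherFieldValuesTask.values is assumed here to be a static List<String> populated by a task outside this hunk; the sketch below only illustrates the cardinality of the addRandom() expression, with hypothetical stand-in data.

import java.util.Arrays;
import java.util.List;

// Hypothetical stand-in for GatherFieldValuesTask.values, just to show the cycle addRandom() produces.
final class UniqueFieldCardinalitySketch {
  public static void main(String[] args) {
    List<String> values = Arrays.asList("v0", "v1", "v2", "v3", "v4");
    int n = 3; // think "unique3"; the patch uses n = 10, 100, ..., 1000000
    for (int counter = 0; counter < 7; counter++) {
      // Same expression as addRandom(): at most min(n, values.size()) distinct values ever appear.
      String value = values.get(counter % Math.min(n, values.size()));
      System.out.println("doc " + counter + " -> unique" + n + "=" + value);
    }
  }
}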
} } + + private static String[] COUNTRIES = SortableSingleDocSource.COUNTRIES; + private Random r = new Random(17); + private int counter; @Override public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { @@ -99,9 +106,31 @@ docData.setBody(line.substring(1 + spot2, line.length())); docData.setTitle(line.substring(0, spot)); docData.setDate(line.substring(1 + spot, spot2)); + + if (GatherFieldValuesTask.values != null) { + Properties props = new Properties(); + docData.setProps(props); + + //System.out.println("COUNTRIES = " + COUNTRIES.length); + props.put("country", COUNTRIES[r.nextInt(COUNTRIES.length)]); + docData.setProps(props); + + addRandom(props, 10); + addRandom(props, 100); + addRandom(props, 1000); + addRandom(props, 10000); + addRandom(props, 100000); + addRandom(props, 1000000); + counter++; + } + return docData; } + private void addRandom(Properties props, int n) { + props.put("unique" + n, GatherFieldValuesTask.values.get(counter % (Math.min(n, GatherFieldValuesTask.values.size())))); + } + @Override public void resetInputs() throws IOException { super.resetInputs(); diff -r e4226f9efcdc lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SortableSingleDocSource.java --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SortableSingleDocSource.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SortableSingleDocSource.java Tue Nov 02 18:19:28 2010 -0400 @@ -34,7 +34,7 @@ */ public class SortableSingleDocSource extends SingleDocSource { - private static String[] COUNTRIES = new String[] { + public static final String[] COUNTRIES = new String[] { "European Union", "United States", "Japan", "Germany", "China (PRC)", "United Kingdom", "France", "Italy", "Spain", "Canada", "Brazil", "Russia", "India", "South Korea", "Australia", "Mexico", "Netherlands", "Turkey", diff -r e4226f9efcdc lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java Tue Nov 02 18:19:28 2010 -0400 @@ -57,10 +57,12 @@ r = IndexReader.open(OpenReaderTask.findIndexCommit(dir, commitUserData), deletionPolicy, readOnly); + System.out.println("OpenReader commit=" + commitUserData + " reader=" + r); } else { r = IndexReader.open(dir, deletionPolicy, readOnly); + System.out.println("OpenReader reader=" + r); } getRunData().setIndexReader(r); // We transfer reference to the run data @@ -94,6 +96,7 @@ String ud = null; if (map != null) { ud = map.get(USER_DATA); + System.out.println("got " + ud); } if (ud != null && ud.equals(userData)) { return ic; diff -r e4226f9efcdc lucene/contrib/db/bdb-je/lib/je-3.3.93.jar Binary file lucene/contrib/db/bdb-je/lib/je-3.3.93.jar has changed diff -r e4226f9efcdc lucene/contrib/db/bdb/lib/db-4.7.25.jar Binary file lucene/contrib/db/bdb/lib/db-4.7.25.jar has changed diff -r e4226f9efcdc lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedBulkPostingsEnum.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedBulkPostingsEnum.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,268 @@ +package org.apache.lucene.store.instantiated; + +/** + * Copyright 2006 
The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.BulkPostingsEnum; + +public class InstantiatedBulkPostingsEnum extends BulkPostingsEnum { + + private final DocDeltasReader docDeltasReader; + private final FreqsReader freqsReader; + private final PositionDeltasReader positionDeltasReader; + private final String field; + + private InstantiatedTerm term; + + public InstantiatedBulkPostingsEnum(String field, boolean doFreq, boolean doPositions) { + this.field = field; + docDeltasReader = new DocDeltasReader(); + if (doFreq) { + freqsReader = new FreqsReader(); + } else { + freqsReader = null; + } + + if (doPositions) { + positionDeltasReader = new PositionDeltasReader(); + } else { + positionDeltasReader = null; + } + } + + public boolean canReuse(String field, boolean doFreq, boolean doPositions) { + return field.equals(this.field) && (doFreq == (freqsReader != null)) && (doPositions == (positionDeltasReader != null)); + } + + private class DocDeltasReader extends BlockReader { + private final int[] buffer = new int[64]; + private InstantiatedTermDocumentInformation[] docs; + private int docUpto; + private int lastDocID; + private int limit; + + public void reset(InstantiatedTerm term) { + docUpto = 0; + lastDocID = 0; + docs = term.getAssociatedDocuments(); + fill(); + } + + public void jump(int docUpto, int lastDocID) { + this.lastDocID = lastDocID; + this.docUpto = docUpto; + this.limit = 0; + } + + @Override + public int[] getBuffer() { + return buffer; + } + + @Override + public int offset() { + return 0; + } + + @Override + public void setOffset(int v) { + throw new UnsupportedOperationException(); + } + + @Override + public int end() { + return limit; + } + + @Override + public int fill() { + final int chunk = Math.min(buffer.length, docs.length-docUpto); + for(int i=0;i getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } diff -r e4226f9efcdc lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java --- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Tue Nov 02 18:19:28 2010 -0400 @@ -43,6 +43,7 @@ import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.BulkPostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.index.TermPositionVector; @@ -907,6 +908,17 @@ } @Override + public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) { + MemoryBulkPostingsEnum postingsEnum; + if (reuse == null || !(reuse instanceof MemoryBulkPostingsEnum) || !((MemoryBulkPostingsEnum) reuse).canReuse(info, doFreqs, doPositions)) { + postingsEnum = new MemoryBulkPostingsEnum(info, doFreqs, doPositions); + } else { + postingsEnum 
= (MemoryBulkPostingsEnum) reuse; + } + return postingsEnum.reset(info.sortedTerms[termUpto].getValue()); + } + + @Override public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } @@ -1004,6 +1016,173 @@ return null; } } + + private class MemoryBulkPostingsEnum extends BulkPostingsEnum { + + private final DocDeltasReader docDeltasReader; + private final FreqsReader freqsReader; + private final PositionDeltasReader positionDeltasReader; + private final Info info; + + public MemoryBulkPostingsEnum(Info info, boolean doFreqs, boolean doPositions) { + this.info = info; + docDeltasReader = new DocDeltasReader(); + if (doFreqs) { + freqsReader = new FreqsReader(); + } else { + freqsReader = null; + } + + if (doPositions) { + positionDeltasReader = new PositionDeltasReader(); + } else { + positionDeltasReader = null; + } + } + + public boolean canReuse(Info info, boolean doFreq, boolean doPositions) { + return this.info == info && (doFreq == (freqsReader != null)) && (doPositions == (positionDeltasReader != null)); + } + + private class DocDeltasReader extends BlockReader { + private final int[] buffer = new int[1]; + + public void reset() { + } + + @Override + public int[] getBuffer() { + return buffer; + } + + @Override + public int offset() { + return 0; + } + + @Override + public void setOffset(int offset) { + throw new UnsupportedOperationException(); + } + + @Override + public int end() { + return 1; + } + + @Override + public int fill() { + return 1; + } + } + + private class FreqsReader extends BlockReader { + private final int[] buffer = new int[1]; + + public void reset(int freq) { + buffer[0] = freq; + } + + @Override + public int[] getBuffer() { + return buffer; + } + + @Override + public int offset() { + return 0; + } + + @Override + public void setOffset(int offset) { + throw new UnsupportedOperationException(); + } + + @Override + public int end() { + return 1; + } + + @Override + public int fill() { + return 1; + } + } + + private class PositionDeltasReader extends BlockReader { + private final int[] buffer = new int[64]; + private ArrayIntList positions; + private int posUpto; + private int limit; + + public void reset(ArrayIntList positions) { + posUpto = 0; + this.positions = positions; + fill(); + } + + @Override + public int[] getBuffer() { + return buffer; + } + + @Override + public int offset() { + return 0; + } + + @Override + public void setOffset(int offset) { + throw new UnsupportedOperationException(); + } + + @Override + public int end() { + return limit; + } + + @Override + public int fill() { + final int chunk = Math.min(buffer.length, positions.size() - posUpto); + for(int i=0;i= end) { + offset = 0; + end = fill(); + if (offset >= end) { + // nocommit cleanup + throw new IOException("no more ints"); + } + } + setOffset(1+offset); + return buffer[offset]; + } + + /** Reads long as 1 or 2 ints, and can only use 61 of + * the 64 long bits. 
*/ + public long readVLong() throws IOException { + int offset = offset(); + + final int v = next(); + if ((v & 1) == 0) { + return v >> 1; + } else { + final long v2 = next(); + return (v2 << 30) | (v >> 1); + } + } + } + + public abstract BlockReader getDocDeltasReader() throws IOException; + + /** Returns null if per-document term freq is not indexed */ + public abstract BlockReader getFreqsReader() throws IOException; + + /** Returns null if positions are not indexed */ + public abstract BlockReader getPositionDeltasReader() throws IOException; + + public static class JumpResult { + public int count; + public int docID; + } + + /** Only call this if the docID you seek is after the last + * document in the buffer. This call does not position + * exactly; instead, it jumps forward when possible, + * returning the docID and ord it had jumped to, seeking + * all of the BlockReaders accordingly. Note that if a + * seek did occur, you must call .offset() and .limit() + * on each BlockReader. If null is returned then + * skipping is not possible, ie you should just scan + * yourself). */ + abstract public JumpResult jump(int target, int curCount) throws IOException; +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/CheckIndex.java --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java Tue Nov 02 18:19:28 2010 -0400 @@ -768,7 +768,7 @@ } if (totDocCount != totDocCount2) { - throw new RuntimeException("search to seek terms produced wrong number of hits: " + totDocCount + " vs " + totDocCount2); + throw new RuntimeException("search by seek term produced wrong number of hits: " + totDocCount + " vs " + totDocCount2 + " field=" + field); } } } diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/DocsEnum.java --- a/lucene/src/java/org/apache/lucene/index/DocsEnum.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/DocsEnum.java Tue Nov 02 18:19:28 2010 -0400 @@ -44,6 +44,8 @@ return atts; } + // nocommit -- delete all after here: + // TODO: maybe add bulk read only docIDs (for eventual // match-only scoring) @@ -83,6 +85,7 @@ *
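Referring back to BulkPostingsEnum.BlockReader.readVLong() above: the low bit of the first int acts as a continuation flag, so a value gets 30 payload bits from the first int and up to 31 from the optional second one, which is why only 61 of the 64 long bits are usable. A hypothetical write-side counterpart (not part of this patch) that the decoder above would round-trip:

// Sketch only: encodes a non-negative long into one or two ints the way readVLong() expects.
final class VLongSketch {
  /** Writes v into buffer starting at start and returns how many ints were used (1 or 2). */
  static int writeVLong(int[] buffer, int start, long v) {
    assert v >= 0 && v < (1L << 61) : "readVLong can only represent 61 bits";
    if (v < (1L << 30)) {
      buffer[start] = (int) (v << 1);                                // flag bit 0: single int
      return 1;
    } else {
      buffer[start] = ((int) ((v & ((1L << 30) - 1)) << 1)) | 1;     // low 30 payload bits, flag bit 1
      buffer[start + 1] = (int) (v >>> 30);                          // remaining (up to 31) high bits
      return 2;
    }
  }
}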
NOTE: the default impl simply delegates to {@link * #nextDoc}, but subclasses may do this more * efficiently. */ + // nocommit -- remove this public int read() throws IOException { int count = 0; final int[] docs = bulkResult.docs.ints; diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/FilterIndexReader.java --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Tue Nov 02 18:19:28 2010 -0400 @@ -170,11 +170,18 @@ } @Override + public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException { + return in.bulkPostings(reuse, doFreqs, doPositions); + } + + @Override public Comparator getComparator() throws IOException { return in.getComparator(); } } + // nocommit need FilteredBulkDocsEnum + /** Base class for filtering {@link DocsEnum} implementations. */ public static class FilterDocsEnum extends DocsEnum { protected DocsEnum in; diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/IndexReader.java --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java Tue Nov 02 18:19:28 2010 -0400 @@ -1012,6 +1012,22 @@ } } + // nocommit jdocs + public BulkPostingsEnum bulkTermPostingsEnum(String field, BytesRef term, boolean doFreqs, boolean doPositions) throws IOException { + assert field != null; + assert term != null; + final Fields fields = fields(); + if (fields == null) { + return null; + } + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.bulkPostings(term, null, doFreqs, doPositions); + } else { + return null; + } + } + /** Returns {@link DocsAndPositionsEnum} for the specified * field & term. This may return null, if either the * field or term does not exist, or, positions were not diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/MultiBulkPostingsEnum.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/index/MultiBulkPostingsEnum.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,274 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
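Before the MultiBulkPostingsEnum source below, a hedged sketch of how a caller might drain the doc-delta stream returned by the new IndexReader.bulkTermPostingsEnum() above. The getBuffer()/offset()/end()/fill() contract mirrors the readVInt() helper in BulkPostingsEnum; docFreq is assumed to come from the matching TermsEnum, and the buffer holds deltas rather than absolute docIDs:

import java.io.IOException;
import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.BytesRef;

// Illustrative consumer of the bulk API, not part of the patch.
final class BulkDrainSketch {
  /** Sums the doc deltas for one term, i.e. returns the last docID containing it. */
  static int lastDocID(IndexReader reader, String field, BytesRef term, int docFreq) throws IOException {
    BulkPostingsEnum postings = reader.bulkTermPostingsEnum(field, term, false, false);
    if (postings == null) {
      return -1;                               // field or term does not exist
    }
    BulkPostingsEnum.BlockReader docDeltas = postings.getDocDeltasReader();
    int[] buffer = docDeltas.getBuffer();
    int offset = docDeltas.offset();
    int end = docDeltas.end();
    int doc = 0;
    for (int i = 0; i < docFreq; i++) {
      if (offset >= end) {                     // buffer exhausted: refill and restart at 0
        end = docDeltas.fill();
        offset = 0;
      }
      doc += buffer[offset++];                 // doc deltas, not absolute docIDs
    }
    // freqs (getFreqsReader()) and position deltas would be drained in lock-step the same way
    return doc;
  }
}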
+ */ + +import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; + +public final class MultiBulkPostingsEnum extends BulkPostingsEnum { + private EnumWithSlice[] subs; + int numSubs; + + private final DocDeltasReader docDeltasReader = new DocDeltasReader(); + private final FreqsReader freqsReader = new FreqsReader(); + private final PositionsReader positionsReader = new PositionsReader(); + + MultiBulkPostingsEnum reset(final EnumWithSlice[] subs, final int numSubs, boolean doFreqs, boolean doPositions) throws IOException { + this.numSubs = numSubs; + this.subs = new EnumWithSlice[subs.length]; + for(int i=0;i offset) { + return doCopy(offset, limit); + } + } + } + int limit = current.fill(); + //int offset = current.offset(); + return doCopy(0, limit); + } + } + + protected abstract BlockReader getBlockReader(int upto) throws IOException; + protected void onFill() {}; + } + + private class DocDeltasReader extends MultiBlockReader { + int lastDocID; + int lastSeg; + + @Override + protected int getBufferSize() throws IOException { + int maxBufferSize = 0; + for(int sub=0;sub 0; - + seekDir(in, dirOffset); // Read directory @@ -367,7 +367,6 @@ int lo = 0; // binary search int hi = numIndexTerms - 1; assert totalIndexInterval > 0 : "totalIndexInterval=" + totalIndexInterval; - while (hi >= lo) { int mid = (lo + hi) >>> 1; diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java --- a/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java Tue Nov 02 18:19:28 2010 -0400 @@ -21,6 +21,7 @@ import java.io.Closeable; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.BulkPostingsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.store.IndexInput; @@ -50,6 +51,12 @@ * TermState may be reused. */ public abstract DocsEnum docs(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsEnum reuse) throws IOException; + // nocommit jdocs + // nocommit make abstract + public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, TermState state, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException { + throw new UnsupportedOperationException(); + } + /** Must fully consume state, since after this call that * TermState may be reused. 
*/ public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException; diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java --- a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java Tue Nov 02 18:19:28 2010 -0400 @@ -25,6 +25,7 @@ import java.util.Comparator; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.BulkPostingsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; @@ -319,9 +320,9 @@ @Override public SeekStatus seek(BytesRef term, boolean useCache) throws IOException { // Check cache - fieldTerm.term = term; TermState cachedState; if (useCache) { + fieldTerm.term = term; cachedState = termsCache.get(fieldTerm); if (cachedState != null) { state.copy(cachedState); @@ -387,7 +388,6 @@ while(next() != null) { final int cmp = termComp.compare(bytesReader.term, term); if (cmp == 0) { - if (doSeek && useCache) { // Store in cache FieldAndTerm entryKey = new FieldAndTerm(fieldTerm); @@ -396,7 +396,6 @@ cachedState.filePointer = in.getFilePointer(); termsCache.put(entryKey, cachedState); } - return SeekStatus.FOUND; } else if (cmp > 0) { return SeekStatus.NOT_FOUND; @@ -500,6 +499,12 @@ } @Override + public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException { + BulkPostingsEnum postingsEnum = postingsReader.bulkPostings(fieldInfo, state, reuse, doFreqs, doPositions); + return postingsEnum; + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { if (fieldInfo.omitTermFreqAndPositions) { return null; diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java --- a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java Tue Nov 02 18:19:28 2010 -0400 @@ -178,7 +178,7 @@ termWriter.write(text); out.writeVInt(numDocs); - + //System.out.println("term=" + text.utf8ToString() + " df=" + numDocs); postingsWriter.finishTerm(numDocs, isIndexTerm); numTerms++; } diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java --- a/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java Tue Nov 02 18:19:28 2010 -0400 @@ -24,8 +24,8 @@ import java.io.IOException; import org.apache.lucene.index.codecs.sep.IntIndexInput; +import org.apache.lucene.index.BulkPostingsEnum; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.IntsRef; /** Abstract base class that reads fixed-size blocks of ints * from an IndexInput. 
While this is a simple approach, a @@ -42,7 +42,9 @@ public FixedIntBlockIndexInput(final IndexInput in) throws IOException { this.in = in; - blockSize = in.readVInt(); + //blockSize = in.readVInt(); + blockSize = in.readInt(); + //System.out.println("BLOCK size " + blockSize); } @Override @@ -67,80 +69,72 @@ public interface BlockReader { public void readBlock() throws IOException; + // nocommit -- need seek here so mmapdir "knows" } - private static class Reader extends IntIndexInput.Reader { + private static class Reader extends BulkPostingsEnum.BlockReader { private final IndexInput in; protected final int[] pending; - int upto; + private int offset; - private boolean seekPending; - private long pendingFP; - private int pendingUpto; private long lastBlockFP; private final BlockReader blockReader; private final int blockSize; - private final IntsRef bulkResult = new IntsRef(); public Reader(final IndexInput in, final int[] pending, final BlockReader blockReader) - throws IOException { + throws IOException { this.in = in; this.pending = pending; this.blockSize = pending.length; - bulkResult.ints = pending; this.blockReader = blockReader; - upto = blockSize; } - void seek(final long fp, final int upto) { - pendingFP = fp; - pendingUpto = upto; - seekPending = true; - } - - private void maybeSeek() throws IOException { - if (seekPending) { - if (pendingFP != lastBlockFP) { - // need new block - in.seek(pendingFP); - lastBlockFP = pendingFP; - blockReader.readBlock(); - } - upto = pendingUpto; - seekPending = false; + void seek(final long fp, final int upto) throws IOException { + offset = upto; + if (fp != lastBlockFP) { + // Seek to new block; this may in fact be the next + // block ie when caller is doing sequential scan (eg + // PrefixQuery) + //System.out.println(" seek block fp=" + fp + " vs last=" + lastBlockFP + " upto=" + upto); + in.seek(fp); + fill(); + } else { + // Seek within current block + //System.out.println(" seek in-block fp=" + fp + " upto=" + offset); } } @Override - public int next() throws IOException { - this.maybeSeek(); - if (upto == blockSize) { - lastBlockFP = in.getFilePointer(); - blockReader.readBlock(); - upto = 0; - } - - return pending[upto++]; + public int[] getBuffer() { + return pending; } @Override - public IntsRef read(final int count) throws IOException { - this.maybeSeek(); - if (upto == blockSize) { - blockReader.readBlock(); - upto = 0; - } - bulkResult.offset = upto; - if (upto + count < blockSize) { - bulkResult.length = count; - upto += count; - } else { - bulkResult.length = blockSize - upto; - upto = blockSize; - } + public int end() { + return blockSize; + } - return bulkResult; + @Override + public int offset() { + return offset; + } + + @Override + public void setOffset(int offset) { + this.offset = offset; + } + + @Override + public int fill() throws IOException { + //System.out.println("fii.fill seekPending=" + seekPending + " set lastFP=" + pendingFP + " this=" + this); + // nocommit -- not great that we do this on each + // fill -- but we need it to detect seek w/in block + // case: + // nocommit: can't we += blockNumBytes instead? 
+ lastBlockFP = in.getFilePointer(); + blockReader.readBlock(); + return blockSize; } } @@ -150,10 +144,14 @@ @Override public void read(final IndexInput indexIn, final boolean absolute) throws IOException { + // nocommit -- somehow we should share the "upto" for + // doc & freq since they will always be "in sync" if (absolute) { fp = indexIn.readVLong(); upto = indexIn.readVInt(); } else { + // nocommit -- can't this be more efficient? read a + // single byte and check a bit? block size is 128... final long delta = indexIn.readVLong(); if (delta == 0) { // same block @@ -168,7 +166,7 @@ } @Override - public void read(final IntIndexInput.Reader indexIn, final boolean absolute) throws IOException { + public void read(final BulkPostingsEnum.BlockReader indexIn, final boolean absolute) throws IOException { if (absolute) { fp = indexIn.readVLong(); upto = indexIn.next(); @@ -187,7 +185,7 @@ } @Override - public void seek(final IntIndexInput.Reader other) throws IOException { + public void seek(final BulkPostingsEnum.BlockReader other) throws IOException { ((Reader) other).seek(fp, upto); } @@ -205,5 +203,10 @@ other.upto = upto; return other; } + + @Override + public String toString() { + return "FixedBlockIndex(fp=" + fp + " offset=" + upto + ")"; + } } } diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java --- a/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java Tue Nov 02 18:19:28 2010 -0400 @@ -44,7 +44,7 @@ protected FixedIntBlockIndexOutput(IndexOutput out, int fixedBlockSize) throws IOException { blockSize = fixedBlockSize; this.out = out; - out.writeVInt(blockSize); + out.writeInt(blockSize); buffer = new int[blockSize]; } @@ -111,6 +111,11 @@ lastUpto = upto; lastFP = fp; } + + @Override + public String toString() { + return "fp=" + fp + " idx=" + upto; + } } @Override diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java --- a/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Tue Nov 02 18:19:28 2010 -0400 @@ -23,9 +23,9 @@ import java.io.IOException; +import org.apache.lucene.index.BulkPostingsEnum; import org.apache.lucene.index.codecs.sep.IntIndexInput; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.IntsRef; // TODO: much of this can be shared code w/ the fixed case @@ -72,94 +72,97 @@ public void seek(long pos) throws IOException; } - public static class Reader extends IntIndexInput.Reader { + public static class Reader extends BulkPostingsEnum.BlockReader { private final IndexInput in; public final int[] pending; - int upto; private boolean seekPending; private long pendingFP; - private int pendingUpto; + private int offset; private long lastBlockFP; private int blockSize; private final BlockReader blockReader; - private final IntsRef bulkResult = new IntsRef(); + private int limit; public Reader(final IndexInput in, final int[] pending, final BlockReader blockReader) throws IOException { this.in = in; this.pending = pending; - bulkResult.ints = pending; this.blockReader = blockReader; } void seek(final long fp, final int upto) throws IOException { + //System.out.println("vintb seek fp=" + fp + " upto=" + upto); // TODO: should we 
do this in real-time, not lazy? pendingFP = fp; - pendingUpto = upto; - assert pendingUpto >= 0: "pendingUpto=" + pendingUpto; - seekPending = true; + offset = upto; + assert offset >= 0: "pendingUpto=" + offset; + if (pendingFP != lastBlockFP) { + // Clear current block + seekPending = true; + // System.out.println(" seekPending=true now fill"); + fill(); + } else { + //System.out.println(" no seekPending"); + } + //System.out.println(" now offset=" + offset + " limit=" + limit); + + // This is necessary for int encoders that are + // non-causal, ie must see future int values to + // encode the current ones. + while(offset >= limit) { + offset -= limit; + //System.out.println(" non-causal fill"); + fill(); + } + //System.out.println(" after skip bock offset=" + offset); } - private final void maybeSeek() throws IOException { + @Override + public int[] getBuffer() { + return pending; + } + + @Override + public int end() { + return limit; + } + + @Override + public int offset() { + return offset; + } + + @Override + public void setOffset(int offset) { + this.offset = offset; + } + + @Override + public int fill() throws IOException { if (seekPending) { - if (pendingFP != lastBlockFP) { - // need new block - in.seek(pendingFP); - blockReader.seek(pendingFP); - lastBlockFP = pendingFP; - blockSize = blockReader.readBlock(); - } - upto = pendingUpto; + seekPending = false; + in.seek(pendingFP); + blockReader.seek(pendingFP); + lastBlockFP = pendingFP; + blockSize = blockReader.readBlock(); // TODO: if we were more clever when writing the // index, such that a seek point wouldn't be written // until the int encoder "committed", we could avoid // this (likely minor) inefficiency: - // This is necessary for int encoders that are - // non-causal, ie must see future int values to - // encode the current ones. 
- while(upto >= blockSize) { - upto -= blockSize; - lastBlockFP = in.getFilePointer(); - blockSize = blockReader.readBlock(); - } - seekPending = false; - } - } + //System.out.println("varintblock.fill offset=" + offset + " vs blockSize=" + blockSize); - @Override - public int next() throws IOException { - this.maybeSeek(); - if (upto == blockSize) { + } else { + // nocommit -- not great that we do this on each + // fill -- but we need it to detect seek w/in block + // case: lastBlockFP = in.getFilePointer(); blockSize = blockReader.readBlock(); - upto = 0; } - - return pending[upto++]; - } - - @Override - public IntsRef read(final int count) throws IOException { - this.maybeSeek(); - if (upto == blockSize) { - lastBlockFP = in.getFilePointer(); - blockSize = blockReader.readBlock(); - upto = 0; - } - bulkResult.offset = upto; - if (upto + count < blockSize) { - bulkResult.length = count; - upto += count; - } else { - bulkResult.length = blockSize - upto; - upto = blockSize; - } - - return bulkResult; + return limit = blockSize; } } @@ -189,7 +192,7 @@ } @Override - public void read(final IntIndexInput.Reader indexIn, final boolean absolute) throws IOException { + public void read(final BulkPostingsEnum.BlockReader indexIn, final boolean absolute) throws IOException { if (absolute) { fp = indexIn.readVLong(); upto = indexIn.next()&0xFF; @@ -212,7 +215,7 @@ } @Override - public void seek(final IntIndexInput.Reader other) throws IOException { + public void seek(final BulkPostingsEnum.BlockReader other) throws IOException { ((Reader) other).seek(fp, upto); } diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORFactory.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORFactory.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,43 @@ +package org.apache.lucene.index.codecs.pfordelta; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.Directory; +import org.apache.lucene.index.codecs.sep.IntStreamFactory; +import org.apache.lucene.index.codecs.sep.IntIndexInput; +import org.apache.lucene.index.codecs.sep.IntIndexOutput; + +import java.io.IOException; + +public class FORFactory extends IntStreamFactory { + private final int blockSize; + + /** blockSize is only used when creating the + * IntIndexOutput */ + public FORFactory(int blockSize) { + this.blockSize = blockSize; + } + + public IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException { + return new FORIndexInput(dir, fileName, readBufferSize); + } + + public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException { + return new FORIndexOutput(dir, fileName, blockSize); + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexInput.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexInput.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,175 @@ +package org.apache.lucene.index.codecs.pfordelta; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.MMapDirectory.MMapIndexInput; +import org.apache.lucene.store.MMapDirectory.MultiMMapIndexInput; +import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput; +import org.apache.lucene.util.pfor.FrameOfRef; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; + +public class FORIndexInput extends FixedIntBlockIndexInput { + + public FORIndexInput(Directory dir, String fileName, int readBufferSize) throws IOException { + super(dir.openInput(fileName, readBufferSize)); + } + + private static class BlockReader implements FixedIntBlockIndexInput.BlockReader { + private final IndexInput in; + private final int[] buffer; + private final FrameOfRef decompressor; + private final byte[] input; + private final IntBuffer intInput; + + public BlockReader(IndexInput in, int[] buffer) { + this.in = in; + this.buffer = buffer; + + decompressor = new FrameOfRef(); + // nocommit -- can't hardwire 1024; it's a function of blockSize + ByteBuffer byteBuffer = ByteBuffer.allocate(1024); + input = byteBuffer.array(); + intInput = byteBuffer.asIntBuffer(); + decompressor.setCompressedBuffer(intInput); + decompressor.setUnCompressedData(buffer, 0, buffer.length); + } + + public void seek(long pos) throws IOException { + //System.out.println("for: seek pos=" + pos); + } + + public void readBlock() throws IOException { + // nocommit -- we don't need this numBytes header -- + // it's a waste. 
we need something like the zip + // interface -- the decompressor asks for more bytes + // if it needs it + //System.out.println("for: read @ fp=" + in.getFilePointer()); + int numBytes = in.readInt(); + // nocommit -- how to avoid this copy? plus, the copy + // inside BII. if mmapdir how can we directly access + // underlying ram w/ no copy? + in.readBytes(input, 0, numBytes); + intInput.rewind(); + decompressor.decompress(); + //System.out.println(" FOR.readBlock"); + } + } + + // nocommit -- hacked up -- maybe move + // IntIndexInput/Output into store? + private static class MMapBlockReader implements FixedIntBlockIndexInput.BlockReader { + private final MMapIndexInput in; + private final int[] buffer; + private final FrameOfRef decompressor; + private final ByteBuffer byteBufferIn; + private final IntBuffer intBufferIn; + + public MMapBlockReader(MMapIndexInput in, int[] buffer) { + this.in = in; + this.buffer = buffer; + + // nocommit -- we seem to be creating this class + // alot... how come? + decompressor = new FrameOfRef(); + + byteBufferIn = in.getBuffer(); + //System.out.println("pos start=" + byteBufferIn.position()); + // nocommit -- this sets pos relative to current pos + // of byteBuffer (confusing) -- this is why we -1 in + // seek (pos is always 4 here) + intBufferIn = byteBufferIn.asIntBuffer(); + + decompressor.setCompressedBuffer(intBufferIn); + decompressor.setUnCompressedData(buffer, 0, buffer.length); + //System.out.println("mmmap"); + } + + public void seek(long pos) throws IOException { + assert pos%4 == 0; + //System.out.println("this=" + this + " seek=" + pos); + intBufferIn.position((int) (pos/4)-1); + } + + public void readBlock() throws IOException { + // nocommit -- we don't need this numBytes header -- + // it's a waste. we need something like the zip + // interface -- the decompressor asks for more bytes + // if it needs it + //int numBytes = in.readInt(); + int numBytes = intBufferIn.get(); + //System.out.println("NB=" + numBytes); + // nocommit -- how to avoid this copy? plus, the copy + // inside BII. if mmapdir how can we directly access + // underlying ram w/ no copy? + //in.readBytes(input, 0, numBytes); + decompressor.decompress(); + } + } + + // nocommit -- hacked up -- maybe move + // IntIndexInput/Output into store? + private static class MultiMMapBlockReader implements FixedIntBlockIndexInput.BlockReader { + private final MultiMMapIndexInput in; + private final int[] buffer; + private final FrameOfRef decompressor; + private final ByteBuffer byteBufferIn; + private final IntBuffer intBufferIn; + + public MultiMMapBlockReader(MultiMMapIndexInput in, int[] buffer) { + this.in = in; + this.buffer = buffer; + + decompressor = new FrameOfRef(); + + byteBufferIn = in.getBuffer(); + intBufferIn = byteBufferIn.asIntBuffer(); + + decompressor.setCompressedBuffer(intBufferIn); + decompressor.setUnCompressedData(buffer, 0, buffer.length); + } + + public void seek(long pos) throws IOException { + // + + } + + public void readBlock() throws IOException { + int numBytes = in.readInt(); + // nocommit -- how to avoid this copy? plus, the copy + // inside BII. if mmapdir how can we directly access + // underlying ram w/ no copy? 
+ //in.readBytes(input, 0, numBytes); + //decompressor.decompress(); + } + } + + protected FixedIntBlockIndexInput.BlockReader getBlockReader(IndexInput in, int[] buffer) { + if (in instanceof MMapIndexInput) { + return new MMapBlockReader((MMapIndexInput) in, buffer); + } else if (false && in instanceof MultiMMapIndexInput) { + return new MultiMMapBlockReader((MultiMMapIndexInput) in, buffer); + } else { + return new BlockReader(in, buffer); + } + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexOutput.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexOutput.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,66 @@ +package org.apache.lucene.index.codecs.pfordelta; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Naive int block API that writes vInts. This is + * expected to give poor performance; it's really only for + * testing the pluggability. One should typically use pfor instead. */ + +import org.apache.lucene.store.Directory; +import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput; +import org.apache.lucene.util.pfor.FrameOfRef; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** Don't use this class!! It naively encodes ints one vInt + * at a time. Use it only for testing. 
*/ +public class FORIndexOutput extends FixedIntBlockIndexOutput { + + public final static String CODEC = "P_FOR_DELTA"; + public final static int VERSION_START = 0; + public final static int VERSION_CURRENT = VERSION_START; + private final FrameOfRef compressor; + private final byte[] output; + + // nocommit + private final String fileName; + + public FORIndexOutput(Directory dir, String fileName, int blockSize) throws IOException { + super(dir.createOutput(fileName), blockSize); + + // nocommit + this.fileName = fileName; + compressor = new FrameOfRef(); + // nocommit -- can't hardwire 1024; it's a function of blockSize + ByteBuffer byteBuffer = ByteBuffer.allocate(1024); + output = byteBuffer.array(); + compressor.setCompressedBuffer(byteBuffer.asIntBuffer()); + } + + @Override + protected void flushBlock() throws IOException { + compressor.setUnCompressedData(buffer, 0, buffer.length); + final int numFrameBits = compressor.frameBitsForCompression(); + compressor.compress(); + final int numBytes = compressor.compressedSize() * 4; + assert numBytes <= 1024; + out.writeInt(numBytes); + out.writeBytes(output, numBytes); + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FrameOfRefCodec.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FrameOfRefCodec.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,138 @@ +package org.apache.lucene.index.codecs.pfordelta; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
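Despite the vInt wording copied into the FORIndexOutput javadoc above, flushBlock() actually writes FrameOfRef-compressed blocks, and it fixes the on-disk layout the readers rely on: a 4-byte length header followed by the compressed payload (the header the nocommit comments would like to drop). A minimal sketch of pulling one raw block off an already-positioned IndexInput:

import java.io.IOException;
import org.apache.lucene.store.IndexInput;

// Illustrative only: mirrors the header + payload layout written by FORIndexOutput.flushBlock().
final class RawBlockSketch {
  static byte[] readRawBlock(IndexInput in) throws IOException {
    final int numBytes = in.readInt();         // per-block byte count written by flushBlock()
    final byte[] compressed = new byte[numBytes];
    in.readBytes(compressed, 0, numBytes);     // FrameOfRef/PFor then decompress an IntBuffer view of these bytes
    return compressed;
  }
}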
+ */ + +import java.util.Set; +import java.io.IOException; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.codecs.FieldsConsumer; +import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl; +import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl; +import org.apache.lucene.index.codecs.standard.StandardCodec; +import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; +import org.apache.lucene.index.codecs.PrefixCodedTermsReader; +import org.apache.lucene.index.codecs.TermsIndexWriterBase; +import org.apache.lucene.index.codecs.TermsIndexReaderBase; +import org.apache.lucene.index.codecs.PostingsReaderBase; +import org.apache.lucene.index.codecs.PostingsWriterBase; +import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; +import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; + +public class FrameOfRefCodec extends Codec { + + public FrameOfRefCodec() { + name = "FrameOfRef"; + } + + @Override + public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new FORFactory(128)); + + boolean success = false; + TermsIndexWriterBase indexWriter; + try { + indexWriter = new FixedGapTermsIndexWriter(state); + success = true; + } finally { + if (!success) { + postingsWriter.close(); + } + } + + success = false; + try { + FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); + success = true; + return ret; + } finally { + if (!success) { + try { + postingsWriter.close(); + } finally { + indexWriter.close(); + } + } + } + } + + @Override + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { + PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir, + state.segmentInfo, + state.readBufferSize, + new FORFactory(128)); + + TermsIndexReaderBase indexReader; + boolean success = false; + try { + indexReader = new FixedGapTermsIndexReader(state.dir, + state.fieldInfos, + state.segmentInfo.name, + state.termsIndexDivisor, + BytesRef.getUTF8SortedAsUnicodeComparator()); + success = true; + } finally { + if (!success) { + postingsReader.close(); + } + } + + success = false; + try { + FieldsProducer ret = new PrefixCodedTermsReader(indexReader, + state.dir, + state.fieldInfos, + state.segmentInfo.name, + postingsReader, + state.readBufferSize, + BytesRef.getUTF8SortedAsUnicodeComparator(), + StandardCodec.TERMS_CACHE_SIZE); + success = true; + return ret; + } finally { + if (!success) { + try { + postingsReader.close(); + } finally { + indexReader.close(); + } + } + } + } + + @Override + public void files(Directory dir, SegmentInfo segmentInfo, Set files) { + SepPostingsReaderImpl.files(segmentInfo, files); + PrefixCodedTermsReader.files(dir, segmentInfo, files); + FixedGapTermsIndexReader.files(dir, segmentInfo, files); + } + + @Override + public void getExtensions(Set extensions) { + SepPostingsWriterImpl.getExtensions(extensions); + PrefixCodedTermsReader.getExtensions(extensions); + FixedGapTermsIndexReader.getIndexExtensions(extensions); + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaFactory.java --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaFactory.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,43 @@ +package org.apache.lucene.index.codecs.pfordelta; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.Directory; +import org.apache.lucene.index.codecs.sep.IntStreamFactory; +import org.apache.lucene.index.codecs.sep.IntIndexInput; +import org.apache.lucene.index.codecs.sep.IntIndexOutput; + +import java.io.IOException; + +public class PForDeltaFactory extends IntStreamFactory { + private final int blockSize; + + /** blockSize is only used when creating the + * IntIndexOutput */ + public PForDeltaFactory(int blockSize) { + this.blockSize = blockSize; + } + + public IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException { + return new PForDeltaIndexInput(dir, fileName, readBufferSize); + } + + public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException { + return new PForDeltaIndexOutput(dir, fileName, blockSize); + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaIndexInput.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaIndexInput.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,73 @@ +package org.apache.lucene.index.codecs.pfordelta; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput; +import org.apache.lucene.util.pfor.PFor; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; + +public class PForDeltaIndexInput extends FixedIntBlockIndexInput { + + public PForDeltaIndexInput(Directory dir, String fileName, int readBufferSize) throws IOException { + super(dir.openInput(fileName, readBufferSize)); + } + + private static class BlockReader implements FixedIntBlockIndexInput.BlockReader { + private final IndexInput in; + private final int[] buffer; + private final PFor decompressor; + private final byte[] input; + private final IntBuffer intInput; + + public BlockReader(IndexInput in, int[] buffer) { + this.in = in; + this.buffer = buffer; + + decompressor = new PFor(); + // nocommit -- can't hardwire 1024; it's a function of blockSize + ByteBuffer byteBuffer = ByteBuffer.allocate(1024); + input = byteBuffer.array(); + intInput = byteBuffer.asIntBuffer(); + decompressor.setCompressedBuffer(intInput); + decompressor.setUnCompressedData(buffer, 0, buffer.length); + } + + public void seek(long pos) throws IOException { + // + } + + public void readBlock() throws IOException { + int numBytes = in.readInt(); + //System.out.println("nb=" + numBytes); + // nocommit -- how to avoid this copy? + in.readBytes(input, 0, numBytes); + intInput.rewind(); + decompressor.decompress(); + } + } + + protected BlockReader getBlockReader(IndexInput in, int[] buffer) { + return new BlockReader(in, buffer); + } +} + diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaIndexOutput.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaIndexOutput.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,56 @@ +package org.apache.lucene.index.codecs.pfordelta; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.Directory; +import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput; +import org.apache.lucene.util.pfor.PFor; + +import java.io.IOException; +import java.nio.ByteBuffer; + +public class PForDeltaIndexOutput extends FixedIntBlockIndexOutput { + + public final static String CODEC = "P_FOR_DELTA"; + public final static int VERSION_START = 0; + public final static int VERSION_CURRENT = VERSION_START; + private final PFor compressor; + private final byte[] output; + + public PForDeltaIndexOutput(Directory dir, String fileName, int blockSize) throws IOException { + super(dir.createOutput(fileName), blockSize); + + compressor = new PFor(); + // nocommit -- can't hardwire 1024; it's a function of blockSize + ByteBuffer byteBuffer = ByteBuffer.allocate(1024); + output = byteBuffer.array(); + compressor.setCompressedBuffer(byteBuffer.asIntBuffer()); + } + + @Override + protected void flushBlock() throws IOException { + compressor.setUnCompressedData(buffer, 0, buffer.length); + final int numFrameBits = compressor.frameBitsForCompression(); + compressor.compress(); + final int numBytes = compressor.compressedSize() * 4; + assert numBytes <= 1024; + out.writeInt(numBytes); + out.writeBytes(output, numBytes); + } +} + diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,138 @@ +package org.apache.lucene.index.codecs.pfordelta; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Set; +import java.io.IOException; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.codecs.FieldsConsumer; +import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl; +import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl; +import org.apache.lucene.index.codecs.standard.StandardCodec; +import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; +import org.apache.lucene.index.codecs.PrefixCodedTermsReader; +import org.apache.lucene.index.codecs.TermsIndexWriterBase; +import org.apache.lucene.index.codecs.TermsIndexReaderBase; +import org.apache.lucene.index.codecs.PostingsReaderBase; +import org.apache.lucene.index.codecs.PostingsWriterBase; +import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; +import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; + +public class PatchedFrameOfRefCodec extends Codec { + + public PatchedFrameOfRefCodec() { + name = "PatchedFrameOfRef"; + } + + @Override + public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new PForDeltaFactory(128)); + + boolean success = false; + TermsIndexWriterBase indexWriter; + try { + indexWriter = new FixedGapTermsIndexWriter(state); + success = true; + } finally { + if (!success) { + postingsWriter.close(); + } + } + + success = false; + try { + FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); + success = true; + return ret; + } finally { + if (!success) { + try { + postingsWriter.close(); + } finally { + indexWriter.close(); + } + } + } + } + + @Override + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { + PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir, + state.segmentInfo, + state.readBufferSize, + new PForDeltaFactory(128)); + + TermsIndexReaderBase indexReader; + boolean success = false; + try { + indexReader = new FixedGapTermsIndexReader(state.dir, + state.fieldInfos, + state.segmentInfo.name, + state.termsIndexDivisor, + BytesRef.getUTF8SortedAsUnicodeComparator()); + success = true; + } finally { + if (!success) { + postingsReader.close(); + } + } + + success = false; + try { + FieldsProducer ret = new PrefixCodedTermsReader(indexReader, + state.dir, + state.fieldInfos, + state.segmentInfo.name, + postingsReader, + state.readBufferSize, + BytesRef.getUTF8SortedAsUnicodeComparator(), + StandardCodec.TERMS_CACHE_SIZE); + success = true; + return ret; + } finally { + if (!success) { + try { + postingsReader.close(); + } finally { + indexReader.close(); + } + } + } + } + + @Override + public void files(Directory dir, SegmentInfo segmentInfo, Set files) { + SepPostingsReaderImpl.files(segmentInfo, files); + PrefixCodedTermsReader.files(dir, segmentInfo, files); + FixedGapTermsIndexReader.files(dir, segmentInfo, files); + } + + @Override + public void getExtensions(Set extensions) { + SepPostingsWriterImpl.getExtensions(extensions); + PrefixCodedTermsReader.getExtensions(extensions); + FixedGapTermsIndexReader.getIndexExtensions(extensions); + } +} diff -r e4226f9efcdc 
lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Tue Nov 02 18:19:28 2010 -0400 @@ -27,6 +27,7 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.BulkPostingsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldsEnum; @@ -42,6 +43,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.index.codecs.standard.DefaultSkipListReader; /** Exposes flex API on a pre-flex index, as a codec. * @lucene.experimental */ @@ -956,6 +958,17 @@ } return docsPosEnum.reset(termEnum, skipDocs); } + + @Override + public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException { + PreBulkPostingsEnum postingsEnum; + if (reuse == null || !(reuse instanceof PreBulkPostingsEnum) || !((PreBulkPostingsEnum) reuse).canReuse(fieldInfo, freqStream, doFreqs, doPositions)) { + postingsEnum = new PreBulkPostingsEnum(fieldInfo.omitTermFreqAndPositions, doFreqs, doPositions); + } else { + postingsEnum = (PreBulkPostingsEnum) reuse; + } + return postingsEnum.reset(fieldInfo, termEnum); + } } private final class PreDocsEnum extends DocsEnum { @@ -1088,4 +1101,324 @@ return payload; } } + + static final int BULK_BUFFER_SIZE = 64; + + // Bulk postings API + private final class PreBulkPostingsEnum extends BulkPostingsEnum { + private final IndexInput freqIn; + private final IndexInput proxIn; + + final IndexInput startFreqIn; + private final boolean omitTF; + + boolean storePayloads; // does current field store payloads? 
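    // A minimal consumer-side sketch of the BulkPostingsEnum.BlockReader contract this
    // patch introduces: getBuffer() exposes a reusable int[], offset()/end() bound the
    // currently valid slice, and fill() refills the buffer and returns the new end.
    // The helper name lastDocID is illustrative only and not part of the patch; the
    // same loop shape is used by MultiTermQueryWrapperFilter and TermScorer below.
    private int lastDocID(BulkPostingsEnum postings, int docFreq) throws IOException {
      final BulkPostingsEnum.BlockReader docDeltasReader = postings.getDocDeltasReader();
      final int[] docDeltas = docDeltasReader.getBuffer();
      int offset = docDeltasReader.offset();
      int limit = docDeltasReader.end();
      int doc = 0;
      for (int count = 0; count < docFreq; count++) {
        if (offset >= limit) {
          // buffer exhausted -- ask the codec to decode the next block
          limit = docDeltasReader.fill();
          offset = 0;
        }
        doc += docDeltas[offset++];  // entries are doc-to-doc deltas
      }
      return doc;
    }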
+ + int ord; // how many docs we've read + int docFreq; + + long freqOffset; + long proxOffset; + int skipOffset; + + boolean skipped; + DefaultSkipListReader skipper; + private int payloadLength; + + private final DocDeltasReader docDeltasReader; + private final FreqsReader freqsReader; + private final PositionsReader positionDeltasReader; + + private boolean docsPending, freqsPending; + + public PreBulkPostingsEnum(boolean omitTF, boolean doFreqs, boolean doPositions) throws IOException { + startFreqIn = PreFlexFields.this.freqStream; + this.freqIn = (IndexInput) PreFlexFields.this.freqStream.clone(); + this.omitTF = omitTF; + + docDeltasReader = new DocDeltasReader(); + if (doFreqs && !omitTF) { + freqsReader = new FreqsReader(); + } else { + freqsReader = null; + } + + if (doPositions && !omitTF) { + this.proxIn = (IndexInput) PreFlexFields.this.proxStream.clone(); + positionDeltasReader = new PositionsReader(); + } else { + this.proxIn = null; + positionDeltasReader = null; + } + } + + public boolean canReuse(FieldInfo fieldInfo, IndexInput freqin, boolean doFreqs, boolean doPositions) { + return freqIn == startFreqIn && + (!doFreqs || freqsReader == null) && + (!doPositions || positionDeltasReader == null) && + (omitTF == fieldInfo.omitTermFreqAndPositions); + } + + final void read() throws IOException { + try { + if (freqsReader == null) { + // Consumer only wants doc deltas + assert !docsPending; + if (omitTF) { + // Index only stores doc deltas + for(int i=0;i>> 1; + if ((code & 1) == 0) { + freqIn.readVInt(); + } + } + } + docsPending = true; + } else { + // Consumer wants both + assert !docsPending; + assert !freqsPending; + for(int i=0;i>> 1; + if ((code & 1) == 0) { + freqsReader.buffer[i] = freqIn.readVInt(); + } else { + freqsReader.buffer[i] = 1; + } + } + docsPending = true; + freqsPending = true; + } + ord += BULK_BUFFER_SIZE; + } catch (IOException ioe) { + if (freqIn.getFilePointer() != freqIn.length()) { + throw ioe; + } + } + } + + class DocDeltasReader extends BulkPostingsEnum.BlockReader { + private final int[] buffer = new int[BULK_BUFFER_SIZE]; + private int limit; + + @Override + public int[] getBuffer() { + return buffer; + } + + @Override + public int end() { + return limit; + } + + @Override + public int fill() throws IOException { + if (!docsPending) { + read(); + } + docsPending = false; + limit = BULK_BUFFER_SIZE; + return BULK_BUFFER_SIZE; + } + + @Override + public int offset() { + return 0; + } + + @Override + public void setOffset(int offset) { + assert offset == 0; + } + } + + class FreqsReader extends BulkPostingsEnum.BlockReader { + private final int[] buffer = new int[BULK_BUFFER_SIZE]; + private int limit; + + @Override + public int[] getBuffer() { + return buffer; + } + + @Override + public int end() { + return limit; + } + + @Override + public int fill() throws IOException { + if (!freqsPending) { + read(); + } + freqsPending = false; + limit = BULK_BUFFER_SIZE; + return BULK_BUFFER_SIZE; + } + + @Override + public int offset() { + return 0; + } + + @Override + public void setOffset(int offset) { + assert offset == 0; + } + } + + class PositionsReader extends BulkPostingsEnum.BlockReader { + final int[] buffer = new int[BULK_BUFFER_SIZE]; + int limit; + + @Override + public int[] getBuffer() { + return buffer; + } + + @Override + public int end() { + return limit; + } + + @Override + public int fill() throws IOException { + // nocommit -- must "handle" EOF here -- cannot + // change old index format! 
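        // What the loop below decodes (pre-flex .prx format): when the field stores
        // payloads each entry is a vInt whose high bits are the position delta
        // (code >>> 1) and whose low bit flags a payload-length change, after which
        // the payload bytes themselves are skipped because the bulk API only
        // surfaces position deltas; without payloads the vInt is the raw delta.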
+ if (storePayloads) { + for(int i=0;i>> 1; + if ((code & 1) != 0) { + payloadLength = proxIn.readVInt(); + } + if (payloadLength != 0) { + // skip payload + proxIn.seek(proxIn.getFilePointer()+payloadLength); + } + } + } else { + for(int i=0;i 0) { + + // There are enough docs in the posting to have + // skip data + + if (skipper == null) { + // This is the first time this enum has ever been used for skipping -- do lazy init + skipper = new DefaultSkipListReader((IndexInput) freqIn.clone(), tis.getMaxSkipLevels(), tis.getSkipInterval()); + } + + if (!skipped) { + + // This is the first time this posting has + // skipped since reset() was called, so now we + // load the skip data for this posting + skipper.init(freqOffset + skipOffset, + freqOffset, proxOffset, + docFreq, storePayloads); + + skipped = true; + } + + final int newOrd = skipper.skipTo(target); + + // nocommit rename ord -> count + assert curCount == ord: "ord=" + ord + " curCount=" + curCount; + + if (newOrd > ord) { + + // Skipper moved + freqIn.seek(skipper.getFreqPointer()); + if (freqsReader != null) { + freqsReader.limit = 0; + } + docDeltasReader.limit = 0; + + if (positionDeltasReader != null) { + positionDeltasReader.limit = 0; + proxIn.seek(skipper.getProxPointer()); + } + + jumpResult.count = ord = newOrd; + jumpResult.docID = skipper.getDoc(); + + return jumpResult; + } + } + + // no jump occurred + return null; + } + } } diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java --- a/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Tue Nov 02 18:19:28 2010 -0400 @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.BulkPostingsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.codecs.TermState; @@ -30,6 +31,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.ArrayUtil; /** Concrete class that reads the current doc/freq/skip * postings format @@ -200,6 +202,27 @@ } } + // TODO: we could actually reuse, by having TL that + // holds the last wrapped reuse, and vice-versa + @Override + public BulkPostingsEnum bulkPostings(FieldInfo field, TermState _termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException { + PulsingTermState termState = (PulsingTermState) _termState; + if (termState.docFreq <= maxPulsingDocFreq) { + if (reuse instanceof PulsingBulkPostingsEnum && ((PulsingBulkPostingsEnum) reuse).docDeltas.length == maxPulsingDocFreq) { + return ((PulsingBulkPostingsEnum) reuse).reset(termState, doFreqs, doPositions); + } else { + PulsingBulkPostingsEnum postingsEnum = new PulsingBulkPostingsEnum(maxPulsingDocFreq); + return postingsEnum.reset(termState, doFreqs, doPositions); + } + } else { + if (reuse instanceof PulsingBulkPostingsEnum) { + return wrappedPostingsReader.bulkPostings(field, termState.wrappedTermState, null, doFreqs, doPositions); + } else { + return wrappedPostingsReader.bulkPostings(field, termState.wrappedTermState, reuse, doFreqs, doPositions); + } + } + } + // TODO: -- not great that we can't always reuse @Override public DocsAndPositionsEnum docsAndPositions(FieldInfo field, TermState _termState, Bits skipDocs, 
DocsAndPositionsEnum reuse) throws IOException { @@ -226,8 +249,6 @@ private Document doc; private PulsingTermState state; - public void close() {} - PulsingDocsEnum reset(Bits skipDocs, PulsingTermState termState) { // TODO: -- not great we have to clone here -- // merging is wasteful; TermRangeQuery too @@ -290,6 +311,159 @@ } } + static class PulsingBulkPostingsEnum extends BulkPostingsEnum { + private Document doc; + private PulsingTermState state; + private int numDocs; + private final int[] docDeltas; + private final int[] freqs; + private int[] positionDeltas; + private int numPositions; + private boolean doFreqs; + private boolean doPositions; + + public PulsingBulkPostingsEnum(int maxFreq) { + docDeltas = new int[maxFreq]; + freqs = new int[maxFreq]; + positionDeltas = new int[maxFreq]; + } + + PulsingBulkPostingsEnum reset(PulsingTermState termState, boolean doFreqs, boolean doPositions) { + numDocs = termState.docFreq; + this.doFreqs = doFreqs; + this.doPositions = doPositions; + assert numDocs <= docDeltas.length; + int lastDocID = 0; + numPositions = 0; + for(int i=0;i 0; + if (doPositions) { + final Position[] positions = termState.docs[i].positions; + int lastPos = 0; + for(int posIndex=0;posIndex> 1; - } else { - final long v2 = next(); - return (v2 << 30) | (v >> 1); - } - } - - /** Reads next chunk of ints */ - private IntsRef bulkResult; - - /** Read up to count ints. */ - public IntsRef read(int count) throws IOException { - if (bulkResult == null) { - bulkResult = new IntsRef(); - bulkResult.ints = new int[count]; - } else { - bulkResult.grow(count); - } - for(int i=0;i= docDeltaLimit) { + docDeltaLimit = docReader.fill(); + } if (!omitTF) { freqIndex.read(docReader, true); freqIndex.seek(freqReader); + freqUpto = freqReader.offset(); + freqLimit = freqReader.end(); + if (freqUpto >= freqLimit) { + freqLimit = freqReader.fill(); + } + //System.out.println(" freqIndex=" + freqIndex + " posIndex=" + posIndex); posIndex.read(docReader, true); + // nocommit -- only store this if storePayloads is true // skip payload offset docReader.readVLong(); } else { freq = 1; } + skipOffset = docReader.readVLong(); + docDeltaUpto = docReader.offset(); + docDeltaLimit = docReader.end(); + docFreq = termState.docFreq; + assert docFreq > 0; count = 0; doc = 0; skipped = false; + //System.out.println(" docFreq=" + docFreq); return this; } + public boolean canReuse(IntIndexInput docsIn) { + return startDocIn == docsIn; + } + @Override public int nextDoc() throws IOException { + //System.out.println(" sep.nextDoc"); while(true) { if (count == docFreq) { return doc = NO_MORE_DOCS; } + assert docDeltaUpto <= docDeltaLimit: "docDeltaUpto=" + docDeltaUpto + " docDeltaLimit=" + docDeltaLimit; + + if (docDeltaUpto == docDeltaLimit) { + // refill + //System.out.println(" fill docs"); + docDeltaLimit = docReader.fill(); + docDeltaUpto = 0; + } + count++; // Decode next doc - doc += docReader.next(); + doc += docDeltaBuffer[docDeltaUpto++]; + //System.out.println(" doc="+ doc + " docDeltaUpto=" + (docDeltaUpto-1) + " skipDocs=" + skipDocs + " deleted?=" + (skipDocs != null && skipDocs.get(doc))); if (!omitTF) { - freq = freqReader.next(); + if (freqUpto == freqLimit) { + // refill + //System.out.println(" fill freqs"); + freqLimit = freqReader.fill(); + freqUpto = 0; + } + + freq = freqBuffer[freqUpto++]; } if (skipDocs == null || !skipDocs.get(doc)) { @@ -303,27 +355,246 @@ } @Override - public int read() throws IOException { - // TODO: -- switch to bulk read api in IntIndexInput - final int[] docs 
= bulkResult.docs.ints; - final int[] freqs = bulkResult.freqs.ints; - int i = 0; - final int length = docs.length; - while (i < length && count < docFreq) { - count++; - // manually inlined call to next() for speed - doc += docReader.next(); - if (!omitTF) { - freq = freqReader.next(); + public int freq() { + return freq; + } + + @Override + public int docID() { + return doc; + } + + @Override + public int advance(int target) throws IOException { + //System.out.println("SepDocsEnum.advance target=" + target); + + // TODO: jump right to next() if target is < X away + // from where we are now? + //System.out.println("SepDocsEnum.advance target=" + target); + + if (docFreq >= skipInterval) { + + // There are enough docs in the posting to have + // skip data + + if (skipper == null) { + // This DocsEnum has never done any skipping + //System.out.println(" init skipper"); + skipper = new SepSkipListReader((IndexInput) skipIn.clone(), + freqIn, + docIn, + posIn, + maxSkipLevels, skipInterval); + } - if (skipDocs == null || !skipDocs.get(doc)) { - docs[i] = doc; - freqs[i] = freq; - i++; + if (!skipped) { + //System.out.println(" init skipper2"); + // We haven't yet skipped for this posting + skipper.init(skipOffset, + docIndex, + freqIndex, + posIndex, + 0, + docFreq, + storePayloads); + skipper.setOmitTF(omitTF); + + skipped = true; + } + + final int newCount = skipper.skipTo(target); + + if (newCount > count) { + // Skipper did move + if (!omitTF) { + skipper.getFreqIndex().seek(freqReader); + freqUpto = freqReader.offset(); + freqLimit = freqReader.end(); + if (freqUpto >= freqLimit) { + freqLimit = freqReader.fill(); + } + } + skipper.getDocIndex().seek(docReader); + docDeltaUpto = docReader.offset(); + docDeltaLimit = docReader.end(); + if (docDeltaUpto >= docDeltaLimit) { + docDeltaLimit = docReader.fill(); + } + + count = newCount; + doc = skipper.getDoc(); + //System.out.println(" did move count=" + newCount + " doc=" + doc); } } - return i; + + // Now, linear scan for the rest: + do { + if (nextDoc() == NO_MORE_DOCS) { + return NO_MORE_DOCS; + } + } while (target > doc); + + return doc; + } + } + + class SepDocsAndPositionsEnum extends DocsAndPositionsEnum { + int docFreq; + int doc; + int count; + int freq; + + private boolean storePayloads; + private Bits skipDocs; + private final BulkPostingsEnum.BlockReader docReader; + private final int[] docDeltaBuffer; + private int docDeltaUpto; + private int docDeltaLimit; + private final BulkPostingsEnum.BlockReader freqReader; + private final int[] freqBuffer; + private int freqUpto; + private int freqLimit; + private final BulkPostingsEnum.BlockReader posReader; + private final int[] posBuffer; + private int posUpto; + private int posLimit; + private long skipOffset; + private long payloadOffset; + + private final IndexInput payloadIn; + + private final IntIndexInput.Index docIndex; + private final IntIndexInput.Index freqIndex; + private final IntIndexInput.Index posIndex; + private final IntIndexInput startDocIn; + + private int pendingPosCount; + private int position; + private int payloadLength; + private long pendingPayloadBytes; + private boolean payloadPending; + private boolean posSeekPending; + + boolean skipped; + SepSkipListReader skipper; + + public SepDocsAndPositionsEnum() throws IOException { + startDocIn = docIn; + docReader = docIn.reader(); + docDeltaBuffer = docReader.getBuffer(); + docIndex = docIn.index(); + freqReader = freqIn.reader(); + freqBuffer = freqReader.getBuffer(); + freqIndex = freqIn.index(); + posReader = 
posIn.reader(); + posBuffer = posReader.getBuffer(); + posIndex = posIn.index(); + payloadIn = (IndexInput) SepPostingsReaderImpl.this.payloadIn.clone(); + } + + // nocommit -- somehow we have to prevent re-decode of + // the same block if we have just .next()'d to next term + // in the terms dict -- this is an O(N^2) cost to eg + // TermRangeQuery when it steps through low freq terms!! + SepDocsAndPositionsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits skipDocs) throws IOException { + this.skipDocs = skipDocs; + //System.out.println("sep d&p init"); + assert !fieldInfo.omitTermFreqAndPositions; + storePayloads = fieldInfo.storePayloads; + + // TODO: can't we only do this if consumer + // skipped consuming the previous docs? + docIndex.set(termState.docIndex); + // nocommit -- verify, during merge, this seek is + // sometimes w/in block: + docIndex.seek(docReader); + docDeltaLimit = docReader.end(); + docDeltaUpto = docReader.offset(); + if (docDeltaUpto >= docDeltaLimit) { + docDeltaLimit = docReader.fill(); + } + + freqIndex.read(docReader, true); + freqIndex.seek(freqReader); + freqLimit = freqReader.end(); + freqUpto = freqReader.offset(); + if (freqUpto >= freqLimit) { + //System.out.println(" re-fill freqs freqMax=" + freqLimit); + freqLimit = freqReader.fill(); + } + //System.out.println(" freqIndex=" + freqIndex); + + posIndex.read(docReader, true); + posSeekPending = true; + payloadPending = false; + + payloadOffset = docReader.readVLong(); + //System.out.println(" payloadOffset=" + payloadOffset); + skipOffset = docReader.readVLong(); + //System.out.println(" skipOffset=" + skipOffset); + + docDeltaLimit = docReader.end(); + docDeltaUpto = docReader.offset(); + /* + if (docDeltaUpto >= docDeltaLimit) { + // nocommit -- needed anymore? 
+ docDeltaLimit = docReader.fill(); + docDeltaUpto = 0; + } + */ + + docFreq = termState.docFreq; + assert docFreq > 0; + count = 0; + doc = 0; + pendingPosCount = 0; + pendingPayloadBytes = 0; + skipped = false; + + //System.out.println(" docUpto=" + docDeltaUpto + " docMax=" + docDeltaLimit + " freqUpto=" + freqUpto + " freqMax=" + freqLimit); + + return this; + } + + public boolean canReuse(IntIndexInput docsIn) { + return startDocIn == docsIn; + } + + @Override + public int nextDoc() throws IOException { + while(true) { + if (count == docFreq) { + return doc = NO_MORE_DOCS; + } + + if (docDeltaUpto == docDeltaLimit) { + // refill + docDeltaLimit = docReader.fill(); + docDeltaUpto = 0; + } + + count++; + + // Decode next doc + doc += docDeltaBuffer[docDeltaUpto++]; + + if (freqUpto == freqLimit) { + // refill + freqLimit = freqReader.fill(); + freqUpto = 0; + } + + freq = freqBuffer[freqUpto++]; + pendingPosCount += freq; + + if (skipDocs == null || !skipDocs.get(doc)) { + break; + } + } + + position = 0; + return doc; } @Override @@ -363,180 +634,9 @@ docIndex, freqIndex, posIndex, - 0, - docFreq, - storePayloads); - skipper.setOmitTF(omitTF); - - skipped = true; - } - - final int newCount = skipper.skipTo(target); - - if (newCount > count) { - - // Skipper did move - if (!omitTF) { - skipper.getFreqIndex().seek(freqReader); - } - skipper.getDocIndex().seek(docReader); - count = newCount; - doc = skipper.getDoc(); - } - } - - // Now, linear scan for the rest: - do { - if (nextDoc() == NO_MORE_DOCS) { - return NO_MORE_DOCS; - } - } while (target > doc); - - return doc; - } - } - - class SepDocsAndPositionsEnum extends DocsAndPositionsEnum { - int docFreq; - int doc; - int count; - int freq; - long freqStart; - - private boolean storePayloads; - private Bits skipDocs; - private final IntIndexInput.Reader docReader; - private final IntIndexInput.Reader freqReader; - private final IntIndexInput.Reader posReader; - private final IndexInput payloadIn; - private long skipOffset; - - private final IntIndexInput.Index docIndex; - private final IntIndexInput.Index freqIndex; - private final IntIndexInput.Index posIndex; - private final IntIndexInput startDocIn; - - private long payloadOffset; - - private int pendingPosCount; - private int position; - private int payloadLength; - private long pendingPayloadBytes; - - private boolean skipped; - private SepSkipListReader skipper; - private boolean payloadPending; - private boolean posSeekPending; - - SepDocsAndPositionsEnum() throws IOException { - startDocIn = docIn; - docReader = docIn.reader(); - docIndex = docIn.index(); - freqReader = freqIn.reader(); - freqIndex = freqIn.index(); - posReader = posIn.reader(); - posIndex = posIn.index(); - payloadIn = (IndexInput) SepPostingsReaderImpl.this.payloadIn.clone(); - } - - SepDocsAndPositionsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits skipDocs) throws IOException { - this.skipDocs = skipDocs; - storePayloads = fieldInfo.storePayloads; - - // TODO: can't we only do this if consumer - // skipped consuming the previous docs? 
- docIndex.set(termState.docIndex); - docIndex.seek(docReader); - - freqIndex.read(docReader, true); - freqIndex.seek(freqReader); - - posIndex.read(docReader, true); - posSeekPending = true; - payloadPending = false; - - payloadOffset = docReader.readVLong(); - skipOffset = docReader.readVLong(); - - docFreq = termState.docFreq; - count = 0; - doc = 0; - pendingPosCount = 0; - pendingPayloadBytes = 0; - skipped = false; - - return this; - } - - @Override - public int nextDoc() throws IOException { - - while(true) { - if (count == docFreq) { - return doc = NO_MORE_DOCS; - } - - count++; - - // TODO: maybe we should do the 1-bit trick for encoding - // freq=1 case? - - // Decode next doc - doc += docReader.next(); - - freq = freqReader.next(); - - pendingPosCount += freq; - - if (skipDocs == null || !skipDocs.get(doc)) { - break; - } - } - - position = 0; - return doc; - } - - @Override - public int freq() { - return freq; - } - - @Override - public int docID() { - return doc; - } - - @Override - public int advance(int target) throws IOException { - - // TODO: jump right to next() if target is < X away - // from where we are now? - - if (docFreq >= skipInterval) { - - // There are enough docs in the posting to have - // skip data - - if (skipper == null) { - // This DocsEnum has never done any skipping - skipper = new SepSkipListReader((IndexInput) skipIn.clone(), - freqIn, - docIn, - posIn, - maxSkipLevels, skipInterval); - } - - if (!skipped) { - // We haven't yet skipped for this posting - skipper.init(skipOffset, - docIndex, - freqIndex, - posIndex, payloadOffset, docFreq, storePayloads); - skipped = true; } @@ -546,13 +646,24 @@ // Skipper did move skipper.getFreqIndex().seek(freqReader); + freqUpto = freqReader.offset(); + freqLimit = freqReader.end(); + if (freqUpto >= freqLimit) { + freqLimit = freqReader.fill(); + } + skipper.getDocIndex().seek(docReader); - //skipper.getPosIndex().seek(posReader); + docDeltaUpto = docReader.offset(); + docDeltaLimit = docReader.end(); + if (docDeltaUpto >= docDeltaLimit) { + docDeltaLimit = docReader.fill(); + } + posIndex.set(skipper.getPosIndex()); posSeekPending = true; count = newCount; doc = skipper.getDoc(); - //payloadIn.seek(skipper.getPayloadPointer()); + payloadOffset = skipper.getPayloadPointer(); pendingPosCount = 0; pendingPayloadBytes = 0; @@ -575,6 +686,11 @@ public int nextPosition() throws IOException { if (posSeekPending) { posIndex.seek(posReader); + posLimit = posReader.end(); + posUpto = posReader.offset(); + if (posUpto >= posLimit) { + posLimit = posReader.fill(); + } payloadIn.seek(payloadOffset); posSeekPending = false; } @@ -582,10 +698,12 @@ // scan over any docs that were iterated without their // positions while (pendingPosCount > freq) { - final int code = posReader.next(); + + final int code = nextPosInt(); + if (storePayloads && (code & 1) != 0) { // Payload length has changed - payloadLength = posReader.next(); + payloadLength = nextPosInt(); assert payloadLength >= 0; } pendingPosCount--; @@ -593,11 +711,12 @@ pendingPayloadBytes += payloadLength; } - final int code = posReader.next(); + final int code = nextPosInt(); + if (storePayloads) { if ((code & 1) != 0) { // Payload length has changed - payloadLength = posReader.next(); + payloadLength = nextPosInt(); assert payloadLength >= 0; } position += code >> 1; @@ -612,6 +731,14 @@ return position; } + private int nextPosInt() throws IOException { + if (posUpto == posLimit) { + posLimit = posReader.fill(); + posUpto = 0; + } + return posBuffer[posUpto++]; + } + 
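    // Sketch of draining one document's positions through the same refill pattern
    // that nextPosInt() wraps above (no-payload case; with payloads each code also
    // carries the length-change flag in its low bit, as nextPosition() handles).
    // The helper name lastPosition is illustrative only and not part of the patch.
    private int lastPosition(BulkPostingsEnum.BlockReader posReader, int freq) throws IOException {
      final int[] posDeltas = posReader.getBuffer();
      int upto = posReader.offset();
      int limit = posReader.end();
      int position = 0;
      for (int i = 0; i < freq; i++) {
        if (upto >= limit) {
          limit = posReader.fill();
          upto = 0;
        }
        position += posDeltas[upto++];  // position deltas within the current doc
      }
      return position;
    }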
private BytesRef payload; @Override @@ -645,4 +772,261 @@ return payloadPending && payloadLength > 0; } } + + class SepBulkPostingsEnum extends BulkPostingsEnum { + private int docFreq; + + private final BulkPostingsEnum.BlockReader docReader; + private final IntIndexInput.Index docIndex; + + private final BulkPostingsEnum.BlockReader freqReader; + private final IntIndexInput.Index freqIndex; + + private final BulkPostingsEnum.BlockReader posReader; + private final IntIndexInput.Index posIndex; + + private final boolean storePayloads; + private final boolean omitTF; + private long skipOffset; + + private final IntIndexInput startDocIn; + + private boolean skipped; + private SepSkipListReader skipper; + + public SepBulkPostingsEnum(FieldInfo fieldInfo, boolean doFreq, boolean doPos) throws IOException { + this.storePayloads = fieldInfo.storePayloads; + this.omitTF = fieldInfo.omitTermFreqAndPositions; + startDocIn = docIn; + docReader = docIn.reader(); + docIndex = docIn.index(); + + if (doFreq && !omitTF) { + freqReader = freqIn.reader(); + } else { + freqReader = null; + } + + if (doPos && !omitTF) { + if (storePayloads) { + // Must rewrite each posDelta: + posReader = new PosPayloadReader(posIn.reader()); + } else { + // Pass through + posReader = posIn.reader(); + } + } else { + posReader = null; + } + + if (!omitTF) { + // we have to pull these even if doFreq is false + // just so we can decode the index from the docs + // file + freqIndex = freqIn.index(); + posIndex = posIn.index(); + } else { + posIndex = null; + freqIndex = null; + } + } + + public boolean canReuse(FieldInfo fieldInfo, IntIndexInput docIn, boolean doFreq, boolean doPos) { + return fieldInfo.storePayloads == storePayloads && + startDocIn == docIn && + (freqReader != null || !doFreq) && + (posReader != null || !doPos); + } + + // nocommit -- make sure this is tested!! + + // Only used when payloads were stored -- we cannot do + // pass-through read for this since the payload lengths + // are also encoded into the position deltas + private final class PosPayloadReader extends BulkPostingsEnum.BlockReader { + final BulkPostingsEnum.BlockReader other; + private int pendingOffset; + private int limit; + private boolean skipNext; + + public PosPayloadReader(BulkPostingsEnum.BlockReader other) { + this.other = other; + } + + void doAfterSeek() {} + + @Override + public int[] getBuffer() { + return other.getBuffer(); + } + + // nocommit -- make sure this works correctly in the + // "reuse"/seek case + @Override + public int offset() { + pendingOffset = other.offset(); + return 0; + } + + @Override + public void setOffset(int offset) { + throw new UnsupportedOperationException(); + } + + @Override + public int fill() throws IOException { + // Translate code back to pos deltas, and filter out + // any changes in payload length. NOTE: this is a + // perf hit on indices that encode payloads, even if + // they use "normal" positional queries + final int otherLimit = other.fill(); + limit = 0; + final int[] buffer = other.getBuffer(); + for(int i=pendingOffset;i>> 1; + if ((code & 1) != 0) { + // skip the payload length + skipNext = true; + } + } + } + pendingOffset = 0; + + return limit; + } + + @Override + public int end() { + return limit; + } + } + + /** Position readers to the specified term */ + SepBulkPostingsEnum init(SepTermState termState) throws IOException { + + // nocommit -- make sure seek w/in buffer is efficient + // here: + + // TODO: can't we only do this if consumer + // skipped consuming the previous docs? 
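      // init() below re-positions every reader for the new term: it seeks the doc
      // stream to termState.docIndex, then (unless omitTF) decodes the freq index
      // and position index, reads past the payload start that the writer embedded
      // at the head of each term's doc stream, and finally reads the skip-list
      // file pointer.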
+ docIndex.set(termState.docIndex); + docIndex.seek(docReader); + //System.out.println("sep init offset=" + docReader.offset() + " limit=" + docReader.end() + " omitTF=" + omitTF); + //System.out.println(" v[0]=" + docReader.getBuffer()[0]); + + if (!omitTF) { + freqIndex.read(docReader, true); + if (freqReader != null) { + freqIndex.seek(freqReader); + } + posIndex.read(docReader, true); + // skip payload offset -- nocommit only store this + // if field has payloads + docReader.readVLong(); + } + + skipOffset = docReader.readVLong(); + //System.out.println("skipOffset=" + skipOffset); + + if (posReader != null) { + if (storePayloads) { + PosPayloadReader posPayloadReader = (PosPayloadReader) posReader; + posIndex.seek(posPayloadReader.other); + posPayloadReader.doAfterSeek(); + } else { + posIndex.seek(posReader); + } + } + + if (docReader.offset() >= docReader.end()) { + docReader.fill(); + docReader.setOffset(0); + } + + docFreq = termState.docFreq; + skipped = false; + + return this; + } + + @Override + public BulkPostingsEnum.BlockReader getDocDeltasReader() { + // Maximize perf -- just pass through the underlying + // intblock reader: + return docReader; + } + + @Override + public BulkPostingsEnum.BlockReader getFreqsReader() { + // Maximize perf -- just pass through the underlying + // intblock reader: + return freqReader; + } + + @Override + public BulkPostingsEnum.BlockReader getPositionDeltasReader() { + // Maximize perf -- just pass through the underlying + // intblock reader (if payloads were not indexed): + return posReader; + } + + private final JumpResult jumpResult = new JumpResult(); + + @Override + public JumpResult jump(int target, int curCount) throws IOException { + + if (docFreq >= skipInterval) { + + // There are enough docs in the posting to have + // skip data + + if (skipper == null) { + // This enum has never done any skipping + skipper = new SepSkipListReader((IndexInput) skipIn.clone(), + freqIn, + docIn, + posIn, + maxSkipLevels, skipInterval); + } + + if (!skipped) { + // We haven't yet skipped for this particular posting + skipper.init(skipOffset, + docIndex, + freqIndex, + posIndex, + 0, + docFreq, + storePayloads); + skipper.setOmitTF(omitTF); + skipped = true; + } + + final int newCount = skipper.skipTo(target); + //System.out.println(" sep skip newCount=" + newCount + " vs count=" + curCount); + + if (newCount > curCount) { + + // Skipper did move -- seek all readers: + skipper.getDocIndex().seek(docReader); + + if (freqReader != null) { + skipper.getFreqIndex().seek(freqReader); + } + if (posReader != null) { + skipper.getPosIndex().seek(posReader); + } + + jumpResult.count = newCount; + jumpResult.docID = skipper.getDoc(); + return jumpResult; + } + } + return null; + } + } } diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java --- a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Tue Nov 02 18:19:28 2010 -0400 @@ -181,6 +181,7 @@ posIndex.write(docOut, true); docOut.writeVLong(payloadStart); } + // nocommit -- only write if docFreq > skipInterval? 
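      // Writer-side counterpart of the reader init above: on a term's first doc the
      // freq/position indexes and payload start (when tf is not omitted) have just
      // been written into the doc stream, and the writeVLong below appends the skip
      // stream's current file pointer so readers can later seek straight to this
      // term's skip data.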
docOut.writeVLong(skipOut.getFilePointer()); firstDoc = false; } @@ -199,6 +200,7 @@ } lastDocID = docID; + //System.out.println("sepw: write docID=" + docID); docOut.write(delta); if (!omitTF) { freqOut.write(termDocFreq); diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java --- a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java Tue Nov 02 18:19:28 2010 -0400 @@ -108,10 +108,10 @@ for(int i=0;i getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } @@ -447,6 +459,189 @@ } } + private class SimpleTextBulkPostingsEnum extends BulkPostingsEnum { + private final IndexInput inStart; + private final IndexInput in; + private final LineCountReader docDeltasReader; + private final FreqsReader freqsReader; + private final LineCountReader positionDeltasReader; + + public SimpleTextBulkPostingsEnum(boolean doFreq, boolean doPositions) { + this.inStart = SimpleTextFieldsReader.this.in; + this.in = (IndexInput) this.inStart.clone(); + docDeltasReader = new LineCountReader(DOC); + if (doFreq) { + freqsReader = new FreqsReader(); + } else { + freqsReader = null; + } + + if (doPositions) { + positionDeltasReader = new LineCountReader(POS); + } else { + positionDeltasReader = null; + } + } + + public boolean canReuse(IndexInput in, boolean doFreq, boolean doPositions) { + return in == inStart && (doFreq == (freqsReader != null)) && (doPositions == (positionDeltasReader != null)); + } + + // reads docDeltas & positionDeltas + private class LineCountReader extends BlockReader { + private final BytesRef prefix; + private final int[] buffer = new int[64]; + private final IndexInput in; + private final BytesRef scratch = new BytesRef(10); + private int lastValue; + private int limit; + + public LineCountReader(BytesRef prefix) { + this.prefix = prefix; + this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone(); + } + + public void reset(long fp) throws IOException { + lastValue = 0; + in.seek(fp); + fill(); + } + + @Override + public int[] getBuffer() { + return buffer; + } + + @Override + public int offset() { + return 0; + } + + @Override + public void setOffset(int offset) { + assert offset == 0; + } + + @Override + public int end() { + return limit; + } + + @Override + public int fill() throws IOException { + int upto = 0; + while(upto < buffer.length) { + readLine(in, scratch); + if (scratch.startsWith(TERM) || scratch.startsWith(FIELD) || scratch.equals(END)) { + break; + } else if (scratch.startsWith(prefix)) { + final int value = Integer.parseInt(new String(scratch.bytes, scratch.offset+prefix.length, scratch.length-prefix.length)); + buffer[upto++] = value - lastValue; + lastValue = value; + } + } + return limit = upto; + } + } + + private class FreqsReader extends BlockReader { + private final int[] buffer = new int[64]; + private final IndexInput in; + private final BytesRef scratch = new BytesRef(10); + private int limit; + private boolean omitTF; + + public FreqsReader() { + this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone(); + } + + public void reset(long fp, boolean omitTF) throws IOException { + in.seek(fp); + this.omitTF = omitTF; + fill(); + } + + @Override + public int[] getBuffer() { + return buffer; + } + + @Override + public int offset() { + return 0; + } + + @Override + public void setOffset(int offset) { + assert offset == 0; + } + + @Override + public int end() { + return limit; 
+ } + + @Override + public int fill() throws IOException { + int upto = 0; + int freq = -1; + long lastFP = in.getFilePointer(); + while(upto < buffer.length) { + lastFP = in.getFilePointer(); + readLine(in, scratch); + if (scratch.startsWith(TERM) || scratch.startsWith(FIELD) || scratch.equals(END)) { + if (freq != -1) { + buffer[upto++] = omitTF ? 1 : freq; + } + break; + } else if (scratch.startsWith(DOC)) { + if (freq != -1) { + buffer[upto++] = omitTF ? 1: freq; + } + freq = 0; + } else if (scratch.startsWith(POS)) { + freq++; + } + } + in.seek(lastFP); + return limit = upto; + } + } + + public SimpleTextBulkPostingsEnum reset(long fp, boolean omitTF) throws IOException { + + docDeltasReader.reset(fp); + + if (freqsReader != null) { + freqsReader.reset(fp, omitTF); + } + if (positionDeltasReader != null) { + positionDeltasReader.reset(fp); + } + return this; + } + + @Override + public BlockReader getDocDeltasReader() { + return docDeltasReader; + } + + @Override + public BlockReader getPositionDeltasReader() { + return positionDeltasReader; + } + + @Override + public BlockReader getFreqsReader() { + return freqsReader; + } + + @Override + public JumpResult jump(int target, int curCount) { + return null; + } + } + private class SimpleTextTerms extends Terms { private final String field; private final long termsStart; diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java --- a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Tue Nov 02 18:19:28 2010 -0400 @@ -24,6 +24,7 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.BulkPostingsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.codecs.PostingsReaderBase; @@ -171,6 +172,17 @@ } @Override + public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, TermState termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException { + SegmentBulkPostingsEnum postingsEnum; + if (reuse == null || !(reuse instanceof SegmentBulkPostingsEnum) || !((SegmentBulkPostingsEnum) reuse).canReuse(fieldInfo, freqIn, doFreqs, doPositions)) { + postingsEnum = new SegmentBulkPostingsEnum(fieldInfo.omitTermFreqAndPositions, doFreqs, doPositions); + } else { + postingsEnum = (SegmentBulkPostingsEnum) reuse; + } + return postingsEnum.reset(fieldInfo, (DocTermState) termState); + } + + @Override public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { if (fieldInfo.omitTermFreqAndPositions) { return null; @@ -231,6 +243,7 @@ // cases freqIn.seek(termState.freqOffset); limit = termState.docFreq; + assert limit > 0; ord = 0; doc = 0; @@ -265,7 +278,6 @@ break; } } - return doc; } @@ -341,13 +353,14 @@ } final int newOrd = skipper.skipTo(target); - + //System.out.println(" newOrd=" + newOrd + " vs ord=" + ord); + if (newOrd > ord) { // Skipper moved - ord = newOrd; doc = skipper.getDoc(); freqIn.seek(skipper.getFreqPointer()); + //System.out.println(" go lastDoc=" + doc); } } @@ -360,6 +373,330 @@ } } + static final int BULK_BUFFER_SIZE = 64; + + // Bulk postings API + private final class SegmentBulkPostingsEnum extends BulkPostingsEnum { + private 
final IndexInput freqIn; + private final IndexInput proxIn; + + final IndexInput startFreqIn; + private final boolean omitTF; + + boolean storePayloads; // does current field store payloads? + + int ord; // how many docs we've read + int docFreq; + + long freqOffset; + long proxOffset; + int skipOffset; + + boolean skipped; + DefaultSkipListReader skipper; + private int payloadLength; + + private final DocDeltasReader docDeltasReader; + private final FreqsReader freqsReader; + private final PositionsReader positionDeltasReader; + + private boolean docsPending, freqsPending; + + public SegmentBulkPostingsEnum(boolean omitTF, boolean doFreqs, boolean doPositions) throws IOException { + //System.out.println("bulk init"); + startFreqIn = StandardPostingsReader.this.freqIn; + this.freqIn = (IndexInput) StandardPostingsReader.this.freqIn.clone(); + this.omitTF = omitTF; + + docDeltasReader = new DocDeltasReader(); + if (doFreqs && !omitTF) { + freqsReader = new FreqsReader(); + } else { + freqsReader = null; + } + + if (doPositions && !omitTF) { + this.proxIn = (IndexInput) StandardPostingsReader.this.proxIn.clone(); + positionDeltasReader = new PositionsReader(); + } else { + this.proxIn = null; + positionDeltasReader = null; + } + } + + public boolean canReuse(FieldInfo fieldInfo, IndexInput freqin, boolean doFreqs, boolean doPositions) { + return freqIn == startFreqIn && + (!doFreqs || freqsReader == null) && + (!doPositions || positionDeltasReader == null) && + (omitTF == fieldInfo.omitTermFreqAndPositions); + } + + final void read() throws IOException { + if (freqsReader == null) { + // Consumer only wants doc deltas + assert !docsPending; + if (omitTF) { + // Index only stores doc deltas + for(int i=0;i>> 1; + if ((code & 1) == 0) { + freqIn.readVInt(); + } + } + } + ord += BULK_BUFFER_SIZE; + docsPending = true; + } else { + // Consumer wants both + assert !docsPending; + assert !freqsPending; + for(int i=0;i>> 1; + if ((code & 1) == 0) { + freqsReader.buffer[i] = freqIn.readVInt(); + } else { + freqsReader.buffer[i] = 1; + } + } + ord += BULK_BUFFER_SIZE; + docsPending = true; + freqsPending = true; + } + } + + private class DocDeltasReader extends BulkPostingsEnum.BlockReader { + final int[] buffer = new int[BULK_BUFFER_SIZE]; + int limit; + int offset; + + @Override + public int[] getBuffer() { + return buffer; + } + + @Override + public int end() { + return limit; + } + + @Override + public int fill() throws IOException { + if (!docsPending) { + read(); + } + docsPending = false; + limit = BULK_BUFFER_SIZE; + offset = 0; + //System.out.println("spr: doc deltas read limit=" + limit); + return BULK_BUFFER_SIZE; + } + + @Override + public int offset() { + return offset; + } + + @Override + public void setOffset(int offset) { + this.offset = offset; + } + } + + private class FreqsReader extends BulkPostingsEnum.BlockReader { + final int[] buffer = new int[BULK_BUFFER_SIZE]; + int limit; + + @Override + public int[] getBuffer() { + return buffer; + } + + @Override + public int end() { + return limit; + } + + @Override + public int fill() throws IOException { + if (!freqsPending) { + read(); + } + freqsPending = false; + limit = BULK_BUFFER_SIZE; + return BULK_BUFFER_SIZE; + } + + @Override + public int offset() { + return 0; + } + + @Override + public void setOffset(int offset) { + throw new UnsupportedOperationException(); + } + } + + private class PositionsReader extends BulkPostingsEnum.BlockReader { + final int[] buffer = new int[BULK_BUFFER_SIZE]; + int limit; + + @Override + public 
int[] getBuffer() { + return buffer; + } + + @Override + public int end() { + return limit; + } + + @Override + public int fill() throws IOException { + // nocommit -- must flush prx file w/ extra 127 0 + // positions -- index change!! + if (storePayloads) { + for(int i=0;i>> 1; + if ((code & 1) != 0) { + payloadLength = proxIn.readVInt(); + } + if (payloadLength != 0) { + // skip payload + proxIn.seek(proxIn.getFilePointer()+payloadLength); + } + } + } else { + for(int i=0;i 0; + + ord = 0; + skipped = false; + + return this; + } + + private final JumpResult jumpResult = new JumpResult(); + + @Override + public JumpResult jump(int target, int curCount) throws IOException { + + // TODO: jump right to next() if target is < X away + // from where we are now? + + if (skipOffset > 0) { + + // There are enough docs in the posting to have + // skip data + + if (skipper == null) { + // This is the first time this enum has ever been used for skipping -- do lazy init + skipper = new DefaultSkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval); + } + + if (!skipped) { + + // This is the first time this posting has + // skipped since reset() was called, so now we + // load the skip data for this posting + skipper.init(freqOffset + skipOffset, + freqOffset, proxOffset, + docFreq, storePayloads); + + skipped = true; + } + + final int newOrd = skipper.skipTo(target); + + // nocommit rename ord -> count + assert curCount == ord: "curCount=" + curCount + " ord=" + ord; + + if (newOrd > ord) { + // Skipper moved + //System.out.println("newOrd=" + newOrd + " vs ord=" + ord + " doc=" + skipper.getDoc()); + + freqIn.seek(skipper.getFreqPointer()); + docDeltasReader.limit = 0; + + if (freqsReader != null) { + freqsReader.limit = 0; + } + + if (positionDeltasReader != null) { + positionDeltasReader.limit = 0; + proxIn.seek(skipper.getProxPointer()); + } + + jumpResult.count = ord = newOrd; + jumpResult.docID = skipper.getDoc(); + + return jumpResult; + } + } + + // no jump occurred + return null; + } + } + // Decodes docs & positions private class SegmentDocsAndPositionsEnum extends DocsAndPositionsEnum { final IndexInput startFreqIn; @@ -412,6 +749,7 @@ lazyProxPointer = termState.proxOffset; limit = termState.docFreq; + assert limit > 0; ord = 0; doc = 0; position = 0; diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java --- a/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Tue Nov 02 18:19:28 2010 -0400 @@ -224,6 +224,16 @@ @Override public void close() throws IOException { + + // Readers read whole blocks at once, so we have to + // flush final block out w/ unused values: + for(int i=0;i= minNrShouldMatch) { // TODO: re-enable this if BQ ever sends us required clauses - // (current.bits & requiredMask) == requiredMask && + // (current.bits & requiredMask) == requiredMask + // && + //System.out.println(" BS.nextDoc return doc=" + current.doc); return doc = current.doc; } } @@ -327,6 +342,7 @@ } } while (bucketTable.first != null || more); + //System.out.println(" bs done nextDoc"); return doc = NO_MORE_DOCS; } @@ -351,7 +367,7 @@ buffer.append(")"); return buffer.toString(); } - + @Override protected void visitSubScorers(Query parent, Occur relationship, ScorerVisitor visitor) { super.visitSubScorers(parent, relationship, visitor); diff -r e4226f9efcdc 
lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java --- a/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java Tue Nov 02 18:19:28 2010 -0400 @@ -142,6 +142,11 @@ public int nextDoc() throws IOException { return docIdSetIterator.nextDoc(); } + + @Override + public String toString() { + return "ConstantScorer(" + filter + ")"; + } @Override public int docID() { diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java --- a/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java Tue Nov 02 18:19:28 2010 -0400 @@ -128,6 +128,7 @@ @Override final boolean matchDoc(int doc) { final int docOrd = fcsi.getOrd(doc); + //System.out.println(" doc=" + doc + " matches?=" + (docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint)); return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint; } }; diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java --- a/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Tue Nov 02 18:19:28 2010 -0400 @@ -26,6 +26,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.BulkPostingsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.Bits; @@ -171,6 +172,12 @@ assert tenum != null; return tenum.docs(bits, reuse); } + + @Override + public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException { + assert tenum != null; + return tenum.bulkPostings(reuse, doFreqs, doPositions); + } @Override public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException { diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/search/IndexSearcher.java --- a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java Tue Nov 02 18:19:28 2010 -0400 @@ -222,6 +222,7 @@ final Filter filter, final Collector collector) throws IOException { assert filter != null; + //System.out.println("is.searchWithFilter"); Scorer scorer = weight.scorer(reader, true, false); if (scorer == null) { @@ -248,6 +249,7 @@ collector.setScorer(scorer); while (true) { + //System.out.println(" cycle sDoc=" + scorerDoc + " fDoc=" + filterDoc); if (scorerDoc == filterDoc) { // Check if scorer has exhausted, only before collecting. 
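        // (This loop leapfrogs the two iterators: when scorerDoc and filterDoc land
        // on the same document it is collected; otherwise whichever iterator is
        // behind advances to the other's docID.)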
if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) { diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/search/MultiTermQuery.java --- a/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Tue Nov 02 18:19:28 2010 -0400 @@ -280,8 +280,9 @@ BytesRef bytes; while ((bytes = termsEnum.next()) != null) { termsEnum.cacheCurrentTerm(); - if (!collector.collect(bytes)) + if (!collector.collect(bytes)) { return; // interrupt whole term collection, so also don't iterate other subReaders + } } } } @@ -759,6 +760,7 @@ pendingTerms.add(bytes); docVisitCount += termsEnum.docFreq(); if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { + // System.out.println("MTQ: now cutoff termCount= " + pendingTerms.size() + " vs " + termCountLimit + "; docCount=" + docVisitCount + " vs " + docCountCutoff); hasCutOff = true; return false; } diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java --- a/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java Tue Nov 02 18:19:28 2010 -0400 @@ -23,7 +23,7 @@ import org.apache.lucene.index.Fields; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.BulkPostingsEnum; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.Bits; @@ -120,31 +120,43 @@ final TermsEnum termsEnum = query.getTermsEnum(reader); assert termsEnum != null; + //System.out.println("\nmtqwf.getDocIdSet r=" + reader); if (termsEnum.next() != null) { // fill into a OpenBitSet final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); int termCount = 0; final Bits delDocs = MultiFields.getDeletedDocs(reader); - DocsEnum docsEnum = null; + BulkPostingsEnum postingsEnum = null; do { termCount++; - // System.out.println(" iter termCount=" + termCount + " term=" + - // enumerator.term().toBytesString()); - docsEnum = termsEnum.docs(delDocs, docsEnum); - final DocsEnum.BulkReadResult result = docsEnum.getBulkResult(); - while (true) { - final int count = docsEnum.read(); - if (count != 0) { - final int[] docs = result.docs.ints; - for (int i = 0; i < count; i++) { - bitSet.set(docs[i]); - } - } else { - break; + postingsEnum = termsEnum.bulkPostings(postingsEnum, false, false); + final int docFreq = termsEnum.docFreq(); + //System.out.println(" iter termCount=" + termCount + " term=" + termsEnum.term().utf8ToString() + " df=" + docFreq); + final BulkPostingsEnum.BlockReader docDeltasReader = postingsEnum.getDocDeltasReader(); + final int[] docDeltas = docDeltasReader.getBuffer(); + int offset = docDeltasReader.offset(); + int limit = docDeltasReader.end(); + if (offset >= limit) { + limit = docDeltasReader.fill(); + } + //System.out.println(" start offset=" + offset + " limit=" + limit); + int count = 0; + int doc = 0; + while (count < docFreq) { + if (offset >= limit) { + offset = 0; + limit = docDeltasReader.fill(); + //System.out.println(" fill limit=" + limit); + } + doc += docDeltas[offset++]; + count++; + if (delDocs == null || !delDocs.get(doc)) { + bitSet.set(doc); } } + //System.out.println(" end offset=" + offset); } while (termsEnum.next() != null); - // System.out.println(" done termCount=" + termCount); + //System.out.println(" done 
termCount=" + termCount); query.incTotalNumberOfTerms(termCount); return bitSet; diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/search/TermQuery.java --- a/lucene/src/java/org/apache/lucene/search/TermQuery.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/TermQuery.java Tue Nov 02 18:19:28 2010 -0400 @@ -21,6 +21,7 @@ import java.util.Set; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.BulkPostingsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Explanation.IDFExplanation; @@ -76,15 +77,26 @@ @Override public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { - DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), - term.field(), - term.bytes()); - + assert reader.getSequentialSubReaders() == null; + //System.out.println("TQ: make bulk postings"); + BulkPostingsEnum docs = reader.bulkTermPostingsEnum(term.field(), + term.bytes(), + true, + false); + //System.out.println("bulk enum " + docs + " reader=" + reader); if (docs == null) { return null; } - return new TermScorer(this, docs, similarity, reader.norms(term.field())); + //System.out.println("R=" + reader + " df=" + reader.docFreq(term.field(), term.bytes())); + + TermScorer ts = new TermScorer(this, docs, + reader.docFreq(term.field(), term.bytes()), + reader.getDeletedDocs(), similarity, reader.norms(term.field())); + // nocommit + ts.term = term; + ts.maxDoc = reader.maxDoc(); + return ts; } @Override @@ -124,10 +136,10 @@ int tf = 0; DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), term.field(), term.bytes()); if (docs != null) { - int newDoc = docs.advance(doc); - if (newDoc == doc) { - tf = docs.freq(); - } + int newDoc = docs.advance(doc); + if (newDoc == doc) { + tf = docs.freq(); + } tfExplanation.setValue(similarity.tf(tf)); tfExplanation.setDescription("tf(termFreq("+term+")="+tf+")"); } else { diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/search/TermScorer.java --- a/lucene/src/java/org/apache/lucene/search/TermScorer.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/TermScorer.java Tue Nov 02 18:19:28 2010 -0400 @@ -19,26 +19,38 @@ import java.io.IOException; -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.index.BulkPostingsEnum; +import org.apache.lucene.util.Bits; +import org.apache.lucene.index.Term; /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { - private DocsEnum docsEnum; + private BulkPostingsEnum docsEnum; private byte[] norms; private float weightValue; - private int doc = -1; - private int freq; + private int doc; - private int pointer; - private int pointerMax; + private final int[] docDeltas; + private int docPointer; + private int docPointerMax; + private boolean first = true; + + private final int[] freqs; + private int freqPointer; + private int freqPointerMax; private static final int SCORE_CACHE_SIZE = 32; private float[] scoreCache = new float[SCORE_CACHE_SIZE]; - private int[] docs; - private int[] freqs; - private final DocsEnum.BulkReadResult bulkResult; + private final BulkPostingsEnum.BlockReader freqsReader; + private final BulkPostingsEnum.BlockReader docDeltasReader; + private final Bits skipDocs; + private final int docFreq; + private int count; + + // nocommit + public Term term; + public int maxDoc; /** * Construct a TermScorer. 
@@ -53,13 +65,38 @@ * @param norms * The field norms of the document fields for the Term. */ - TermScorer(Weight weight, DocsEnum td, Similarity similarity, byte[] norms) { + TermScorer(Weight weight, BulkPostingsEnum td, int docFreq, Bits skipDocs, Similarity similarity, byte[] norms) throws IOException { super(similarity, weight); this.docsEnum = td; + this.docFreq = docFreq; + docDeltasReader = td.getDocDeltasReader(); + docDeltas = docDeltasReader.getBuffer(); + docPointerMax = docDeltasReader.end(); + docPointer = docDeltasReader.offset(); + if (docPointer >= docPointerMax) { + docPointerMax = docDeltasReader.fill(); + } + docPointer--; + + freqsReader = td.getFreqsReader(); + if (freqsReader != null) { + freqs = freqsReader.getBuffer(); + freqPointerMax = freqsReader.end(); + freqPointer = freqsReader.offset(); + if (freqPointer >= freqPointerMax) { + freqPointerMax = freqsReader.fill(); + } + freqPointer--; + } else { + freqs = null; + } + + //System.out.println("make new TS dp=" + docPointer + " dpMax=" + docPointerMax + " td=" + td + " freqP=" + freqPointer + " freqPMax=" + freqPointerMax + " this=" + this); + + this.skipDocs = skipDocs; this.norms = norms; this.weightValue = weight.getValue(); - bulkResult = td.getBulkResult(); for (int i = 0; i < SCORE_CACHE_SIZE; i++) scoreCache[i] = getSimilarity().tf(i) * weightValue; @@ -70,41 +107,74 @@ score(c, Integer.MAX_VALUE, nextDoc()); } - private final void refillBuffer() throws IOException { - pointerMax = docsEnum.read(); // refill - docs = bulkResult.docs.ints; - freqs = bulkResult.freqs.ints; - } - // firstDocID is ignored since nextDoc() sets 'doc' @Override protected boolean score(Collector c, int end, int firstDocID) throws IOException { c.setScorer(this); + //System.out.println("ts.collect firstdocID=" + firstDocID + " term=" + term + " end=" + end + " doc=" + doc); + // nocommit -- this can leave scorer on a deleted doc... while (doc < end) { // for docs in window - c.collect(doc); // collect score - if (++pointer >= pointerMax) { - refillBuffer(); - if (pointerMax != 0) { - pointer = 0; - } else { - doc = NO_MORE_DOCS; // set to sentinel value - return false; + if (skipDocs == null || !skipDocs.get(doc)) { + //System.out.println("ts.collect doc=" + doc + " skipDocs=" + skipDocs + " count=" + count + " vs dF=" + docFreq); + c.collect(doc); // collect + } + if (count == docFreq) { + doc = NO_MORE_DOCS; + return false; + } + count++; + docPointer++; + + //System.out.println("dp=" + docPointer + " dpMax=" + docPointerMax + " count=" + count + " countMax=" + docFreq); + + if (docPointer >= docPointerMax) { + docPointerMax = docDeltasReader.fill(); + //System.out.println(" refill! dpMax=" + docPointerMax + " reader=" + docDeltasReader); + assert docPointerMax != 0; + docPointer = 0; + + if (freqsReader != null) { + freqPointer++; + // NOTE: this code is intentionally dup'd + // (specialized) w/ the else clause, for better CPU + // branch prediction (assuming compiler doesn't + // de-dup): for codecs that always bulk read same + // number of docDeltas & freqs (standard, for, + // pfor), this if will always be true. 
Other codecs + // (simple9/16) will not be aligned: + if (freqPointer >= freqPointerMax) { + freqPointerMax = freqsReader.fill(); + assert freqPointerMax != 0; + freqPointer = 0; + } } - } - doc = docs[pointer]; - freq = freqs[pointer]; + } else if (freqsReader != null) { + freqPointer++; + if (freqPointer >= freqPointerMax) { + freqPointerMax = freqsReader.fill(); + assert freqPointerMax != 0; + freqPointer = 0; + } + } + + doc += docDeltas[docPointer]; + assert doc < maxDoc: "doc=" + doc + " maxDoc=" + maxDoc; } return true; } @Override public int docID() { - return doc; + return first ? -1 : doc; } @Override public float freq() { - return freq; + if (freqsReader != null) { + return freqs[freqPointer]; + } else { + return 1.0f; + } } /** @@ -116,30 +186,76 @@ */ @Override public int nextDoc() throws IOException { - pointer++; - if (pointer >= pointerMax) { - refillBuffer(); - if (pointerMax != 0) { - pointer = 0; + //System.out.println("ts.nextDoc " + this + " count=" + count + " vs docFreq=" + docFreq); + while(count < docFreq) { + docPointer++; + if (docPointer >= docPointerMax) { + //System.out.println("ts.nd refill docs"); + docPointerMax = docDeltasReader.fill(); + assert docPointerMax != 0; + docPointer = 0; + if (freqsReader != null) { + // NOTE: this code is intentionally dup'd + // (specialized) w/ the else clause, for better CPU + // branch prediction (assuming compiler doesn't + // de-dup): for codecs that always bulk read same + // number of docDeltas & freqs (standard, for, + // pfor), this if will always be true. Other codecs + // (simple9/16) will not be aligned: + freqPointer++; + if (freqPointer >= freqPointerMax) { + //System.out.println("ts.nd refill freqs"); + freqPointerMax = freqsReader.fill(); + assert freqPointerMax != 0; + freqPointer = 0; + } + } } else { - return doc = NO_MORE_DOCS; + if (freqsReader != null) { + freqPointer++; + if (freqPointer >= freqPointerMax) { + //System.out.println("ts.nd refill freqs"); + freqPointerMax = freqsReader.fill(); + assert freqPointerMax != 0; + freqPointer = 0; + } + } } - } - doc = docs[pointer]; - freq = freqs[pointer]; - assert doc != NO_MORE_DOCS; - return doc; + count++; + doc += docDeltas[docPointer]; + assert doc < maxDoc; + first = false; + assert doc >= 0 && (skipDocs == null || doc < skipDocs.length()) && doc != NO_MORE_DOCS: "doc=" + doc + " skipDocs=" + skipDocs + " skipDocs.length=" + (skipDocs==null? "n/a" : skipDocs.length()); + if (skipDocs == null || !skipDocs.get(doc)) { + //System.out.println(" ret doc=" + doc + " freq=" + freq()); + return doc; + } + } + + //System.out.println(" end"); + return doc = NO_MORE_DOCS; } @Override public float score() { + assert !first; + final int freq; + if (freqsReader == null) { + freq = 1; + } else { + freq = freqs[freqPointer]; + } + assert freq > 0; assert doc != NO_MORE_DOCS; float raw = // compute tf(f)*weight freq < SCORE_CACHE_SIZE // check cache ? scoreCache[freq] // cache hit : getSimilarity().tf(freq)*weightValue; // cache miss - return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize for field + // nocommit + float v = norms == null ? 
raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize for field + //System.out.println("TS.score " + this + ": v=" + v + " freq=" + freq); + return v; } /** @@ -153,28 +269,120 @@ */ @Override public int advance(int target) throws IOException { - // first scan in cache - for (pointer++; pointer < pointerMax; pointer++) { - if (docs[pointer] >= target) { - freq = freqs[pointer]; - return doc = docs[pointer]; + + // nocommit: should we, here, optimize .advance(target that isn't + // too far away) into scan? seems like simple win? + + //System.out.println("ts.advance " + this + " target=" + target + " ct=" + count + " vs df=" + docFreq + " dp=" + docPointer + " dpMax=" + docPointerMax + " id=" + System.identityHashCode(this) + " first=" + first); + + // first scan current doc deltas block + for (docPointer++; docPointer < docPointerMax && count < docFreq; docPointer++) { + assert first || docDeltas[docPointer] > 0; + doc += docDeltas[docPointer]; + assert doc < maxDoc; + first = false; + //System.out.println(" scan doc=" + doc); + count++; + if (freqsReader != null && ++freqPointer >= freqPointerMax) { + //System.out.println(" refill freqs"); + freqPointerMax = freqsReader.fill(); + assert freqPointerMax != 0; + freqPointer = 0; + } + if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) { + //System.out.println(" ret0 doc=" + doc + " count=" + count + " freq=" + freqs[freqPointer]); + return doc; } } - // not found in readahead cache, seek underlying stream - int newDoc = docsEnum.advance(target); - //System.out.println("ts.advance docsEnum=" + docsEnum); - if (newDoc != NO_MORE_DOCS) { - doc = newDoc; - freq = docsEnum.freq(); + if (count == docFreq) { + return doc = NO_MORE_DOCS; + } + + // not found in current block, seek underlying stream + BulkPostingsEnum.JumpResult jumpResult = docsEnum.jump(target, count); + //System.out.println(" jumpResult=" + jumpResult); + if (jumpResult != null) { + //System.out.println(" jump count=" + jumpResult.count + " jump docID=" + jumpResult.docID); + count = jumpResult.count; + doc = jumpResult.docID; + first = false; + docPointer = docDeltasReader.offset(); + docPointerMax = docDeltasReader.end(); + if (docPointer >= docPointerMax) { + docPointerMax = docDeltasReader.fill(); + //System.out.println(" re-fill docs dpMax=" + docPointerMax + " dd[0]=" + docDeltas[0] + " dp=" + docPointer); + } + docPointer--; + if (freqsReader != null) { + freqPointer = freqsReader.offset(); + freqPointerMax = freqsReader.end(); + if (freqPointer >= freqPointerMax) { + freqPointerMax = freqsReader.fill(); + } + freqPointer--; + } + //System.out.println(" count=" + count + " docMax=" + docPointerMax + " freqMax=" + freqPointerMax + " doc=" + doc); } else { - doc = NO_MORE_DOCS; + // seek did not jump -- just fill next buffer + docPointerMax = docDeltasReader.fill(); + //System.out.println(" fill docDeltas max=" + docPointerMax); + if (docPointerMax != 0) { + docPointer = 0; + assert first || docDeltas[0] > 0; + doc += docDeltas[0]; + assert doc < maxDoc: "doc=" + doc + " maxDoc=" + maxDoc; + count++; + first = false; + //System.out.println(" doc=" + doc + " dd[0]=" + docDeltas[0]); + } else { + return doc = NO_MORE_DOCS; + } + if (freqsReader != null && ++freqPointer >= freqPointerMax) { + freqPointerMax = freqsReader.fill(); + assert freqPointerMax != 0; + freqPointer = 0; + } } - return doc; + //System.out.println(" ts now scan start doc=" + doc); + + // now scan + while(true) { + assert doc >= 0 && doc != NO_MORE_DOCS; + if (doc >= target && 
(skipDocs == null || !skipDocs.get(doc))) { + //System.out.println(" ret doc=" + doc + " count=" + count); + return doc; + } + + if (count >= docFreq) { + break; + } + + if (++docPointer >= docPointerMax) { + docPointerMax = docDeltasReader.fill(); + if (docPointerMax != 0) { + docPointer = 0; + } else { + return doc = NO_MORE_DOCS; + } + } + + if (freqsReader != null && ++freqPointer >= freqPointerMax) { + freqPointerMax = freqsReader.fill(); + assert freqPointerMax != 0; + freqPointer = 0; + } + + assert first || docDeltas[docPointer] > 0; + doc += docDeltas[docPointer]; + assert doc < maxDoc; + count++; + } + //System.out.println(" fallout END"); + return doc = NO_MORE_DOCS; } /** Returns a string representation of this TermScorer. */ @Override public String toString() { return "scorer(" + weight + ")"; } - } diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/store/MMapDirectory.java --- a/lucene/src/java/org/apache/lucene/store/MMapDirectory.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/store/MMapDirectory.java Tue Nov 02 18:19:28 2010 -0400 @@ -214,7 +214,8 @@ } } - private class MMapIndexInput extends IndexInput { + // nocommit was private + public class MMapIndexInput extends IndexInput { private ByteBuffer buffer; private final long length; @@ -225,6 +226,11 @@ this.buffer = raf.getChannel().map(MapMode.READ_ONLY, 0, length); } + // nocommit + public ByteBuffer getBuffer() { + return buffer; + } + @Override public byte readByte() throws IOException { try { @@ -283,7 +289,8 @@ // Because Java's ByteBuffer uses an int to address the // values, it's necessary to access a file > // Integer.MAX_VALUE in size using multiple byte buffers. - private class MultiMMapIndexInput extends IndexInput { + // nocommit was private + public class MultiMMapIndexInput extends IndexInput { private ByteBuffer[] buffers; private int[] bufSizes; // keep here, ByteBuffer.size() method is optional @@ -331,6 +338,11 @@ seek(0L); } + public ByteBuffer getBuffer() { + // nocommit fixup + return null; + } + @Override public byte readByte() throws IOException { // Performance might be improved by reading ahead into an array of diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/BitUtil.java --- a/lucene/src/java/org/apache/lucene/util/BitUtil.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/util/BitUtil.java Tue Nov 02 18:19:28 2010 -0400 @@ -812,4 +812,25 @@ return v; } + /** Returns the smallest non negative p such that a given value < (2**(p+1)) + * This differs from (63 - java.lang.Long.numberOfLeadingZeros(v)) + * for non positive given values. + */ + public static int logNextHigherPowerOfTwo(long v) { + long vinput = v; // only for assertions below. 
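    // Annotation (not part of the patch): worked examples of the contract in the javadoc
    // above -- p is the smallest non-negative value such that v < 2**(p+1):
    //   logNextHigherPowerOfTwo(1)  == 0  since 1 < 2**1
    //   logNextHigherPowerOfTwo(2)  == 1  since 2 < 2**2 and 2 >= 2**1
    //   logNextHigherPowerOfTwo(5)  == 2  since 5 < 2**3 and 5 >= 2**2
    //   logNextHigherPowerOfTwo(0)  == 0 and logNextHigherPowerOfTwo(-1) == 0: neither loop
    //   body runs, which is where this differs from 63 - Long.numberOfLeadingZeros(v).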
+ int p = 0; + while (v >= (1 << 8)) { + v >>= 8; + p += 8; + } + while (v >= (1 << 1)) { + v >>= 1; + p++; + } + assert (p <= 62) : p; + assert (p == 62) || (vinput < (1L << (p + 1))) : "p " + p + ", vinput " + vinput; + assert (p == 0) || (vinput >= (1L << p)) : "p " + p + ", vinput " + vinput; + assert (vinput <= 0) || (p == (63 - java.lang.Long.numberOfLeadingZeros(vinput))) : "p " + p + ", vinput " + vinput; + return p; + } } diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For10Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For10Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,81 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For10Decompress extends ForDecompress { + static final int numFrameBits = 10; + static final int mask = (int) ((1L<>> 10) & mask; + output[2 + outputOffset] = (intValue0 >>> 20) & mask; + output[3 + outputOffset] = ((intValue0 >>> 30) | (intValue1 << 2)) & mask; + output[4 + outputOffset] = (intValue1 >>> 8) & mask; + output[5 + outputOffset] = (intValue1 >>> 18) & mask; + output[6 + outputOffset] = ((intValue1 >>> 28) | (intValue2 << 4)) & mask; + output[7 + outputOffset] = (intValue2 >>> 6) & mask; + output[8 + outputOffset] = (intValue2 >>> 16) & mask; + output[9 + outputOffset] = ((intValue2 >>> 26) | (intValue3 << 6)) & mask; + output[10 + outputOffset] = (intValue3 >>> 4) & mask; + output[11 + outputOffset] = (intValue3 >>> 14) & mask; + output[12 + outputOffset] = ((intValue3 >>> 24) | (intValue4 << 8)) & mask; + output[13 + outputOffset] = (intValue4 >>> 2) & mask; + output[14 + outputOffset] = (intValue4 >>> 12) & mask; + output[15 + outputOffset] = intValue4 >>> 22; + output[16 + outputOffset] = intValue5 & mask; + output[17 + outputOffset] = (intValue5 >>> 10) & mask; + output[18 + outputOffset] = (intValue5 >>> 20) & mask; + output[19 + outputOffset] = ((intValue5 >>> 30) | (intValue6 << 2)) & mask; + output[20 + outputOffset] = (intValue6 >>> 8) & mask; + output[21 + outputOffset] = (intValue6 >>> 18) & mask; + output[22 + outputOffset] = ((intValue6 >>> 28) | (intValue7 << 4)) & mask; + output[23 + outputOffset] = (intValue7 >>> 6) & mask; + output[24 + outputOffset] = (intValue7 >>> 16) & mask; + output[25 + outputOffset] = ((intValue7 >>> 26) | (intValue8 << 6)) & mask; + output[26 + outputOffset] = (intValue8 >>> 4) & mask; + output[27 + outputOffset] = (intValue8 >>> 14) & mask; + output[28 + outputOffset] = ((intValue8 >>> 24) | (intValue9 << 8)) & mask; + output[29 + outputOffset] = (intValue9 >>> 2) & mask; + output[30 + outputOffset] = (intValue9 >>> 12) & 
mask; + output[31 + outputOffset] = intValue9 >>> 22; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For11Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For11Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,82 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For11Decompress extends ForDecompress { + static final int numFrameBits = 11; + static final int mask = (int) ((1L<>> 11) & mask; + output[2 + outputOffset] = ((intValue0 >>> 22) | (intValue1 << 10)) & mask; + output[3 + outputOffset] = (intValue1 >>> 1) & mask; + output[4 + outputOffset] = (intValue1 >>> 12) & mask; + output[5 + outputOffset] = ((intValue1 >>> 23) | (intValue2 << 9)) & mask; + output[6 + outputOffset] = (intValue2 >>> 2) & mask; + output[7 + outputOffset] = (intValue2 >>> 13) & mask; + output[8 + outputOffset] = ((intValue2 >>> 24) | (intValue3 << 8)) & mask; + output[9 + outputOffset] = (intValue3 >>> 3) & mask; + output[10 + outputOffset] = (intValue3 >>> 14) & mask; + output[11 + outputOffset] = ((intValue3 >>> 25) | (intValue4 << 7)) & mask; + output[12 + outputOffset] = (intValue4 >>> 4) & mask; + output[13 + outputOffset] = (intValue4 >>> 15) & mask; + output[14 + outputOffset] = ((intValue4 >>> 26) | (intValue5 << 6)) & mask; + output[15 + outputOffset] = (intValue5 >>> 5) & mask; + output[16 + outputOffset] = (intValue5 >>> 16) & mask; + output[17 + outputOffset] = ((intValue5 >>> 27) | (intValue6 << 5)) & mask; + output[18 + outputOffset] = (intValue6 >>> 6) & mask; + output[19 + outputOffset] = (intValue6 >>> 17) & mask; + output[20 + outputOffset] = ((intValue6 >>> 28) | (intValue7 << 4)) & mask; + output[21 + outputOffset] = (intValue7 >>> 7) & mask; + output[22 + outputOffset] = (intValue7 >>> 18) & mask; + output[23 + outputOffset] = ((intValue7 >>> 29) | (intValue8 << 3)) & mask; + output[24 + outputOffset] = (intValue8 >>> 8) & mask; + output[25 + outputOffset] = (intValue8 >>> 19) & mask; + output[26 + outputOffset] = ((intValue8 >>> 30) | (intValue9 << 2)) & mask; + output[27 + outputOffset] = (intValue9 >>> 9) & mask; + output[28 + outputOffset] = (intValue9 >>> 20) & mask; + output[29 + outputOffset] = ((intValue9 >>> 31) | (intValue10 << 1)) & mask; + output[30 + outputOffset] = (intValue10 >>> 10) & mask; + output[31 + outputOffset] = intValue10 >>> 21; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // 
decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For12Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For12Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,83 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For12Decompress extends ForDecompress { + static final int numFrameBits = 12; + static final int mask = (int) ((1L<>> 12) & mask; + output[2 + outputOffset] = ((intValue0 >>> 24) | (intValue1 << 8)) & mask; + output[3 + outputOffset] = (intValue1 >>> 4) & mask; + output[4 + outputOffset] = (intValue1 >>> 16) & mask; + output[5 + outputOffset] = ((intValue1 >>> 28) | (intValue2 << 4)) & mask; + output[6 + outputOffset] = (intValue2 >>> 8) & mask; + output[7 + outputOffset] = intValue2 >>> 20; + output[8 + outputOffset] = intValue3 & mask; + output[9 + outputOffset] = (intValue3 >>> 12) & mask; + output[10 + outputOffset] = ((intValue3 >>> 24) | (intValue4 << 8)) & mask; + output[11 + outputOffset] = (intValue4 >>> 4) & mask; + output[12 + outputOffset] = (intValue4 >>> 16) & mask; + output[13 + outputOffset] = ((intValue4 >>> 28) | (intValue5 << 4)) & mask; + output[14 + outputOffset] = (intValue5 >>> 8) & mask; + output[15 + outputOffset] = intValue5 >>> 20; + output[16 + outputOffset] = intValue6 & mask; + output[17 + outputOffset] = (intValue6 >>> 12) & mask; + output[18 + outputOffset] = ((intValue6 >>> 24) | (intValue7 << 8)) & mask; + output[19 + outputOffset] = (intValue7 >>> 4) & mask; + output[20 + outputOffset] = (intValue7 >>> 16) & mask; + output[21 + outputOffset] = ((intValue7 >>> 28) | (intValue8 << 4)) & mask; + output[22 + outputOffset] = (intValue8 >>> 8) & mask; + output[23 + outputOffset] = intValue8 >>> 20; + output[24 + outputOffset] = intValue9 & mask; + output[25 + outputOffset] = (intValue9 >>> 12) & mask; + output[26 + outputOffset] = ((intValue9 >>> 24) | (intValue10 << 8)) & mask; + output[27 + outputOffset] = (intValue10 >>> 4) & mask; + output[28 + outputOffset] = (intValue10 >>> 16) & mask; + output[29 + outputOffset] = ((intValue10 >>> 28) | (intValue11 << 4)) & mask; + output[30 + outputOffset] = (intValue11 >>> 8) & mask; + output[31 + outputOffset] = intValue11 >>> 20; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For13Decompress.java --- /dev/null Thu Jan 01 00:00:00 
1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For13Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,84 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For13Decompress extends ForDecompress { + static final int numFrameBits = 13; + static final int mask = (int) ((1L<>> 13) & mask; + output[2 + outputOffset] = ((intValue0 >>> 26) | (intValue1 << 6)) & mask; + output[3 + outputOffset] = (intValue1 >>> 7) & mask; + output[4 + outputOffset] = ((intValue1 >>> 20) | (intValue2 << 12)) & mask; + output[5 + outputOffset] = (intValue2 >>> 1) & mask; + output[6 + outputOffset] = (intValue2 >>> 14) & mask; + output[7 + outputOffset] = ((intValue2 >>> 27) | (intValue3 << 5)) & mask; + output[8 + outputOffset] = (intValue3 >>> 8) & mask; + output[9 + outputOffset] = ((intValue3 >>> 21) | (intValue4 << 11)) & mask; + output[10 + outputOffset] = (intValue4 >>> 2) & mask; + output[11 + outputOffset] = (intValue4 >>> 15) & mask; + output[12 + outputOffset] = ((intValue4 >>> 28) | (intValue5 << 4)) & mask; + output[13 + outputOffset] = (intValue5 >>> 9) & mask; + output[14 + outputOffset] = ((intValue5 >>> 22) | (intValue6 << 10)) & mask; + output[15 + outputOffset] = (intValue6 >>> 3) & mask; + output[16 + outputOffset] = (intValue6 >>> 16) & mask; + output[17 + outputOffset] = ((intValue6 >>> 29) | (intValue7 << 3)) & mask; + output[18 + outputOffset] = (intValue7 >>> 10) & mask; + output[19 + outputOffset] = ((intValue7 >>> 23) | (intValue8 << 9)) & mask; + output[20 + outputOffset] = (intValue8 >>> 4) & mask; + output[21 + outputOffset] = (intValue8 >>> 17) & mask; + output[22 + outputOffset] = ((intValue8 >>> 30) | (intValue9 << 2)) & mask; + output[23 + outputOffset] = (intValue9 >>> 11) & mask; + output[24 + outputOffset] = ((intValue9 >>> 24) | (intValue10 << 8)) & mask; + output[25 + outputOffset] = (intValue10 >>> 5) & mask; + output[26 + outputOffset] = (intValue10 >>> 18) & mask; + output[27 + outputOffset] = ((intValue10 >>> 31) | (intValue11 << 1)) & mask; + output[28 + outputOffset] = (intValue11 >>> 12) & mask; + output[29 + outputOffset] = ((intValue11 >>> 25) | (intValue12 << 7)) & mask; + output[30 + outputOffset] = (intValue12 >>> 6) & mask; + output[31 + outputOffset] = intValue12 >>> 19; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For14Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For14Decompress.java 
Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,85 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For14Decompress extends ForDecompress { + static final int numFrameBits = 14; + static final int mask = (int) ((1L<>> 14) & mask; + output[2 + outputOffset] = ((intValue0 >>> 28) | (intValue1 << 4)) & mask; + output[3 + outputOffset] = (intValue1 >>> 10) & mask; + output[4 + outputOffset] = ((intValue1 >>> 24) | (intValue2 << 8)) & mask; + output[5 + outputOffset] = (intValue2 >>> 6) & mask; + output[6 + outputOffset] = ((intValue2 >>> 20) | (intValue3 << 12)) & mask; + output[7 + outputOffset] = (intValue3 >>> 2) & mask; + output[8 + outputOffset] = (intValue3 >>> 16) & mask; + output[9 + outputOffset] = ((intValue3 >>> 30) | (intValue4 << 2)) & mask; + output[10 + outputOffset] = (intValue4 >>> 12) & mask; + output[11 + outputOffset] = ((intValue4 >>> 26) | (intValue5 << 6)) & mask; + output[12 + outputOffset] = (intValue5 >>> 8) & mask; + output[13 + outputOffset] = ((intValue5 >>> 22) | (intValue6 << 10)) & mask; + output[14 + outputOffset] = (intValue6 >>> 4) & mask; + output[15 + outputOffset] = intValue6 >>> 18; + output[16 + outputOffset] = intValue7 & mask; + output[17 + outputOffset] = (intValue7 >>> 14) & mask; + output[18 + outputOffset] = ((intValue7 >>> 28) | (intValue8 << 4)) & mask; + output[19 + outputOffset] = (intValue8 >>> 10) & mask; + output[20 + outputOffset] = ((intValue8 >>> 24) | (intValue9 << 8)) & mask; + output[21 + outputOffset] = (intValue9 >>> 6) & mask; + output[22 + outputOffset] = ((intValue9 >>> 20) | (intValue10 << 12)) & mask; + output[23 + outputOffset] = (intValue10 >>> 2) & mask; + output[24 + outputOffset] = (intValue10 >>> 16) & mask; + output[25 + outputOffset] = ((intValue10 >>> 30) | (intValue11 << 2)) & mask; + output[26 + outputOffset] = (intValue11 >>> 12) & mask; + output[27 + outputOffset] = ((intValue11 >>> 26) | (intValue12 << 6)) & mask; + output[28 + outputOffset] = (intValue12 >>> 8) & mask; + output[29 + outputOffset] = ((intValue12 >>> 22) | (intValue13 << 10)) & mask; + output[30 + outputOffset] = (intValue13 >>> 4) & mask; + output[31 + outputOffset] = intValue13 >>> 18; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For15Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For15Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,86 @@ +package org.apache.lucene.util.pfor; +/** + * 
Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For15Decompress extends ForDecompress { + static final int numFrameBits = 15; + static final int mask = (int) ((1L<>> 15) & mask; + output[2 + outputOffset] = ((intValue0 >>> 30) | (intValue1 << 2)) & mask; + output[3 + outputOffset] = (intValue1 >>> 13) & mask; + output[4 + outputOffset] = ((intValue1 >>> 28) | (intValue2 << 4)) & mask; + output[5 + outputOffset] = (intValue2 >>> 11) & mask; + output[6 + outputOffset] = ((intValue2 >>> 26) | (intValue3 << 6)) & mask; + output[7 + outputOffset] = (intValue3 >>> 9) & mask; + output[8 + outputOffset] = ((intValue3 >>> 24) | (intValue4 << 8)) & mask; + output[9 + outputOffset] = (intValue4 >>> 7) & mask; + output[10 + outputOffset] = ((intValue4 >>> 22) | (intValue5 << 10)) & mask; + output[11 + outputOffset] = (intValue5 >>> 5) & mask; + output[12 + outputOffset] = ((intValue5 >>> 20) | (intValue6 << 12)) & mask; + output[13 + outputOffset] = (intValue6 >>> 3) & mask; + output[14 + outputOffset] = ((intValue6 >>> 18) | (intValue7 << 14)) & mask; + output[15 + outputOffset] = (intValue7 >>> 1) & mask; + output[16 + outputOffset] = (intValue7 >>> 16) & mask; + output[17 + outputOffset] = ((intValue7 >>> 31) | (intValue8 << 1)) & mask; + output[18 + outputOffset] = (intValue8 >>> 14) & mask; + output[19 + outputOffset] = ((intValue8 >>> 29) | (intValue9 << 3)) & mask; + output[20 + outputOffset] = (intValue9 >>> 12) & mask; + output[21 + outputOffset] = ((intValue9 >>> 27) | (intValue10 << 5)) & mask; + output[22 + outputOffset] = (intValue10 >>> 10) & mask; + output[23 + outputOffset] = ((intValue10 >>> 25) | (intValue11 << 7)) & mask; + output[24 + outputOffset] = (intValue11 >>> 8) & mask; + output[25 + outputOffset] = ((intValue11 >>> 23) | (intValue12 << 9)) & mask; + output[26 + outputOffset] = (intValue12 >>> 6) & mask; + output[27 + outputOffset] = ((intValue12 >>> 21) | (intValue13 << 11)) & mask; + output[28 + outputOffset] = (intValue13 >>> 4) & mask; + output[29 + outputOffset] = ((intValue13 >>> 19) | (intValue14 << 13)) & mask; + output[30 + outputOffset] = (intValue14 >>> 2) & mask; + output[31 + outputOffset] = intValue14 >>> 17; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For16Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For16Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,87 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For16Decompress extends ForDecompress { + static final int numFrameBits = 16; + static final int mask = (int) ((1L<>> 16; + output[2 + outputOffset] = intValue1 & mask; + output[3 + outputOffset] = intValue1 >>> 16; + output[4 + outputOffset] = intValue2 & mask; + output[5 + outputOffset] = intValue2 >>> 16; + output[6 + outputOffset] = intValue3 & mask; + output[7 + outputOffset] = intValue3 >>> 16; + output[8 + outputOffset] = intValue4 & mask; + output[9 + outputOffset] = intValue4 >>> 16; + output[10 + outputOffset] = intValue5 & mask; + output[11 + outputOffset] = intValue5 >>> 16; + output[12 + outputOffset] = intValue6 & mask; + output[13 + outputOffset] = intValue6 >>> 16; + output[14 + outputOffset] = intValue7 & mask; + output[15 + outputOffset] = intValue7 >>> 16; + output[16 + outputOffset] = intValue8 & mask; + output[17 + outputOffset] = intValue8 >>> 16; + output[18 + outputOffset] = intValue9 & mask; + output[19 + outputOffset] = intValue9 >>> 16; + output[20 + outputOffset] = intValue10 & mask; + output[21 + outputOffset] = intValue10 >>> 16; + output[22 + outputOffset] = intValue11 & mask; + output[23 + outputOffset] = intValue11 >>> 16; + output[24 + outputOffset] = intValue12 & mask; + output[25 + outputOffset] = intValue12 >>> 16; + output[26 + outputOffset] = intValue13 & mask; + output[27 + outputOffset] = intValue13 >>> 16; + output[28 + outputOffset] = intValue14 & mask; + output[29 + outputOffset] = intValue14 >>> 16; + output[30 + outputOffset] = intValue15 & mask; + output[31 + outputOffset] = intValue15 >>> 16; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For17Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For17Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,88 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For17Decompress extends ForDecompress { + static final int numFrameBits = 17; + static final int mask = (int) ((1L<>> 17) | (intValue1 << 15)) & mask; + output[2 + outputOffset] = (intValue1 >>> 2) & mask; + output[3 + outputOffset] = ((intValue1 >>> 19) | (intValue2 << 13)) & mask; + output[4 + outputOffset] = (intValue2 >>> 4) & mask; + output[5 + outputOffset] = ((intValue2 >>> 21) | (intValue3 << 11)) & mask; + output[6 + outputOffset] = (intValue3 >>> 6) & mask; + output[7 + outputOffset] = ((intValue3 >>> 23) | (intValue4 << 9)) & mask; + output[8 + outputOffset] = (intValue4 >>> 8) & mask; + output[9 + outputOffset] = ((intValue4 >>> 25) | (intValue5 << 7)) & mask; + output[10 + outputOffset] = (intValue5 >>> 10) & mask; + output[11 + outputOffset] = ((intValue5 >>> 27) | (intValue6 << 5)) & mask; + output[12 + outputOffset] = (intValue6 >>> 12) & mask; + output[13 + outputOffset] = ((intValue6 >>> 29) | (intValue7 << 3)) & mask; + output[14 + outputOffset] = (intValue7 >>> 14) & mask; + output[15 + outputOffset] = ((intValue7 >>> 31) | (intValue8 << 1)) & mask; + output[16 + outputOffset] = ((intValue8 >>> 16) | (intValue9 << 16)) & mask; + output[17 + outputOffset] = (intValue9 >>> 1) & mask; + output[18 + outputOffset] = ((intValue9 >>> 18) | (intValue10 << 14)) & mask; + output[19 + outputOffset] = (intValue10 >>> 3) & mask; + output[20 + outputOffset] = ((intValue10 >>> 20) | (intValue11 << 12)) & mask; + output[21 + outputOffset] = (intValue11 >>> 5) & mask; + output[22 + outputOffset] = ((intValue11 >>> 22) | (intValue12 << 10)) & mask; + output[23 + outputOffset] = (intValue12 >>> 7) & mask; + output[24 + outputOffset] = ((intValue12 >>> 24) | (intValue13 << 8)) & mask; + output[25 + outputOffset] = (intValue13 >>> 9) & mask; + output[26 + outputOffset] = ((intValue13 >>> 26) | (intValue14 << 6)) & mask; + output[27 + outputOffset] = (intValue14 >>> 11) & mask; + output[28 + outputOffset] = ((intValue14 >>> 28) | (intValue15 << 4)) & mask; + output[29 + outputOffset] = (intValue15 >>> 13) & mask; + output[30 + outputOffset] = ((intValue15 >>> 30) | (intValue16 << 2)) & mask; + output[31 + outputOffset] = intValue16 >>> 15; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For18Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For18Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,89 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For18Decompress extends ForDecompress { + static final int numFrameBits = 18; + static final int mask = (int) ((1L<>> 18) | (intValue1 << 14)) & mask; + output[2 + outputOffset] = (intValue1 >>> 4) & mask; + output[3 + outputOffset] = ((intValue1 >>> 22) | (intValue2 << 10)) & mask; + output[4 + outputOffset] = (intValue2 >>> 8) & mask; + output[5 + outputOffset] = ((intValue2 >>> 26) | (intValue3 << 6)) & mask; + output[6 + outputOffset] = (intValue3 >>> 12) & mask; + output[7 + outputOffset] = ((intValue3 >>> 30) | (intValue4 << 2)) & mask; + output[8 + outputOffset] = ((intValue4 >>> 16) | (intValue5 << 16)) & mask; + output[9 + outputOffset] = (intValue5 >>> 2) & mask; + output[10 + outputOffset] = ((intValue5 >>> 20) | (intValue6 << 12)) & mask; + output[11 + outputOffset] = (intValue6 >>> 6) & mask; + output[12 + outputOffset] = ((intValue6 >>> 24) | (intValue7 << 8)) & mask; + output[13 + outputOffset] = (intValue7 >>> 10) & mask; + output[14 + outputOffset] = ((intValue7 >>> 28) | (intValue8 << 4)) & mask; + output[15 + outputOffset] = intValue8 >>> 14; + output[16 + outputOffset] = intValue9 & mask; + output[17 + outputOffset] = ((intValue9 >>> 18) | (intValue10 << 14)) & mask; + output[18 + outputOffset] = (intValue10 >>> 4) & mask; + output[19 + outputOffset] = ((intValue10 >>> 22) | (intValue11 << 10)) & mask; + output[20 + outputOffset] = (intValue11 >>> 8) & mask; + output[21 + outputOffset] = ((intValue11 >>> 26) | (intValue12 << 6)) & mask; + output[22 + outputOffset] = (intValue12 >>> 12) & mask; + output[23 + outputOffset] = ((intValue12 >>> 30) | (intValue13 << 2)) & mask; + output[24 + outputOffset] = ((intValue13 >>> 16) | (intValue14 << 16)) & mask; + output[25 + outputOffset] = (intValue14 >>> 2) & mask; + output[26 + outputOffset] = ((intValue14 >>> 20) | (intValue15 << 12)) & mask; + output[27 + outputOffset] = (intValue15 >>> 6) & mask; + output[28 + outputOffset] = ((intValue15 >>> 24) | (intValue16 << 8)) & mask; + output[29 + outputOffset] = (intValue16 >>> 10) & mask; + output[30 + outputOffset] = ((intValue16 >>> 28) | (intValue17 << 4)) & mask; + output[31 + outputOffset] = intValue17 >>> 14; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For19Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For19Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,90 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For19Decompress extends ForDecompress { + static final int numFrameBits = 19; + static final int mask = (int) ((1L<>> 19) | (intValue1 << 13)) & mask; + output[2 + outputOffset] = (intValue1 >>> 6) & mask; + output[3 + outputOffset] = ((intValue1 >>> 25) | (intValue2 << 7)) & mask; + output[4 + outputOffset] = (intValue2 >>> 12) & mask; + output[5 + outputOffset] = ((intValue2 >>> 31) | (intValue3 << 1)) & mask; + output[6 + outputOffset] = ((intValue3 >>> 18) | (intValue4 << 14)) & mask; + output[7 + outputOffset] = (intValue4 >>> 5) & mask; + output[8 + outputOffset] = ((intValue4 >>> 24) | (intValue5 << 8)) & mask; + output[9 + outputOffset] = (intValue5 >>> 11) & mask; + output[10 + outputOffset] = ((intValue5 >>> 30) | (intValue6 << 2)) & mask; + output[11 + outputOffset] = ((intValue6 >>> 17) | (intValue7 << 15)) & mask; + output[12 + outputOffset] = (intValue7 >>> 4) & mask; + output[13 + outputOffset] = ((intValue7 >>> 23) | (intValue8 << 9)) & mask; + output[14 + outputOffset] = (intValue8 >>> 10) & mask; + output[15 + outputOffset] = ((intValue8 >>> 29) | (intValue9 << 3)) & mask; + output[16 + outputOffset] = ((intValue9 >>> 16) | (intValue10 << 16)) & mask; + output[17 + outputOffset] = (intValue10 >>> 3) & mask; + output[18 + outputOffset] = ((intValue10 >>> 22) | (intValue11 << 10)) & mask; + output[19 + outputOffset] = (intValue11 >>> 9) & mask; + output[20 + outputOffset] = ((intValue11 >>> 28) | (intValue12 << 4)) & mask; + output[21 + outputOffset] = ((intValue12 >>> 15) | (intValue13 << 17)) & mask; + output[22 + outputOffset] = (intValue13 >>> 2) & mask; + output[23 + outputOffset] = ((intValue13 >>> 21) | (intValue14 << 11)) & mask; + output[24 + outputOffset] = (intValue14 >>> 8) & mask; + output[25 + outputOffset] = ((intValue14 >>> 27) | (intValue15 << 5)) & mask; + output[26 + outputOffset] = ((intValue15 >>> 14) | (intValue16 << 18)) & mask; + output[27 + outputOffset] = (intValue16 >>> 1) & mask; + output[28 + outputOffset] = ((intValue16 >>> 20) | (intValue17 << 12)) & mask; + output[29 + outputOffset] = (intValue17 >>> 7) & mask; + output[30 + outputOffset] = ((intValue17 >>> 26) | (intValue18 << 6)) & mask; + output[31 + outputOffset] = intValue18 >>> 13; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For1Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For1Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,72 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For1Decompress extends ForDecompress { + static final int numFrameBits = 1; + static final int mask = (int) ((1L<>> 1) & mask; + output[2 + outputOffset] = (intValue0 >>> 2) & mask; + output[3 + outputOffset] = (intValue0 >>> 3) & mask; + output[4 + outputOffset] = (intValue0 >>> 4) & mask; + output[5 + outputOffset] = (intValue0 >>> 5) & mask; + output[6 + outputOffset] = (intValue0 >>> 6) & mask; + output[7 + outputOffset] = (intValue0 >>> 7) & mask; + output[8 + outputOffset] = (intValue0 >>> 8) & mask; + output[9 + outputOffset] = (intValue0 >>> 9) & mask; + output[10 + outputOffset] = (intValue0 >>> 10) & mask; + output[11 + outputOffset] = (intValue0 >>> 11) & mask; + output[12 + outputOffset] = (intValue0 >>> 12) & mask; + output[13 + outputOffset] = (intValue0 >>> 13) & mask; + output[14 + outputOffset] = (intValue0 >>> 14) & mask; + output[15 + outputOffset] = (intValue0 >>> 15) & mask; + output[16 + outputOffset] = (intValue0 >>> 16) & mask; + output[17 + outputOffset] = (intValue0 >>> 17) & mask; + output[18 + outputOffset] = (intValue0 >>> 18) & mask; + output[19 + outputOffset] = (intValue0 >>> 19) & mask; + output[20 + outputOffset] = (intValue0 >>> 20) & mask; + output[21 + outputOffset] = (intValue0 >>> 21) & mask; + output[22 + outputOffset] = (intValue0 >>> 22) & mask; + output[23 + outputOffset] = (intValue0 >>> 23) & mask; + output[24 + outputOffset] = (intValue0 >>> 24) & mask; + output[25 + outputOffset] = (intValue0 >>> 25) & mask; + output[26 + outputOffset] = (intValue0 >>> 26) & mask; + output[27 + outputOffset] = (intValue0 >>> 27) & mask; + output[28 + outputOffset] = (intValue0 >>> 28) & mask; + output[29 + outputOffset] = (intValue0 >>> 29) & mask; + output[30 + outputOffset] = (intValue0 >>> 30) & mask; + output[31 + outputOffset] = intValue0 >>> 31; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For20Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For20Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,91 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For20Decompress extends ForDecompress { + static final int numFrameBits = 20; + static final int mask = (int) ((1L<>> 20) | (intValue1 << 12)) & mask; + output[2 + outputOffset] = (intValue1 >>> 8) & mask; + output[3 + outputOffset] = ((intValue1 >>> 28) | (intValue2 << 4)) & mask; + output[4 + outputOffset] = ((intValue2 >>> 16) | (intValue3 << 16)) & mask; + output[5 + outputOffset] = (intValue3 >>> 4) & mask; + output[6 + outputOffset] = ((intValue3 >>> 24) | (intValue4 << 8)) & mask; + output[7 + outputOffset] = intValue4 >>> 12; + output[8 + outputOffset] = intValue5 & mask; + output[9 + outputOffset] = ((intValue5 >>> 20) | (intValue6 << 12)) & mask; + output[10 + outputOffset] = (intValue6 >>> 8) & mask; + output[11 + outputOffset] = ((intValue6 >>> 28) | (intValue7 << 4)) & mask; + output[12 + outputOffset] = ((intValue7 >>> 16) | (intValue8 << 16)) & mask; + output[13 + outputOffset] = (intValue8 >>> 4) & mask; + output[14 + outputOffset] = ((intValue8 >>> 24) | (intValue9 << 8)) & mask; + output[15 + outputOffset] = intValue9 >>> 12; + output[16 + outputOffset] = intValue10 & mask; + output[17 + outputOffset] = ((intValue10 >>> 20) | (intValue11 << 12)) & mask; + output[18 + outputOffset] = (intValue11 >>> 8) & mask; + output[19 + outputOffset] = ((intValue11 >>> 28) | (intValue12 << 4)) & mask; + output[20 + outputOffset] = ((intValue12 >>> 16) | (intValue13 << 16)) & mask; + output[21 + outputOffset] = (intValue13 >>> 4) & mask; + output[22 + outputOffset] = ((intValue13 >>> 24) | (intValue14 << 8)) & mask; + output[23 + outputOffset] = intValue14 >>> 12; + output[24 + outputOffset] = intValue15 & mask; + output[25 + outputOffset] = ((intValue15 >>> 20) | (intValue16 << 12)) & mask; + output[26 + outputOffset] = (intValue16 >>> 8) & mask; + output[27 + outputOffset] = ((intValue16 >>> 28) | (intValue17 << 4)) & mask; + output[28 + outputOffset] = ((intValue17 >>> 16) | (intValue18 << 16)) & mask; + output[29 + outputOffset] = (intValue18 >>> 4) & mask; + output[30 + outputOffset] = ((intValue18 >>> 24) | (intValue19 << 8)) & mask; + output[31 + outputOffset] = intValue19 >>> 12; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For21Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For21Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,92 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For21Decompress extends ForDecompress { + static final int numFrameBits = 21; + static final int mask = (int) ((1L<>> 21) | (intValue1 << 11)) & mask; + output[2 + outputOffset] = (intValue1 >>> 10) & mask; + output[3 + outputOffset] = ((intValue1 >>> 31) | (intValue2 << 1)) & mask; + output[4 + outputOffset] = ((intValue2 >>> 20) | (intValue3 << 12)) & mask; + output[5 + outputOffset] = (intValue3 >>> 9) & mask; + output[6 + outputOffset] = ((intValue3 >>> 30) | (intValue4 << 2)) & mask; + output[7 + outputOffset] = ((intValue4 >>> 19) | (intValue5 << 13)) & mask; + output[8 + outputOffset] = (intValue5 >>> 8) & mask; + output[9 + outputOffset] = ((intValue5 >>> 29) | (intValue6 << 3)) & mask; + output[10 + outputOffset] = ((intValue6 >>> 18) | (intValue7 << 14)) & mask; + output[11 + outputOffset] = (intValue7 >>> 7) & mask; + output[12 + outputOffset] = ((intValue7 >>> 28) | (intValue8 << 4)) & mask; + output[13 + outputOffset] = ((intValue8 >>> 17) | (intValue9 << 15)) & mask; + output[14 + outputOffset] = (intValue9 >>> 6) & mask; + output[15 + outputOffset] = ((intValue9 >>> 27) | (intValue10 << 5)) & mask; + output[16 + outputOffset] = ((intValue10 >>> 16) | (intValue11 << 16)) & mask; + output[17 + outputOffset] = (intValue11 >>> 5) & mask; + output[18 + outputOffset] = ((intValue11 >>> 26) | (intValue12 << 6)) & mask; + output[19 + outputOffset] = ((intValue12 >>> 15) | (intValue13 << 17)) & mask; + output[20 + outputOffset] = (intValue13 >>> 4) & mask; + output[21 + outputOffset] = ((intValue13 >>> 25) | (intValue14 << 7)) & mask; + output[22 + outputOffset] = ((intValue14 >>> 14) | (intValue15 << 18)) & mask; + output[23 + outputOffset] = (intValue15 >>> 3) & mask; + output[24 + outputOffset] = ((intValue15 >>> 24) | (intValue16 << 8)) & mask; + output[25 + outputOffset] = ((intValue16 >>> 13) | (intValue17 << 19)) & mask; + output[26 + outputOffset] = (intValue17 >>> 2) & mask; + output[27 + outputOffset] = ((intValue17 >>> 23) | (intValue18 << 9)) & mask; + output[28 + outputOffset] = ((intValue18 >>> 12) | (intValue19 << 20)) & mask; + output[29 + outputOffset] = (intValue19 >>> 1) & mask; + output[30 + outputOffset] = ((intValue19 >>> 22) | (intValue20 << 10)) & mask; + output[31 + outputOffset] = intValue20 >>> 11; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For22Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For22Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,93 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For22Decompress extends ForDecompress { + static final int numFrameBits = 22; + static final int mask = (int) ((1L<>> 22) | (intValue1 << 10)) & mask; + output[2 + outputOffset] = ((intValue1 >>> 12) | (intValue2 << 20)) & mask; + output[3 + outputOffset] = (intValue2 >>> 2) & mask; + output[4 + outputOffset] = ((intValue2 >>> 24) | (intValue3 << 8)) & mask; + output[5 + outputOffset] = ((intValue3 >>> 14) | (intValue4 << 18)) & mask; + output[6 + outputOffset] = (intValue4 >>> 4) & mask; + output[7 + outputOffset] = ((intValue4 >>> 26) | (intValue5 << 6)) & mask; + output[8 + outputOffset] = ((intValue5 >>> 16) | (intValue6 << 16)) & mask; + output[9 + outputOffset] = (intValue6 >>> 6) & mask; + output[10 + outputOffset] = ((intValue6 >>> 28) | (intValue7 << 4)) & mask; + output[11 + outputOffset] = ((intValue7 >>> 18) | (intValue8 << 14)) & mask; + output[12 + outputOffset] = (intValue8 >>> 8) & mask; + output[13 + outputOffset] = ((intValue8 >>> 30) | (intValue9 << 2)) & mask; + output[14 + outputOffset] = ((intValue9 >>> 20) | (intValue10 << 12)) & mask; + output[15 + outputOffset] = intValue10 >>> 10; + output[16 + outputOffset] = intValue11 & mask; + output[17 + outputOffset] = ((intValue11 >>> 22) | (intValue12 << 10)) & mask; + output[18 + outputOffset] = ((intValue12 >>> 12) | (intValue13 << 20)) & mask; + output[19 + outputOffset] = (intValue13 >>> 2) & mask; + output[20 + outputOffset] = ((intValue13 >>> 24) | (intValue14 << 8)) & mask; + output[21 + outputOffset] = ((intValue14 >>> 14) | (intValue15 << 18)) & mask; + output[22 + outputOffset] = (intValue15 >>> 4) & mask; + output[23 + outputOffset] = ((intValue15 >>> 26) | (intValue16 << 6)) & mask; + output[24 + outputOffset] = ((intValue16 >>> 16) | (intValue17 << 16)) & mask; + output[25 + outputOffset] = (intValue17 >>> 6) & mask; + output[26 + outputOffset] = ((intValue17 >>> 28) | (intValue18 << 4)) & mask; + output[27 + outputOffset] = ((intValue18 >>> 18) | (intValue19 << 14)) & mask; + output[28 + outputOffset] = (intValue19 >>> 8) & mask; + output[29 + outputOffset] = ((intValue19 >>> 30) | (intValue20 << 2)) & mask; + output[30 + outputOffset] = ((intValue20 >>> 20) | (intValue21 << 12)) & mask; + output[31 + outputOffset] = intValue21 >>> 10; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For23Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For23Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,94 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For23Decompress extends ForDecompress { + static final int numFrameBits = 23; + static final int mask = (int) ((1L<>> 23) | (intValue1 << 9)) & mask; + output[2 + outputOffset] = ((intValue1 >>> 14) | (intValue2 << 18)) & mask; + output[3 + outputOffset] = (intValue2 >>> 5) & mask; + output[4 + outputOffset] = ((intValue2 >>> 28) | (intValue3 << 4)) & mask; + output[5 + outputOffset] = ((intValue3 >>> 19) | (intValue4 << 13)) & mask; + output[6 + outputOffset] = ((intValue4 >>> 10) | (intValue5 << 22)) & mask; + output[7 + outputOffset] = (intValue5 >>> 1) & mask; + output[8 + outputOffset] = ((intValue5 >>> 24) | (intValue6 << 8)) & mask; + output[9 + outputOffset] = ((intValue6 >>> 15) | (intValue7 << 17)) & mask; + output[10 + outputOffset] = (intValue7 >>> 6) & mask; + output[11 + outputOffset] = ((intValue7 >>> 29) | (intValue8 << 3)) & mask; + output[12 + outputOffset] = ((intValue8 >>> 20) | (intValue9 << 12)) & mask; + output[13 + outputOffset] = ((intValue9 >>> 11) | (intValue10 << 21)) & mask; + output[14 + outputOffset] = (intValue10 >>> 2) & mask; + output[15 + outputOffset] = ((intValue10 >>> 25) | (intValue11 << 7)) & mask; + output[16 + outputOffset] = ((intValue11 >>> 16) | (intValue12 << 16)) & mask; + output[17 + outputOffset] = (intValue12 >>> 7) & mask; + output[18 + outputOffset] = ((intValue12 >>> 30) | (intValue13 << 2)) & mask; + output[19 + outputOffset] = ((intValue13 >>> 21) | (intValue14 << 11)) & mask; + output[20 + outputOffset] = ((intValue14 >>> 12) | (intValue15 << 20)) & mask; + output[21 + outputOffset] = (intValue15 >>> 3) & mask; + output[22 + outputOffset] = ((intValue15 >>> 26) | (intValue16 << 6)) & mask; + output[23 + outputOffset] = ((intValue16 >>> 17) | (intValue17 << 15)) & mask; + output[24 + outputOffset] = (intValue17 >>> 8) & mask; + output[25 + outputOffset] = ((intValue17 >>> 31) | (intValue18 << 1)) & mask; + output[26 + outputOffset] = ((intValue18 >>> 22) | (intValue19 << 10)) & mask; + output[27 + outputOffset] = ((intValue19 >>> 13) | (intValue20 << 19)) & mask; + output[28 + outputOffset] = (intValue20 >>> 4) & mask; + output[29 + outputOffset] = ((intValue20 >>> 27) | (intValue21 << 5)) & mask; + output[30 + outputOffset] = ((intValue21 >>> 18) | (intValue22 << 14)) & mask; + output[31 + outputOffset] = intValue22 >>> 9; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For24Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For24Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 
+1,95 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For24Decompress extends ForDecompress { + static final int numFrameBits = 24; + static final int mask = (int) ((1L<>> 24) | (intValue1 << 8)) & mask; + output[2 + outputOffset] = ((intValue1 >>> 16) | (intValue2 << 16)) & mask; + output[3 + outputOffset] = intValue2 >>> 8; + output[4 + outputOffset] = intValue3 & mask; + output[5 + outputOffset] = ((intValue3 >>> 24) | (intValue4 << 8)) & mask; + output[6 + outputOffset] = ((intValue4 >>> 16) | (intValue5 << 16)) & mask; + output[7 + outputOffset] = intValue5 >>> 8; + output[8 + outputOffset] = intValue6 & mask; + output[9 + outputOffset] = ((intValue6 >>> 24) | (intValue7 << 8)) & mask; + output[10 + outputOffset] = ((intValue7 >>> 16) | (intValue8 << 16)) & mask; + output[11 + outputOffset] = intValue8 >>> 8; + output[12 + outputOffset] = intValue9 & mask; + output[13 + outputOffset] = ((intValue9 >>> 24) | (intValue10 << 8)) & mask; + output[14 + outputOffset] = ((intValue10 >>> 16) | (intValue11 << 16)) & mask; + output[15 + outputOffset] = intValue11 >>> 8; + output[16 + outputOffset] = intValue12 & mask; + output[17 + outputOffset] = ((intValue12 >>> 24) | (intValue13 << 8)) & mask; + output[18 + outputOffset] = ((intValue13 >>> 16) | (intValue14 << 16)) & mask; + output[19 + outputOffset] = intValue14 >>> 8; + output[20 + outputOffset] = intValue15 & mask; + output[21 + outputOffset] = ((intValue15 >>> 24) | (intValue16 << 8)) & mask; + output[22 + outputOffset] = ((intValue16 >>> 16) | (intValue17 << 16)) & mask; + output[23 + outputOffset] = intValue17 >>> 8; + output[24 + outputOffset] = intValue18 & mask; + output[25 + outputOffset] = ((intValue18 >>> 24) | (intValue19 << 8)) & mask; + output[26 + outputOffset] = ((intValue19 >>> 16) | (intValue20 << 16)) & mask; + output[27 + outputOffset] = intValue20 >>> 8; + output[28 + outputOffset] = intValue21 & mask; + output[29 + outputOffset] = ((intValue21 >>> 24) | (intValue22 << 8)) & mask; + output[30 + outputOffset] = ((intValue22 >>> 16) | (intValue23 << 16)) & mask; + output[31 + outputOffset] = intValue23 >>> 8; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For25Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For25Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,96 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For25Decompress extends ForDecompress { + static final int numFrameBits = 25; + static final int mask = (int) ((1L<>> 25) | (intValue1 << 7)) & mask; + output[2 + outputOffset] = ((intValue1 >>> 18) | (intValue2 << 14)) & mask; + output[3 + outputOffset] = ((intValue2 >>> 11) | (intValue3 << 21)) & mask; + output[4 + outputOffset] = (intValue3 >>> 4) & mask; + output[5 + outputOffset] = ((intValue3 >>> 29) | (intValue4 << 3)) & mask; + output[6 + outputOffset] = ((intValue4 >>> 22) | (intValue5 << 10)) & mask; + output[7 + outputOffset] = ((intValue5 >>> 15) | (intValue6 << 17)) & mask; + output[8 + outputOffset] = ((intValue6 >>> 8) | (intValue7 << 24)) & mask; + output[9 + outputOffset] = (intValue7 >>> 1) & mask; + output[10 + outputOffset] = ((intValue7 >>> 26) | (intValue8 << 6)) & mask; + output[11 + outputOffset] = ((intValue8 >>> 19) | (intValue9 << 13)) & mask; + output[12 + outputOffset] = ((intValue9 >>> 12) | (intValue10 << 20)) & mask; + output[13 + outputOffset] = (intValue10 >>> 5) & mask; + output[14 + outputOffset] = ((intValue10 >>> 30) | (intValue11 << 2)) & mask; + output[15 + outputOffset] = ((intValue11 >>> 23) | (intValue12 << 9)) & mask; + output[16 + outputOffset] = ((intValue12 >>> 16) | (intValue13 << 16)) & mask; + output[17 + outputOffset] = ((intValue13 >>> 9) | (intValue14 << 23)) & mask; + output[18 + outputOffset] = (intValue14 >>> 2) & mask; + output[19 + outputOffset] = ((intValue14 >>> 27) | (intValue15 << 5)) & mask; + output[20 + outputOffset] = ((intValue15 >>> 20) | (intValue16 << 12)) & mask; + output[21 + outputOffset] = ((intValue16 >>> 13) | (intValue17 << 19)) & mask; + output[22 + outputOffset] = (intValue17 >>> 6) & mask; + output[23 + outputOffset] = ((intValue17 >>> 31) | (intValue18 << 1)) & mask; + output[24 + outputOffset] = ((intValue18 >>> 24) | (intValue19 << 8)) & mask; + output[25 + outputOffset] = ((intValue19 >>> 17) | (intValue20 << 15)) & mask; + output[26 + outputOffset] = ((intValue20 >>> 10) | (intValue21 << 22)) & mask; + output[27 + outputOffset] = (intValue21 >>> 3) & mask; + output[28 + outputOffset] = ((intValue21 >>> 28) | (intValue22 << 4)) & mask; + output[29 + outputOffset] = ((intValue22 >>> 21) | (intValue23 << 11)) & mask; + output[30 + outputOffset] = ((intValue23 >>> 14) | (intValue24 << 18)) & mask; + output[31 + outputOffset] = intValue24 >>> 7; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For26Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/lucene/src/java/org/apache/lucene/util/pfor/For26Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,97 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For26Decompress extends ForDecompress { + static final int numFrameBits = 26; + static final int mask = (int) ((1L<>> 26) | (intValue1 << 6)) & mask; + output[2 + outputOffset] = ((intValue1 >>> 20) | (intValue2 << 12)) & mask; + output[3 + outputOffset] = ((intValue2 >>> 14) | (intValue3 << 18)) & mask; + output[4 + outputOffset] = ((intValue3 >>> 8) | (intValue4 << 24)) & mask; + output[5 + outputOffset] = (intValue4 >>> 2) & mask; + output[6 + outputOffset] = ((intValue4 >>> 28) | (intValue5 << 4)) & mask; + output[7 + outputOffset] = ((intValue5 >>> 22) | (intValue6 << 10)) & mask; + output[8 + outputOffset] = ((intValue6 >>> 16) | (intValue7 << 16)) & mask; + output[9 + outputOffset] = ((intValue7 >>> 10) | (intValue8 << 22)) & mask; + output[10 + outputOffset] = (intValue8 >>> 4) & mask; + output[11 + outputOffset] = ((intValue8 >>> 30) | (intValue9 << 2)) & mask; + output[12 + outputOffset] = ((intValue9 >>> 24) | (intValue10 << 8)) & mask; + output[13 + outputOffset] = ((intValue10 >>> 18) | (intValue11 << 14)) & mask; + output[14 + outputOffset] = ((intValue11 >>> 12) | (intValue12 << 20)) & mask; + output[15 + outputOffset] = intValue12 >>> 6; + output[16 + outputOffset] = intValue13 & mask; + output[17 + outputOffset] = ((intValue13 >>> 26) | (intValue14 << 6)) & mask; + output[18 + outputOffset] = ((intValue14 >>> 20) | (intValue15 << 12)) & mask; + output[19 + outputOffset] = ((intValue15 >>> 14) | (intValue16 << 18)) & mask; + output[20 + outputOffset] = ((intValue16 >>> 8) | (intValue17 << 24)) & mask; + output[21 + outputOffset] = (intValue17 >>> 2) & mask; + output[22 + outputOffset] = ((intValue17 >>> 28) | (intValue18 << 4)) & mask; + output[23 + outputOffset] = ((intValue18 >>> 22) | (intValue19 << 10)) & mask; + output[24 + outputOffset] = ((intValue19 >>> 16) | (intValue20 << 16)) & mask; + output[25 + outputOffset] = ((intValue20 >>> 10) | (intValue21 << 22)) & mask; + output[26 + outputOffset] = (intValue21 >>> 4) & mask; + output[27 + outputOffset] = ((intValue21 >>> 30) | (intValue22 << 2)) & mask; + output[28 + outputOffset] = ((intValue22 >>> 24) | (intValue23 << 8)) & mask; + output[29 + outputOffset] = ((intValue23 >>> 18) | (intValue24 << 14)) & mask; + output[30 + outputOffset] = ((intValue24 >>> 12) | (intValue25 << 20)) & mask; + output[31 + outputOffset] = intValue25 >>> 6; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, 
inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For27Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For27Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,98 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For27Decompress extends ForDecompress { + static final int numFrameBits = 27; + static final int mask = (int) ((1L<>> 27) | (intValue1 << 5)) & mask; + output[2 + outputOffset] = ((intValue1 >>> 22) | (intValue2 << 10)) & mask; + output[3 + outputOffset] = ((intValue2 >>> 17) | (intValue3 << 15)) & mask; + output[4 + outputOffset] = ((intValue3 >>> 12) | (intValue4 << 20)) & mask; + output[5 + outputOffset] = ((intValue4 >>> 7) | (intValue5 << 25)) & mask; + output[6 + outputOffset] = (intValue5 >>> 2) & mask; + output[7 + outputOffset] = ((intValue5 >>> 29) | (intValue6 << 3)) & mask; + output[8 + outputOffset] = ((intValue6 >>> 24) | (intValue7 << 8)) & mask; + output[9 + outputOffset] = ((intValue7 >>> 19) | (intValue8 << 13)) & mask; + output[10 + outputOffset] = ((intValue8 >>> 14) | (intValue9 << 18)) & mask; + output[11 + outputOffset] = ((intValue9 >>> 9) | (intValue10 << 23)) & mask; + output[12 + outputOffset] = (intValue10 >>> 4) & mask; + output[13 + outputOffset] = ((intValue10 >>> 31) | (intValue11 << 1)) & mask; + output[14 + outputOffset] = ((intValue11 >>> 26) | (intValue12 << 6)) & mask; + output[15 + outputOffset] = ((intValue12 >>> 21) | (intValue13 << 11)) & mask; + output[16 + outputOffset] = ((intValue13 >>> 16) | (intValue14 << 16)) & mask; + output[17 + outputOffset] = ((intValue14 >>> 11) | (intValue15 << 21)) & mask; + output[18 + outputOffset] = ((intValue15 >>> 6) | (intValue16 << 26)) & mask; + output[19 + outputOffset] = (intValue16 >>> 1) & mask; + output[20 + outputOffset] = ((intValue16 >>> 28) | (intValue17 << 4)) & mask; + output[21 + outputOffset] = ((intValue17 >>> 23) | (intValue18 << 9)) & mask; + output[22 + outputOffset] = ((intValue18 >>> 18) | (intValue19 << 14)) & mask; + output[23 + outputOffset] = ((intValue19 >>> 13) | (intValue20 << 19)) & mask; + output[24 + outputOffset] = ((intValue20 >>> 8) | (intValue21 << 24)) & mask; + output[25 + outputOffset] = (intValue21 >>> 3) & mask; + output[26 + outputOffset] = ((intValue21 >>> 30) | (intValue22 << 2)) & mask; + output[27 + outputOffset] = ((intValue22 >>> 25) | (intValue23 << 7)) & mask; + output[28 + outputOffset] = ((intValue23 >>> 20) | (intValue24 << 12)) & mask; + output[29 + outputOffset] = ((intValue24 >>> 15) | (intValue25 << 17)) & mask; + 
output[30 + outputOffset] = ((intValue25 >>> 10) | (intValue26 << 22)) & mask; + output[31 + outputOffset] = intValue26 >>> 5; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For28Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For28Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,99 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For28Decompress extends ForDecompress { + static final int numFrameBits = 28; + static final int mask = (int) ((1L<>> 28) | (intValue1 << 4)) & mask; + output[2 + outputOffset] = ((intValue1 >>> 24) | (intValue2 << 8)) & mask; + output[3 + outputOffset] = ((intValue2 >>> 20) | (intValue3 << 12)) & mask; + output[4 + outputOffset] = ((intValue3 >>> 16) | (intValue4 << 16)) & mask; + output[5 + outputOffset] = ((intValue4 >>> 12) | (intValue5 << 20)) & mask; + output[6 + outputOffset] = ((intValue5 >>> 8) | (intValue6 << 24)) & mask; + output[7 + outputOffset] = intValue6 >>> 4; + output[8 + outputOffset] = intValue7 & mask; + output[9 + outputOffset] = ((intValue7 >>> 28) | (intValue8 << 4)) & mask; + output[10 + outputOffset] = ((intValue8 >>> 24) | (intValue9 << 8)) & mask; + output[11 + outputOffset] = ((intValue9 >>> 20) | (intValue10 << 12)) & mask; + output[12 + outputOffset] = ((intValue10 >>> 16) | (intValue11 << 16)) & mask; + output[13 + outputOffset] = ((intValue11 >>> 12) | (intValue12 << 20)) & mask; + output[14 + outputOffset] = ((intValue12 >>> 8) | (intValue13 << 24)) & mask; + output[15 + outputOffset] = intValue13 >>> 4; + output[16 + outputOffset] = intValue14 & mask; + output[17 + outputOffset] = ((intValue14 >>> 28) | (intValue15 << 4)) & mask; + output[18 + outputOffset] = ((intValue15 >>> 24) | (intValue16 << 8)) & mask; + output[19 + outputOffset] = ((intValue16 >>> 20) | (intValue17 << 12)) & mask; + output[20 + outputOffset] = ((intValue17 >>> 16) | (intValue18 << 16)) & mask; + output[21 + outputOffset] = ((intValue18 >>> 12) | (intValue19 << 20)) & mask; + output[22 + outputOffset] = ((intValue19 >>> 8) | (intValue20 << 24)) & mask; + output[23 + outputOffset] = intValue20 >>> 4; + output[24 + outputOffset] = intValue21 & mask; + output[25 + outputOffset] = ((intValue21 >>> 28) | (intValue22 << 4)) & mask; + output[26 + outputOffset] = ((intValue22 >>> 24) | (intValue23 << 8)) & mask; + output[27 + outputOffset] = ((intValue23 >>> 20) | (intValue24 << 12)) & mask; + 
output[28 + outputOffset] = ((intValue24 >>> 16) | (intValue25 << 16)) & mask; + output[29 + outputOffset] = ((intValue25 >>> 12) | (intValue26 << 20)) & mask; + output[30 + outputOffset] = ((intValue26 >>> 8) | (intValue27 << 24)) & mask; + output[31 + outputOffset] = intValue27 >>> 4; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For29Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For29Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,100 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For29Decompress extends ForDecompress { + static final int numFrameBits = 29; + static final int mask = (int) ((1L<>> 29) | (intValue1 << 3)) & mask; + output[2 + outputOffset] = ((intValue1 >>> 26) | (intValue2 << 6)) & mask; + output[3 + outputOffset] = ((intValue2 >>> 23) | (intValue3 << 9)) & mask; + output[4 + outputOffset] = ((intValue3 >>> 20) | (intValue4 << 12)) & mask; + output[5 + outputOffset] = ((intValue4 >>> 17) | (intValue5 << 15)) & mask; + output[6 + outputOffset] = ((intValue5 >>> 14) | (intValue6 << 18)) & mask; + output[7 + outputOffset] = ((intValue6 >>> 11) | (intValue7 << 21)) & mask; + output[8 + outputOffset] = ((intValue7 >>> 8) | (intValue8 << 24)) & mask; + output[9 + outputOffset] = ((intValue8 >>> 5) | (intValue9 << 27)) & mask; + output[10 + outputOffset] = (intValue9 >>> 2) & mask; + output[11 + outputOffset] = ((intValue9 >>> 31) | (intValue10 << 1)) & mask; + output[12 + outputOffset] = ((intValue10 >>> 28) | (intValue11 << 4)) & mask; + output[13 + outputOffset] = ((intValue11 >>> 25) | (intValue12 << 7)) & mask; + output[14 + outputOffset] = ((intValue12 >>> 22) | (intValue13 << 10)) & mask; + output[15 + outputOffset] = ((intValue13 >>> 19) | (intValue14 << 13)) & mask; + output[16 + outputOffset] = ((intValue14 >>> 16) | (intValue15 << 16)) & mask; + output[17 + outputOffset] = ((intValue15 >>> 13) | (intValue16 << 19)) & mask; + output[18 + outputOffset] = ((intValue16 >>> 10) | (intValue17 << 22)) & mask; + output[19 + outputOffset] = ((intValue17 >>> 7) | (intValue18 << 25)) & mask; + output[20 + outputOffset] = ((intValue18 >>> 4) | (intValue19 << 28)) & mask; + output[21 + outputOffset] = (intValue19 >>> 1) & mask; + output[22 + outputOffset] = ((intValue19 >>> 30) | (intValue20 << 2)) & mask; + output[23 + outputOffset] = ((intValue20 >>> 27) | (intValue21 << 5)) & mask; + output[24 + 
outputOffset] = ((intValue21 >>> 24) | (intValue22 << 8)) & mask; + output[25 + outputOffset] = ((intValue22 >>> 21) | (intValue23 << 11)) & mask; + output[26 + outputOffset] = ((intValue23 >>> 18) | (intValue24 << 14)) & mask; + output[27 + outputOffset] = ((intValue24 >>> 15) | (intValue25 << 17)) & mask; + output[28 + outputOffset] = ((intValue25 >>> 12) | (intValue26 << 20)) & mask; + output[29 + outputOffset] = ((intValue26 >>> 9) | (intValue27 << 23)) & mask; + output[30 + outputOffset] = ((intValue27 >>> 6) | (intValue28 << 26)) & mask; + output[31 + outputOffset] = intValue28 >>> 3; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For2Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For2Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,73 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. 
See gendecompress.py */ + +import java.nio.IntBuffer; +class For2Decompress extends ForDecompress { + static final int numFrameBits = 2; + static final int mask = (int) ((1L<>> 2) & mask; + output[2 + outputOffset] = (intValue0 >>> 4) & mask; + output[3 + outputOffset] = (intValue0 >>> 6) & mask; + output[4 + outputOffset] = (intValue0 >>> 8) & mask; + output[5 + outputOffset] = (intValue0 >>> 10) & mask; + output[6 + outputOffset] = (intValue0 >>> 12) & mask; + output[7 + outputOffset] = (intValue0 >>> 14) & mask; + output[8 + outputOffset] = (intValue0 >>> 16) & mask; + output[9 + outputOffset] = (intValue0 >>> 18) & mask; + output[10 + outputOffset] = (intValue0 >>> 20) & mask; + output[11 + outputOffset] = (intValue0 >>> 22) & mask; + output[12 + outputOffset] = (intValue0 >>> 24) & mask; + output[13 + outputOffset] = (intValue0 >>> 26) & mask; + output[14 + outputOffset] = (intValue0 >>> 28) & mask; + output[15 + outputOffset] = intValue0 >>> 30; + output[16 + outputOffset] = intValue1 & mask; + output[17 + outputOffset] = (intValue1 >>> 2) & mask; + output[18 + outputOffset] = (intValue1 >>> 4) & mask; + output[19 + outputOffset] = (intValue1 >>> 6) & mask; + output[20 + outputOffset] = (intValue1 >>> 8) & mask; + output[21 + outputOffset] = (intValue1 >>> 10) & mask; + output[22 + outputOffset] = (intValue1 >>> 12) & mask; + output[23 + outputOffset] = (intValue1 >>> 14) & mask; + output[24 + outputOffset] = (intValue1 >>> 16) & mask; + output[25 + outputOffset] = (intValue1 >>> 18) & mask; + output[26 + outputOffset] = (intValue1 >>> 20) & mask; + output[27 + outputOffset] = (intValue1 >>> 22) & mask; + output[28 + outputOffset] = (intValue1 >>> 24) & mask; + output[29 + outputOffset] = (intValue1 >>> 26) & mask; + output[30 + outputOffset] = (intValue1 >>> 28) & mask; + output[31 + outputOffset] = intValue1 >>> 30; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For30Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For30Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,101 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. 
See gendecompress.py */ + +import java.nio.IntBuffer; +class For30Decompress extends ForDecompress { + static final int numFrameBits = 30; + static final int mask = (int) ((1L<>> 30) | (intValue1 << 2)) & mask; + output[2 + outputOffset] = ((intValue1 >>> 28) | (intValue2 << 4)) & mask; + output[3 + outputOffset] = ((intValue2 >>> 26) | (intValue3 << 6)) & mask; + output[4 + outputOffset] = ((intValue3 >>> 24) | (intValue4 << 8)) & mask; + output[5 + outputOffset] = ((intValue4 >>> 22) | (intValue5 << 10)) & mask; + output[6 + outputOffset] = ((intValue5 >>> 20) | (intValue6 << 12)) & mask; + output[7 + outputOffset] = ((intValue6 >>> 18) | (intValue7 << 14)) & mask; + output[8 + outputOffset] = ((intValue7 >>> 16) | (intValue8 << 16)) & mask; + output[9 + outputOffset] = ((intValue8 >>> 14) | (intValue9 << 18)) & mask; + output[10 + outputOffset] = ((intValue9 >>> 12) | (intValue10 << 20)) & mask; + output[11 + outputOffset] = ((intValue10 >>> 10) | (intValue11 << 22)) & mask; + output[12 + outputOffset] = ((intValue11 >>> 8) | (intValue12 << 24)) & mask; + output[13 + outputOffset] = ((intValue12 >>> 6) | (intValue13 << 26)) & mask; + output[14 + outputOffset] = ((intValue13 >>> 4) | (intValue14 << 28)) & mask; + output[15 + outputOffset] = intValue14 >>> 2; + output[16 + outputOffset] = intValue15 & mask; + output[17 + outputOffset] = ((intValue15 >>> 30) | (intValue16 << 2)) & mask; + output[18 + outputOffset] = ((intValue16 >>> 28) | (intValue17 << 4)) & mask; + output[19 + outputOffset] = ((intValue17 >>> 26) | (intValue18 << 6)) & mask; + output[20 + outputOffset] = ((intValue18 >>> 24) | (intValue19 << 8)) & mask; + output[21 + outputOffset] = ((intValue19 >>> 22) | (intValue20 << 10)) & mask; + output[22 + outputOffset] = ((intValue20 >>> 20) | (intValue21 << 12)) & mask; + output[23 + outputOffset] = ((intValue21 >>> 18) | (intValue22 << 14)) & mask; + output[24 + outputOffset] = ((intValue22 >>> 16) | (intValue23 << 16)) & mask; + output[25 + outputOffset] = ((intValue23 >>> 14) | (intValue24 << 18)) & mask; + output[26 + outputOffset] = ((intValue24 >>> 12) | (intValue25 << 20)) & mask; + output[27 + outputOffset] = ((intValue25 >>> 10) | (intValue26 << 22)) & mask; + output[28 + outputOffset] = ((intValue26 >>> 8) | (intValue27 << 24)) & mask; + output[29 + outputOffset] = ((intValue27 >>> 6) | (intValue28 << 26)) & mask; + output[30 + outputOffset] = ((intValue28 >>> 4) | (intValue29 << 28)) & mask; + output[31 + outputOffset] = intValue29 >>> 2; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For31Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For31Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,102 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. See gendecompress.py */ + +import java.nio.IntBuffer; +class For31Decompress extends ForDecompress { + static final int numFrameBits = 31; + static final int mask = (int) ((1L<>> 31) | (intValue1 << 1)) & mask; + output[2 + outputOffset] = ((intValue1 >>> 30) | (intValue2 << 2)) & mask; + output[3 + outputOffset] = ((intValue2 >>> 29) | (intValue3 << 3)) & mask; + output[4 + outputOffset] = ((intValue3 >>> 28) | (intValue4 << 4)) & mask; + output[5 + outputOffset] = ((intValue4 >>> 27) | (intValue5 << 5)) & mask; + output[6 + outputOffset] = ((intValue5 >>> 26) | (intValue6 << 6)) & mask; + output[7 + outputOffset] = ((intValue6 >>> 25) | (intValue7 << 7)) & mask; + output[8 + outputOffset] = ((intValue7 >>> 24) | (intValue8 << 8)) & mask; + output[9 + outputOffset] = ((intValue8 >>> 23) | (intValue9 << 9)) & mask; + output[10 + outputOffset] = ((intValue9 >>> 22) | (intValue10 << 10)) & mask; + output[11 + outputOffset] = ((intValue10 >>> 21) | (intValue11 << 11)) & mask; + output[12 + outputOffset] = ((intValue11 >>> 20) | (intValue12 << 12)) & mask; + output[13 + outputOffset] = ((intValue12 >>> 19) | (intValue13 << 13)) & mask; + output[14 + outputOffset] = ((intValue13 >>> 18) | (intValue14 << 14)) & mask; + output[15 + outputOffset] = ((intValue14 >>> 17) | (intValue15 << 15)) & mask; + output[16 + outputOffset] = ((intValue15 >>> 16) | (intValue16 << 16)) & mask; + output[17 + outputOffset] = ((intValue16 >>> 15) | (intValue17 << 17)) & mask; + output[18 + outputOffset] = ((intValue17 >>> 14) | (intValue18 << 18)) & mask; + output[19 + outputOffset] = ((intValue18 >>> 13) | (intValue19 << 19)) & mask; + output[20 + outputOffset] = ((intValue19 >>> 12) | (intValue20 << 20)) & mask; + output[21 + outputOffset] = ((intValue20 >>> 11) | (intValue21 << 21)) & mask; + output[22 + outputOffset] = ((intValue21 >>> 10) | (intValue22 << 22)) & mask; + output[23 + outputOffset] = ((intValue22 >>> 9) | (intValue23 << 23)) & mask; + output[24 + outputOffset] = ((intValue23 >>> 8) | (intValue24 << 24)) & mask; + output[25 + outputOffset] = ((intValue24 >>> 7) | (intValue25 << 25)) & mask; + output[26 + outputOffset] = ((intValue25 >>> 6) | (intValue26 << 26)) & mask; + output[27 + outputOffset] = ((intValue26 >>> 5) | (intValue27 << 27)) & mask; + output[28 + outputOffset] = ((intValue27 >>> 4) | (intValue28 << 28)) & mask; + output[29 + outputOffset] = ((intValue28 >>> 3) | (intValue29 << 29)) & mask; + output[30 + outputOffset] = ((intValue29 >>> 2) | (intValue30 << 30)) & mask; + output[31 + outputOffset] = intValue30 >>> 1; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For32Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For32Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,26 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +class For32Decompress extends ForDecompress { + static void decompressFrame(FrameOfRef frameOfRef) { + int oldBufPos = frameOfRef.compressedBuffer.position(); + frameOfRef.compressedBuffer.position(frameOfRef.COMPRESSED_INDEX); + frameOfRef.compressedBuffer.get(frameOfRef.unCompressedData, frameOfRef.offset, frameOfRef.unComprSize); + frameOfRef.compressedBuffer.position(oldBufPos); + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For3Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For3Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,74 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. 
See gendecompress.py */ + +import java.nio.IntBuffer; +class For3Decompress extends ForDecompress { + static final int numFrameBits = 3; + static final int mask = (int) ((1L<>> 3) & mask; + output[2 + outputOffset] = (intValue0 >>> 6) & mask; + output[3 + outputOffset] = (intValue0 >>> 9) & mask; + output[4 + outputOffset] = (intValue0 >>> 12) & mask; + output[5 + outputOffset] = (intValue0 >>> 15) & mask; + output[6 + outputOffset] = (intValue0 >>> 18) & mask; + output[7 + outputOffset] = (intValue0 >>> 21) & mask; + output[8 + outputOffset] = (intValue0 >>> 24) & mask; + output[9 + outputOffset] = (intValue0 >>> 27) & mask; + output[10 + outputOffset] = ((intValue0 >>> 30) | (intValue1 << 2)) & mask; + output[11 + outputOffset] = (intValue1 >>> 1) & mask; + output[12 + outputOffset] = (intValue1 >>> 4) & mask; + output[13 + outputOffset] = (intValue1 >>> 7) & mask; + output[14 + outputOffset] = (intValue1 >>> 10) & mask; + output[15 + outputOffset] = (intValue1 >>> 13) & mask; + output[16 + outputOffset] = (intValue1 >>> 16) & mask; + output[17 + outputOffset] = (intValue1 >>> 19) & mask; + output[18 + outputOffset] = (intValue1 >>> 22) & mask; + output[19 + outputOffset] = (intValue1 >>> 25) & mask; + output[20 + outputOffset] = (intValue1 >>> 28) & mask; + output[21 + outputOffset] = ((intValue1 >>> 31) | (intValue2 << 1)) & mask; + output[22 + outputOffset] = (intValue2 >>> 2) & mask; + output[23 + outputOffset] = (intValue2 >>> 5) & mask; + output[24 + outputOffset] = (intValue2 >>> 8) & mask; + output[25 + outputOffset] = (intValue2 >>> 11) & mask; + output[26 + outputOffset] = (intValue2 >>> 14) & mask; + output[27 + outputOffset] = (intValue2 >>> 17) & mask; + output[28 + outputOffset] = (intValue2 >>> 20) & mask; + output[29 + outputOffset] = (intValue2 >>> 23) & mask; + output[30 + outputOffset] = (intValue2 >>> 26) & mask; + output[31 + outputOffset] = intValue2 >>> 29; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For4Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For4Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,75 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. 
See gendecompress.py */ + +import java.nio.IntBuffer; +class For4Decompress extends ForDecompress { + static final int numFrameBits = 4; + static final int mask = (int) ((1L<>> 4) & mask; + output[2 + outputOffset] = (intValue0 >>> 8) & mask; + output[3 + outputOffset] = (intValue0 >>> 12) & mask; + output[4 + outputOffset] = (intValue0 >>> 16) & mask; + output[5 + outputOffset] = (intValue0 >>> 20) & mask; + output[6 + outputOffset] = (intValue0 >>> 24) & mask; + output[7 + outputOffset] = intValue0 >>> 28; + output[8 + outputOffset] = intValue1 & mask; + output[9 + outputOffset] = (intValue1 >>> 4) & mask; + output[10 + outputOffset] = (intValue1 >>> 8) & mask; + output[11 + outputOffset] = (intValue1 >>> 12) & mask; + output[12 + outputOffset] = (intValue1 >>> 16) & mask; + output[13 + outputOffset] = (intValue1 >>> 20) & mask; + output[14 + outputOffset] = (intValue1 >>> 24) & mask; + output[15 + outputOffset] = intValue1 >>> 28; + output[16 + outputOffset] = intValue2 & mask; + output[17 + outputOffset] = (intValue2 >>> 4) & mask; + output[18 + outputOffset] = (intValue2 >>> 8) & mask; + output[19 + outputOffset] = (intValue2 >>> 12) & mask; + output[20 + outputOffset] = (intValue2 >>> 16) & mask; + output[21 + outputOffset] = (intValue2 >>> 20) & mask; + output[22 + outputOffset] = (intValue2 >>> 24) & mask; + output[23 + outputOffset] = intValue2 >>> 28; + output[24 + outputOffset] = intValue3 & mask; + output[25 + outputOffset] = (intValue3 >>> 4) & mask; + output[26 + outputOffset] = (intValue3 >>> 8) & mask; + output[27 + outputOffset] = (intValue3 >>> 12) & mask; + output[28 + outputOffset] = (intValue3 >>> 16) & mask; + output[29 + outputOffset] = (intValue3 >>> 20) & mask; + output[30 + outputOffset] = (intValue3 >>> 24) & mask; + output[31 + outputOffset] = intValue3 >>> 28; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For5Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For5Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,76 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. 
See gendecompress.py */ + +import java.nio.IntBuffer; +class For5Decompress extends ForDecompress { + static final int numFrameBits = 5; + static final int mask = (int) ((1L<>> 5) & mask; + output[2 + outputOffset] = (intValue0 >>> 10) & mask; + output[3 + outputOffset] = (intValue0 >>> 15) & mask; + output[4 + outputOffset] = (intValue0 >>> 20) & mask; + output[5 + outputOffset] = (intValue0 >>> 25) & mask; + output[6 + outputOffset] = ((intValue0 >>> 30) | (intValue1 << 2)) & mask; + output[7 + outputOffset] = (intValue1 >>> 3) & mask; + output[8 + outputOffset] = (intValue1 >>> 8) & mask; + output[9 + outputOffset] = (intValue1 >>> 13) & mask; + output[10 + outputOffset] = (intValue1 >>> 18) & mask; + output[11 + outputOffset] = (intValue1 >>> 23) & mask; + output[12 + outputOffset] = ((intValue1 >>> 28) | (intValue2 << 4)) & mask; + output[13 + outputOffset] = (intValue2 >>> 1) & mask; + output[14 + outputOffset] = (intValue2 >>> 6) & mask; + output[15 + outputOffset] = (intValue2 >>> 11) & mask; + output[16 + outputOffset] = (intValue2 >>> 16) & mask; + output[17 + outputOffset] = (intValue2 >>> 21) & mask; + output[18 + outputOffset] = (intValue2 >>> 26) & mask; + output[19 + outputOffset] = ((intValue2 >>> 31) | (intValue3 << 1)) & mask; + output[20 + outputOffset] = (intValue3 >>> 4) & mask; + output[21 + outputOffset] = (intValue3 >>> 9) & mask; + output[22 + outputOffset] = (intValue3 >>> 14) & mask; + output[23 + outputOffset] = (intValue3 >>> 19) & mask; + output[24 + outputOffset] = (intValue3 >>> 24) & mask; + output[25 + outputOffset] = ((intValue3 >>> 29) | (intValue4 << 3)) & mask; + output[26 + outputOffset] = (intValue4 >>> 2) & mask; + output[27 + outputOffset] = (intValue4 >>> 7) & mask; + output[28 + outputOffset] = (intValue4 >>> 12) & mask; + output[29 + outputOffset] = (intValue4 >>> 17) & mask; + output[30 + outputOffset] = (intValue4 >>> 22) & mask; + output[31 + outputOffset] = intValue4 >>> 27; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For6Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For6Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,77 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. 
See gendecompress.py */ + +import java.nio.IntBuffer; +class For6Decompress extends ForDecompress { + static final int numFrameBits = 6; + static final int mask = (int) ((1L<>> 6) & mask; + output[2 + outputOffset] = (intValue0 >>> 12) & mask; + output[3 + outputOffset] = (intValue0 >>> 18) & mask; + output[4 + outputOffset] = (intValue0 >>> 24) & mask; + output[5 + outputOffset] = ((intValue0 >>> 30) | (intValue1 << 2)) & mask; + output[6 + outputOffset] = (intValue1 >>> 4) & mask; + output[7 + outputOffset] = (intValue1 >>> 10) & mask; + output[8 + outputOffset] = (intValue1 >>> 16) & mask; + output[9 + outputOffset] = (intValue1 >>> 22) & mask; + output[10 + outputOffset] = ((intValue1 >>> 28) | (intValue2 << 4)) & mask; + output[11 + outputOffset] = (intValue2 >>> 2) & mask; + output[12 + outputOffset] = (intValue2 >>> 8) & mask; + output[13 + outputOffset] = (intValue2 >>> 14) & mask; + output[14 + outputOffset] = (intValue2 >>> 20) & mask; + output[15 + outputOffset] = intValue2 >>> 26; + output[16 + outputOffset] = intValue3 & mask; + output[17 + outputOffset] = (intValue3 >>> 6) & mask; + output[18 + outputOffset] = (intValue3 >>> 12) & mask; + output[19 + outputOffset] = (intValue3 >>> 18) & mask; + output[20 + outputOffset] = (intValue3 >>> 24) & mask; + output[21 + outputOffset] = ((intValue3 >>> 30) | (intValue4 << 2)) & mask; + output[22 + outputOffset] = (intValue4 >>> 4) & mask; + output[23 + outputOffset] = (intValue4 >>> 10) & mask; + output[24 + outputOffset] = (intValue4 >>> 16) & mask; + output[25 + outputOffset] = (intValue4 >>> 22) & mask; + output[26 + outputOffset] = ((intValue4 >>> 28) | (intValue5 << 4)) & mask; + output[27 + outputOffset] = (intValue5 >>> 2) & mask; + output[28 + outputOffset] = (intValue5 >>> 8) & mask; + output[29 + outputOffset] = (intValue5 >>> 14) & mask; + output[30 + outputOffset] = (intValue5 >>> 20) & mask; + output[31 + outputOffset] = intValue5 >>> 26; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For7Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For7Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,78 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. 
See gendecompress.py */ + +import java.nio.IntBuffer; +class For7Decompress extends ForDecompress { + static final int numFrameBits = 7; + static final int mask = (int) ((1L<>> 7) & mask; + output[2 + outputOffset] = (intValue0 >>> 14) & mask; + output[3 + outputOffset] = (intValue0 >>> 21) & mask; + output[4 + outputOffset] = ((intValue0 >>> 28) | (intValue1 << 4)) & mask; + output[5 + outputOffset] = (intValue1 >>> 3) & mask; + output[6 + outputOffset] = (intValue1 >>> 10) & mask; + output[7 + outputOffset] = (intValue1 >>> 17) & mask; + output[8 + outputOffset] = (intValue1 >>> 24) & mask; + output[9 + outputOffset] = ((intValue1 >>> 31) | (intValue2 << 1)) & mask; + output[10 + outputOffset] = (intValue2 >>> 6) & mask; + output[11 + outputOffset] = (intValue2 >>> 13) & mask; + output[12 + outputOffset] = (intValue2 >>> 20) & mask; + output[13 + outputOffset] = ((intValue2 >>> 27) | (intValue3 << 5)) & mask; + output[14 + outputOffset] = (intValue3 >>> 2) & mask; + output[15 + outputOffset] = (intValue3 >>> 9) & mask; + output[16 + outputOffset] = (intValue3 >>> 16) & mask; + output[17 + outputOffset] = (intValue3 >>> 23) & mask; + output[18 + outputOffset] = ((intValue3 >>> 30) | (intValue4 << 2)) & mask; + output[19 + outputOffset] = (intValue4 >>> 5) & mask; + output[20 + outputOffset] = (intValue4 >>> 12) & mask; + output[21 + outputOffset] = (intValue4 >>> 19) & mask; + output[22 + outputOffset] = ((intValue4 >>> 26) | (intValue5 << 6)) & mask; + output[23 + outputOffset] = (intValue5 >>> 1) & mask; + output[24 + outputOffset] = (intValue5 >>> 8) & mask; + output[25 + outputOffset] = (intValue5 >>> 15) & mask; + output[26 + outputOffset] = (intValue5 >>> 22) & mask; + output[27 + outputOffset] = ((intValue5 >>> 29) | (intValue6 << 3)) & mask; + output[28 + outputOffset] = (intValue6 >>> 4) & mask; + output[29 + outputOffset] = (intValue6 >>> 11) & mask; + output[30 + outputOffset] = (intValue6 >>> 18) & mask; + output[31 + outputOffset] = intValue6 >>> 25; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For8Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For8Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,79 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. 
See gendecompress.py */ + +import java.nio.IntBuffer; +class For8Decompress extends ForDecompress { + static final int numFrameBits = 8; + static final int mask = (int) ((1L<>> 8) & mask; + output[2 + outputOffset] = (intValue0 >>> 16) & mask; + output[3 + outputOffset] = intValue0 >>> 24; + output[4 + outputOffset] = intValue1 & mask; + output[5 + outputOffset] = (intValue1 >>> 8) & mask; + output[6 + outputOffset] = (intValue1 >>> 16) & mask; + output[7 + outputOffset] = intValue1 >>> 24; + output[8 + outputOffset] = intValue2 & mask; + output[9 + outputOffset] = (intValue2 >>> 8) & mask; + output[10 + outputOffset] = (intValue2 >>> 16) & mask; + output[11 + outputOffset] = intValue2 >>> 24; + output[12 + outputOffset] = intValue3 & mask; + output[13 + outputOffset] = (intValue3 >>> 8) & mask; + output[14 + outputOffset] = (intValue3 >>> 16) & mask; + output[15 + outputOffset] = intValue3 >>> 24; + output[16 + outputOffset] = intValue4 & mask; + output[17 + outputOffset] = (intValue4 >>> 8) & mask; + output[18 + outputOffset] = (intValue4 >>> 16) & mask; + output[19 + outputOffset] = intValue4 >>> 24; + output[20 + outputOffset] = intValue5 & mask; + output[21 + outputOffset] = (intValue5 >>> 8) & mask; + output[22 + outputOffset] = (intValue5 >>> 16) & mask; + output[23 + outputOffset] = intValue5 >>> 24; + output[24 + outputOffset] = intValue6 & mask; + output[25 + outputOffset] = (intValue6 >>> 8) & mask; + output[26 + outputOffset] = (intValue6 >>> 16) & mask; + output[27 + outputOffset] = intValue6 >>> 24; + output[28 + outputOffset] = intValue7 & mask; + output[29 + outputOffset] = (intValue7 >>> 8) & mask; + output[30 + outputOffset] = (intValue7 >>> 16) & mask; + output[31 + outputOffset] = intValue7 >>> 24; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/For9Decompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/For9Decompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,80 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* This program is generated, do not modify. 
See gendecompress.py */ + +import java.nio.IntBuffer; +class For9Decompress extends ForDecompress { + static final int numFrameBits = 9; + static final int mask = (int) ((1L<>> 9) & mask; + output[2 + outputOffset] = (intValue0 >>> 18) & mask; + output[3 + outputOffset] = ((intValue0 >>> 27) | (intValue1 << 5)) & mask; + output[4 + outputOffset] = (intValue1 >>> 4) & mask; + output[5 + outputOffset] = (intValue1 >>> 13) & mask; + output[6 + outputOffset] = (intValue1 >>> 22) & mask; + output[7 + outputOffset] = ((intValue1 >>> 31) | (intValue2 << 1)) & mask; + output[8 + outputOffset] = (intValue2 >>> 8) & mask; + output[9 + outputOffset] = (intValue2 >>> 17) & mask; + output[10 + outputOffset] = ((intValue2 >>> 26) | (intValue3 << 6)) & mask; + output[11 + outputOffset] = (intValue3 >>> 3) & mask; + output[12 + outputOffset] = (intValue3 >>> 12) & mask; + output[13 + outputOffset] = (intValue3 >>> 21) & mask; + output[14 + outputOffset] = ((intValue3 >>> 30) | (intValue4 << 2)) & mask; + output[15 + outputOffset] = (intValue4 >>> 7) & mask; + output[16 + outputOffset] = (intValue4 >>> 16) & mask; + output[17 + outputOffset] = ((intValue4 >>> 25) | (intValue5 << 7)) & mask; + output[18 + outputOffset] = (intValue5 >>> 2) & mask; + output[19 + outputOffset] = (intValue5 >>> 11) & mask; + output[20 + outputOffset] = (intValue5 >>> 20) & mask; + output[21 + outputOffset] = ((intValue5 >>> 29) | (intValue6 << 3)) & mask; + output[22 + outputOffset] = (intValue6 >>> 6) & mask; + output[23 + outputOffset] = (intValue6 >>> 15) & mask; + output[24 + outputOffset] = ((intValue6 >>> 24) | (intValue7 << 8)) & mask; + output[25 + outputOffset] = (intValue7 >>> 1) & mask; + output[26 + outputOffset] = (intValue7 >>> 10) & mask; + output[27 + outputOffset] = (intValue7 >>> 19) & mask; + output[28 + outputOffset] = ((intValue7 >>> 28) | (intValue8 << 4)) & mask; + output[29 + outputOffset] = (intValue8 >>> 5) & mask; + output[30 + outputOffset] = (intValue8 >>> 14) & mask; + output[31 + outputOffset] = intValue8 >>> 23; + // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/ForDecompress.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/ForDecompress.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,56 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.nio.IntBuffer; + +/** PFor frame decompression for any number of frame bits. 
*/ +class ForDecompress { + + static void decodeAnyFrame( + IntBuffer intBuffer, int bufIndex, int inputSize, int numFrameBits, + int[] output, int outputOffset) { + + assert numFrameBits > 0 : numFrameBits; + assert numFrameBits <= 32 : numFrameBits; + final int mask = (int) ((1L<>> bitPos) & mask; + if (--inputSize == 0) return; + bitPos += numFrameBits; + } + + int intValue2 = intBuffer.get(++bufIndex); + output[++outputOffset] = ( (bitPos == 32) + ? intValue2 + : ((intValue1 >>> bitPos) | (intValue2 << (32 - bitPos))) + ) & mask; + + if (--inputSize == 0) return; + + intValue1 = intValue2; + bitPos += numFrameBits - 32; + } while (true); + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/FrameOfRef.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/FrameOfRef.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,330 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.nio.IntBuffer; + +import org.apache.lucene.util.BitUtil; + +/** Frame of Reference lossless integer compression/decompression. + * For positive integers, the compression is done by leaving out + * the most significant bits, and storing all numbers with a fixed number of bits + * contiguously in a buffer of bits. This buffer is called the frame, and it + * can store positive numbers in a range from 0 to a constant maximum fitting in + * the number of bits available for a single compressed number. + *

+ * This implementation uses 0 as the lower bound reference for the frame, + * so small positive integers can be most effectively compressed. + *

+ * Optimized code is used for decompression, see class ForDecompress and its subclasses. + *
Use of the -server option helps performance for the Sun 1.6 jvm under Linux. + *

+ * This class does not provide delta coding because the Lucene index + * structures already have that. + *
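For illustration only (not part of the patch): a minimal standalone sketch of the frame idea described above, packing non-negative values with a fixed number of least-significant bits so that a value may straddle two ints of the frame. All names and values here are invented for the example.

import java.util.Arrays;

// Illustrative fixed-width bit packing; not the patch's API.
public class ForIdeaSketch {

  // Pack each non-negative value into numFrameBits bits, least significant bits first.
  static int[] pack(int[] values, int numFrameBits) {
    int[] frame = new int[(values.length * numFrameBits + 31) / 32];
    for (int i = 0; i < values.length; i++) {
      int bitPos = i * numFrameBits;
      frame[bitPos >> 5] |= values[i] << (bitPos & 31);
      if ((bitPos & 31) + numFrameBits > 32) {                  // value straddles two ints
        frame[(bitPos >> 5) + 1] |= values[i] >>> (32 - (bitPos & 31));
      }
    }
    return frame;
  }

  // Recover the value stored at index i.
  static int unpack(int[] frame, int i, int numFrameBits) {
    int mask = (int) ((1L << numFrameBits) - 1);
    int bitPos = i * numFrameBits;
    long bits = frame[bitPos >> 5] & 0xFFFFFFFFL;
    if ((bitPos & 31) + numFrameBits > 32) {
      bits |= (frame[(bitPos >> 5) + 1] & 0xFFFFFFFFL) << 32;
    }
    return ((int) (bits >>> (bitPos & 31))) & mask;
  }

  public static void main(String[] args) {
    int[] values = {3, 7, 1, 0, 6, 2, 5, 4};                    // max value 7, so 3 frame bits suffice
    int[] frame = pack(values, 3);
    int[] decoded = new int[values.length];
    for (int i = 0; i < values.length; i++) {
      decoded[i] = unpack(frame, i, 3);
    }
    System.out.println(Arrays.equals(values, decoded));         // prints true
  }
}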

+ * To be done:
+ * <ul>
+ * <li> Optimize compression code by specializing for the number of frame bits.
+ * <li> IntBuffer.get() is somewhat faster than IntBuffer.get(index); adapt (de)compression to use the relative get() method.
+ * <li> Check javadoc generation and generated javadocs. Add javadoc code references.
+ * </ul>
+ */ + +// nocommit -- make a dedicated decompressor -- much of this +// is not needed for decompress: + +public class FrameOfRef { + /** Number of frame bits. 2**numFrameBits - 1 is the maximum compressed value. */ + protected int numFrameBits; + + /** Constant header tag to allow other compression methods, use value 0001 for + * Frame of reference. + * CHECKME: Move this to another class defining various decompression methods? + */ + //protected int compressionMethod; + private final int FOR_COMPRESSION = 1; /** encode compression method in header */ + + /** IntBuffer for compressed data */ + protected IntBuffer compressedBuffer; + + /** Index of header in int buffer */ + protected final int HEADER_INDEX = 0; + + /** Start index in int buffer of array integers each compressed to numFrameBits. */ + protected final int COMPRESSED_INDEX = HEADER_INDEX + 1; + protected final int HEADER_SIZE = 1; // one integer in IntBuffer + + // used by decompress to read the "input" (compressed) ints + protected int[] scratch = new int[32]; + + /** Uncompressed data */ + protected int[] unCompressedData; + /** Offset into unCompressedData */ + protected int offset; + /** Size of unCompressedData, -1 when not available. */ + protected int unComprSize = -1; + + /** Create a Frame of Reference integer compressor/decompressor. */ + public FrameOfRef() { + } + + /** Integer buffer to hold the compressed data.
+ * Compression and decompression do not affect the current buffer position, + * and the beginning of the compressed data should be or will be at the current + * buffer position.
+ * When the buffer is not large enough, ArrayIndexOutOfBoundExceptions will occur + * during compression/decompression.
+ * Without a buffer for compressed data, compress() will only determine the number + * of integers needed in the buffer, see compress().
+ * Without a valid buffer, decompress() will throw a NullPointerException.
+ * For optimal speed when the IntBuffer is a view on a ByteBuffer, + * the IntBuffer should have a byte offset of a multiple of 4 bytes, possibly 0.
+ * An IntBuffer is used here because 32 bits can efficiently accessed in the buffer + * on all current processors, and a positive int is normally large enough + * for various purposes in a Lucene index. + * + * @param compressedBuffer The buffer to hold the compressed integers. + * + */ + public void setCompressedBuffer(IntBuffer compressedBuffer) { + this.compressedBuffer = compressedBuffer; + } + + + /** Array with offset holding uncompressed data. + * @param unCompressedData The array holding uncompressed integers. + * @param offset offset in unCompressedData. + * @param unComprSize The number of uncompressed integers, should be at least 1. + */ + public void setUnCompressedData(int[] unCompressedData, int offset, int unComprSize) { + assert unCompressedData != null; + assert offset >= 0; + assert unComprSize >= 1; + assert (offset + unComprSize) <= unCompressedData.length; + this.unCompressedData = unCompressedData; + this.offset = offset; + this.unComprSize = unComprSize; + } + + /** Compress the uncompressed data into the buffer using the given number of + * frame bits, storing only this number of least significant bits of the + * uncompressed integers in the compressed buffer. + * Should only be used after setUnCompressedData(). + *
+ * When setCompressBuffer() was not done, no actual compression is done. + * Regardless of the use of setCompressBuffer(), bufferByteSize() will return + * a valid value after calling compress(). + *
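Read literally, the paragraph above implies a two-pass usage: call compress() once without a compressed buffer to learn the required size, allocate, then compress again. A minimal sketch of that reading follows; the method names are taken from this patch, but the call sequence itself is an interpretation, not something the patch demonstrates.

import java.nio.IntBuffer;
import org.apache.lucene.util.pfor.FrameOfRef;

// Hypothetical two-pass usage of FrameOfRef, assuming this patch is applied.
public class FrameOfRefUsageSketch {
  public static void main(String[] args) {
    int[] docDeltas = {1, 5, 2, 9, 3, 1, 1, 4};

    FrameOfRef coder = new FrameOfRef();
    coder.setUnCompressedData(docDeltas, 0, docDeltas.length);
    int numFrameBits = coder.frameBitsForCompression();

    // First pass: no compressed buffer set, so compress() only determines sizes.
    coder.compress(numFrameBits);
    IntBuffer compressed = IntBuffer.allocate(coder.compressedSize());

    // Second pass: with the buffer attached, compress() writes the header and the frame.
    coder.setCompressedBuffer(compressed);
    coder.compress(numFrameBits);

    System.out.println("ints used: " + coder.compressedSize());
  }
}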

+ * When a buffer is available, the following is done. + * A header is stored as a first integer into the buffer, encoding + * the compression method, the number of frame bits and the number of compressed integers. + * All uncompressed integers are stored sequentially in compressed form + * in the buffer after the header. + * + * @param numFrameBits The number of frame bits. Should be between 1 and 32. + * Note that when this value is 32, no compression occurs. + */ + public void compress(int numFrameBits) { + assert numFrameBits >= 1; + assert numFrameBits <= 32; + this.numFrameBits = numFrameBits; + encodeHeader(unComprSize); + for (int i = 0; i < unComprSize; i++) { + int v = unCompressedData[i + offset]; + encodeCompressedValue(i, v); + } + } + + /** As compress(), using the result of frameBitsForCompression() as the number of frame bits. */ + public void compress() { + compress( frameBitsForCompression()); + } + + /** Return the number of integers used in IntBuffer. + * Only valid after compress() or decompress(). + */ + public int compressedSize() { + return HEADER_SIZE + (unComprSize * numFrameBits + 31) / 32; + } + + /** Encode an integer value by compressing it into the buffer. + * @param compressedPos The index of the compressed integer in the compressed buffer. + * @param value The non negative value to be stored in compressed form. + * This should fit into the number of frame bits. + */ + protected void encodeCompressedValue(int compressedPos, int value) { + encodeCompressedValueBase(compressedPos, value, numFrameBits); // FIXME: inline private method. + } + + /** Encode a value into the compressed buffer. + * Since numFrameBits is always smaller than the number of bits in an int, + * at most two ints in the buffer will be affected. + *
Has no effect when compressedBuffer == null. + *
This could be specialized for numBits just like decompressFrame(). + */ + private void encodeCompressedValueBase(int compressedPos, int value, int numBits) { + assert numBits >= 1; + assert numBits <= 32; + if (compressedBuffer == null) { + return; + } + final int mask = (int) ((1L << numBits) - 1); + assert ((value & mask) == value) : ("value " + value + ", mask " + mask + ", numBits " + numBits); // lossless compression + final int compressedBitPos = numBits * compressedPos; + final int firstBitPosition = compressedBitPos & 31; + int intIndex = COMPRESSED_INDEX + (compressedBitPos >> 5); + setBufferIntBits(intIndex, firstBitPosition, numBits, value); + if ((firstBitPosition + numBits) > 32) { // value does not fit in first int + setBufferIntBits(intIndex+1, 0, (firstBitPosition + numBits - 32), (value >>> (32 - firstBitPosition))); + } + } + + /** Change bits of an integer in the compressed buffer. + *
A more efficient implementation is possible when the compressed + * buffer is known to contain only zero bits, in that case one mask operation can be removed. + * @param intIndex The index of the affected integer in the compressed buffer. + * @param firstBitPosition The position of the least significant bit to be changed. + * @param numBits The number of more significant bits to be changed. + * @param value The new value of the bits to be changed, with the least significant bit at position zero. + */ + protected void setBufferIntBits(int intIndex, int firstBitPosition, int numBits, int value) { + final int mask = (int) ((1L << numBits) - 1); + compressedBuffer.put(intIndex, + (compressedBuffer.get(intIndex) + & ~ (mask << firstBitPosition)) // masking superfluous on clear buffer + | (value << firstBitPosition)); + } + + /** The 4 byte header (32 bits) contains: + *

    + *
+ * <ul>
+ * <li> 4 bits for the compression method: 0b0001 for FrameOfRef,
+ * <li> 4 bits unused,
+ * <li> 5 bits for (numFrameBits-1),
+ * <li> 3 bits unused,
+ * <li> 8 bits for the number of compressed integers - 1,
+ * <li> 8 bits unused.
+ * </ul>
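For illustration, here is the header word this layout produces for one concrete case; the shifts mirror encodeHeader() and decodeHeader() below, while the class and the chosen values are invented for the example.

// Worked example of the FrameOfRef header layout listed above.
public class ForHeaderSketch {
  public static void main(String[] args) {
    int numFrameBits = 6;
    int unComprSize = 128;

    int header = ((unComprSize - 1) << 16)    // bits 16..23: number of compressed integers - 1
               | ((numFrameBits - 1) << 8)    // bits 8..12: numFrameBits - 1
               | (1 << 4);                    // bits 4..7: compression method, 0b0001 = FrameOfRef

    // Decoding recovers the same fields.
    System.out.println((header >>> 4) & 15);           // 1   (FrameOfRef)
    System.out.println(((header >>> 8) & 31) + 1);     // 6   (numFrameBits)
    System.out.println(((header >>> 16) & 255) + 1);   // 128 (number of compressed integers)
  }
}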
+ */ + private void encodeHeader(int unComprSize) { + assert numFrameBits >= 1; + assert numFrameBits <= (1 << 5); // 32 + assert unComprSize >= 1; + assert unComprSize <= (1 << 8); // 256 + if (compressedBuffer != null) { + compressedBuffer.put(HEADER_INDEX, + ((unComprSize-1) << 16) + | ((numFrameBits-1) << 8) + | (FOR_COMPRESSION << 4)); + } + } + + protected void decodeHeader() { + int header = compressedBuffer.get(); + // nocommit -- we know this will always be fixed (eg + // 128)? silly to encode in every frame? + //unComprSize = ((header >>> 16) & 255) + 1; + numFrameBits = ((header >>> 8) & 31) + 1; + // verify compression method: + assert FOR_COMPRESSION == ((header >>> 4) & 15); + } + + /** Decompress from the buffer into output from a given offset. */ + public void decompress() { + decodeHeader(); + decompressFrame(); + } + + /** Return the number of integers available for decompression. + * Do not use before an IntBuffer was passed to setCompressBuffer. + */ + public int decompressedSize() { + decodeHeader(); + return unComprSize; + } + + /** For performance, this delegates to classes with fixed numFrameBits. */ + private void decompressFrame() { + switch (numFrameBits) { + // CHECKME: two other implementations might be faster: + // - array of static methods: Method[numFrameBits].invoke(null, [this]), + // - array of non static decompressors: ForDecompressor[numFrameBits].decompressFrame(this) . + case 1: For1Decompress.decompressFrame(this); break; + case 2: For2Decompress.decompressFrame(this); break; + case 3: For3Decompress.decompressFrame(this); break; + case 4: For4Decompress.decompressFrame(this); break; + case 5: For5Decompress.decompressFrame(this); break; + case 6: For6Decompress.decompressFrame(this); break; + case 7: For7Decompress.decompressFrame(this); break; + case 8: For8Decompress.decompressFrame(this); break; + case 9: For9Decompress.decompressFrame(this); break; + case 10: For10Decompress.decompressFrame(this); break; + case 11: For11Decompress.decompressFrame(this); break; + case 12: For12Decompress.decompressFrame(this); break; + case 13: For13Decompress.decompressFrame(this); break; + case 14: For14Decompress.decompressFrame(this); break; + case 15: For15Decompress.decompressFrame(this); break; + case 16: For16Decompress.decompressFrame(this); break; + case 17: For17Decompress.decompressFrame(this); break; + case 18: For18Decompress.decompressFrame(this); break; + case 19: For19Decompress.decompressFrame(this); break; + case 20: For20Decompress.decompressFrame(this); break; + case 21: For21Decompress.decompressFrame(this); break; + case 22: For22Decompress.decompressFrame(this); break; + case 23: For23Decompress.decompressFrame(this); break; + case 24: For24Decompress.decompressFrame(this); break; + case 25: For25Decompress.decompressFrame(this); break; + case 26: For26Decompress.decompressFrame(this); break; + case 27: For27Decompress.decompressFrame(this); break; + case 28: For28Decompress.decompressFrame(this); break; + case 29: For29Decompress.decompressFrame(this); break; + case 30: For30Decompress.decompressFrame(this); break; + case 31: For31Decompress.decompressFrame(this); break; + case 32: For32Decompress.decompressFrame(this); break; + default: + throw new IllegalStateException("Unknown number of frame bits " + numFrameBits); + } + } + + public int getNumFrameBits() { + return numFrameBits; + } + + /** Determine the number of frame bits to be used for compression. + * Use only after setUnCompressedData(). 
+ * @return The number of bits needed to encode the maximum positive uncompressed value. + * Negative uncompressed values have no influence on the result. + */ + public int frameBitsForCompression() { + int maxNonNegVal = 0; + for (int i = offset; i < (offset + unComprSize); i++) { + if (unCompressedData[i] > maxNonNegVal) { + maxNonNegVal = unCompressedData[i]; + } + } + return BitUtil.logNextHigherPowerOfTwo(maxNonNegVal) + 1; + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/PFor.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/PFor.java Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,413 @@ +package org.apache.lucene.util.pfor; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Arrays; + +/** Patched Frame of Reference PFOR compression/decompression. + *

+ * As defined in:
+ * Super-Scalar RAM-CPU Cache Compression
+ * Marcin Zukowski, Sándor Héman, Niels Nes, Peter Boncz, 2006.
+ * with extensions from:
+ * Performance of Compressed Inverted List Caching in Search Engines
+ * Jiangong Zhang, Xiaohui Long, Torsten Suel, 2008.
+ *

+ * This class does not provide delta coding because the lucene index + * structures already have that. + *

+ * The implementation uses 0 as lower bound for the frame, + * so small positive integers will be most effectively compressed. + *

+ * Some optimized code is used for decompression, + * see class ForDecompress and its subclasses. + *
Good decompression performance will depend on the performance + * of java.nio.IntBuffer indexed get() methods. + *
Use of the -server option helps performance for the Sun 1.6 jvm under Linux. + *

+ * The start point of first exception is at its natural boundary: + * 2 byte exceptions at even byte position, 4 byte at quadruple. + *
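As a quick numeric check of this alignment rule (illustrative only; the rounding mirrors encodeExceptionValues() and patchExceptions() further down, and the offset is arbitrary):

// Exception start alignment example.
public class PForAlignmentSketch {
  public static void main(String[] args) {
    int excByteOffset = 93;   // hypothetical first byte after the compressed frame bits

    int shortStart = ((excByteOffset + 1) >> 1) << 1;   // 2-byte exceptions start at the next even byte
    int intStart   = ((excByteOffset + 3) >> 2) << 2;   // 4-byte exceptions start at the next multiple of 4

    System.out.println(shortStart);   // 94
    System.out.println(intStart);     // 96
  }
}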

+ * To be done:
+ * <ul>
+ * <li> Optimize compression code.
+ * <li> IntBuffer.get() is somewhat faster than IntBuffer.get(index); adapt (de)compression to use the relative get() method.
+ * <li> Check javadoc generation and generated javadocs. Add javadoc code references.
+ * </ul>
+ */ +public class PFor extends FrameOfRef { + /** Index on input and in compressed frame of first exception, -1 when no exceptions */ + private int firstExceptionIndex; + + /** CHECKME: Move this to another class defining various decompression methods? */ + private final int PFOR_COMPRESSION = 2; /** to encode compression method in header */ + + /** How to encode PFor exceptions: 0: byte, 1: short, 2:int, unused: 3: long */ + private int exceptionCode = -1; + + /** Total number of exception values */ + private int numExceptions; + + /** Create a PFor compressor/decompressor. */ + public PFor() { + } + + /** Compress the decompressed data into the buffer. + * Should only be used after setUnCompressedData(). + *
+ * When setCompressBuffer() was not done, no actual compression is done. + * Regardless of the use of setCompressBuffer(), bufferByteSize() will return + * a valid value after calling compress(). + *

+ * When a buffer is available, the following is done. + * A header is stored into the buffer, encoding, among others, numFrameBits and unComprSize. + * All ints < 2**numFrameBits are stored sequentially in compressed form + * in the buffer. + * All other ints are stored in the buffer as exceptions after the compressed sequential ints, + * using 1, 2 or 4 bytes per exception, starting at the first byte after the compressed + * sequential ints. + *
+ * The index of the first exception is encoded in the header in the buffer; + * all later exceptions have the offset to the next exception as their value, + * with the last one holding the offset to just after the available input size. + * After the first exception, when the next exception index does not fit in + * numFrameBits bits, an exception after 2**numFrameBits inputs is forced and inserted. + *
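The chain described above can be walked with a loop like the following sketch: each exception slot in the frame-decoded output holds the distance to the next exception minus one, and patching overwrites it with the real value, as PFor.patch() does. The helper and the data are invented for the example and assume at least one exception.

// Illustrative exception-chain patching; names and values are invented.
public class PForChainSketch {

  // decoded: frame-decoded values where exception slots still hold chain offsets.
  static void patchChain(int[] decoded, int firstExceptionIndex, int[] exceptionValues) {
    int excIndex = firstExceptionIndex;
    int i = 0;
    while (excIndex < decoded.length) {
      int next = excIndex + decoded[excIndex] + 1;   // follow the chain offset stored in the slot
      decoded[excIndex] = exceptionValues[i++];      // patch in the real exception value
      excIndex = next;
    }
  }

  public static void main(String[] args) {
    // Exceptions at indices 1 and 4; their slots hold offsets 2 and 1 (to index 4, then past the end).
    int[] decoded = {3, 2, 7, 1, 1, 6};
    patchChain(decoded, 1, new int[] {500, 900});
    System.out.println(java.util.Arrays.toString(decoded));   // [3, 500, 7, 1, 900, 6]
  }
}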
+ * Exception values are stored in the order of the exceptions. + * The number of bytes used for an exception is also encoded in the header. + * This depends on the maximum exception value and does not vary between the exceptions. + */ + public void compress(int numFrameBits) { + assert numFrameBits >= 1; + assert numFrameBits <= 32; + this.numFrameBits = numFrameBits; + numExceptions = 0; + int maxException = -1; + firstExceptionIndex = -1; + int lastExceptionIndex = -1; + int i; + int[] exceptionValues = new int[unComprSize]; + int maxNonExceptionMask = (int) ((1L << numFrameBits) - 1); + int maxChain = 254; // maximum value of firstExceptionIndex in header + // CHECKME: maxChain 1 off because of initial value of lastExceptionIndex and force exception test below? + for (i = 0; i < unComprSize; i++) { + int v = unCompressedData[i + offset]; + // FIXME: split this loop to avoid if statement in loop. + // use predication for this: (someBool ? 1 : 0), and hope that the jit optimizes this. + if ( (((v & maxNonExceptionMask) == v) // no value exception + && (i < (lastExceptionIndex + maxChain)))) { // no forced exception + encodeCompressedValue(i, v); // normal encoding + } else { // exception + exceptionValues[numExceptions] = v; + numExceptions++; + if (firstExceptionIndex == -1) { + firstExceptionIndex = i; + assert firstExceptionIndex <= 254; // maximum value of firstExceptionIndex in header + maxException = v; + maxChain = 1 << ((30 < numFrameBits) ? 30 : numFrameBits); // fewer bits available for exception chain value. + } else if (v > maxException) { + maxException = v; + } + // encode the previous exception pointer + if (lastExceptionIndex >= 0) { + encodeCompressedValue(lastExceptionIndex, i - lastExceptionIndex - 1); + } + lastExceptionIndex = i; + } + } + if (lastExceptionIndex >= 0) { + encodeCompressedValue(lastExceptionIndex, i - lastExceptionIndex - 1); // end the exception chain. + } + int bitsInArray = numFrameBits * unCompressedData.length; + //int bytesInArray = (bitsInArray + 7) / 8; + if (maxException < (1 << 8)) { // exceptions as byte + exceptionCode = 0; + } else if (maxException < (1 << 16)) { // exceptions as 2 bytes + exceptionCode = 1; + } else /* if (maxException < (1L << 32)) */ { // exceptions as 4 bytes + exceptionCode = 2; + } + encodeHeader(unComprSize, firstExceptionIndex); + encodeExceptionValues(exceptionValues); + } + + /** Return the number bytes used for a single exception */ + private int exceptionByteSize() { + assert exceptionCode >= 0; + assert exceptionCode <= 2; + return exceptionCode == 0 ? 1 + : exceptionCode == 1 ? 2 + : 4; + } + + /** Return the number of exceptions. + * Only valid after compress() or decompress(). + */ + public int getNumExceptions() { + return numExceptions; + } + + private int compressedArrayByteSize() { + int compressedArrayBits = unComprSize * numFrameBits; + return (compressedArrayBits + 7) / 8; + } + + /** Return the number of integers used in IntBuffer. + * Only valid after compress() or decompress(). 
+ */ + public int compressedSize() { + // numExceptions only valid after compress() or decompress() + return HEADER_SIZE + + ((compressedArrayByteSize() + + exceptionByteSize() * numExceptions + + 3) >> 2); // round up to next multiple of 4 and divide by 4 + } + + private void encodeExceptionValues(int[] exceptionValues) { + if ((compressedBuffer == null) || (numExceptions == 0)) { + return; + } + int excByteOffset = compressedArrayByteSize(); + + switch (exceptionCode) { + case 0: { // 1 byte exceptions + int i = 0; + do { + int intIndex = COMPRESSED_INDEX + (excByteOffset >> 2); // round down here. + setBufferIntBits(intIndex, ((excByteOffset & 3) * 8), 8, exceptionValues[i]); + excByteOffset++; + } while (++i < numExceptions); + } + break; + + case 1: { // 2 byte exceptions + int excShortOffset = (excByteOffset + 1) >> 1; // to next multiple of two bytes. + int intIndex = COMPRESSED_INDEX + (excShortOffset >> 1); // round down here. + int i = 0; + if ((excShortOffset & 1) != 0) { // encode first 2 byte exception in high 2 bytes of same int as last frame bits. + setBufferIntBits(intIndex, 16, 16, exceptionValues[i]); + intIndex++; + i++; + } + for (; i < (numExceptions-1); i += 2) { + compressedBuffer.put(intIndex++, (exceptionValues[i+1] << 16) | exceptionValues[i]); + } + if (i < numExceptions) { + compressedBuffer.put(intIndex, exceptionValues[i]); // also clear the high 16 bits + } + } + break; + + case 2: { // 4 byte exceptions + int excIntOffSet = COMPRESSED_INDEX + ((excByteOffset + 3) >> 2); // to next multiple of four bytes, in ints. + int i = 0; + do { + compressedBuffer.put(excIntOffSet + i, exceptionValues[i]); + } while(++i < numExceptions); + } + break; + } + } + + /** Decode the exception values while going through the exception chain. + *
For performance, delegate/subclass this to classes with fixed exceptionCode. + *
Also, decoding exceptions is preferably done from an int boundary instead of + * from a random byte directly after the compressed array. This will allow faster + * decoding of exceptions, at the cost of at most 3 bytes. + *
When ((numFrameBits * unComprSize) % 32) == 0, this cost will always be + * zero bytes so specialize for these cases. + */ + private void patchExceptions() { + numExceptions = 0; + if (firstExceptionIndex == -1) { + return; + } + int excIndex = firstExceptionIndex; + int excByteOffset = compressedArrayByteSize(); + int excValue; + int intIndex; + + switch (exceptionCode) { + case 0: { // 1 byte exceptions + do { + intIndex = COMPRESSED_INDEX + (excByteOffset >> 2); + int firstBitPosition = (excByteOffset & 3) << 3; + excValue = (compressedBuffer.get(intIndex) >>> firstBitPosition) & ((1 << 8) - 1); + excIndex = patch(excIndex, excValue); + excByteOffset++; + } while (excIndex < unComprSize); + } + break; + + case 1: { // 2 byte exceptions + int excShortOffset = (excByteOffset + 1) >> 1; // to next multiple of two bytes. + intIndex = COMPRESSED_INDEX + (excShortOffset >> 1); // round down here. + if ((excShortOffset & 1) != 0) { + // decode first 2 byte exception from high 2 bytes of same int as last frame bits. + excValue = compressedBuffer.get(intIndex++) >>> 16; + excIndex = patch(excIndex, excValue); + } + while (excIndex < unComprSize) { + excValue = compressedBuffer.get(intIndex) & ((1<<16)-1); + excIndex = patch(excIndex, excValue); + if (excIndex >= unComprSize) { + break; + } + excValue = compressedBuffer.get(intIndex++) >>> 16; + excIndex = patch(excIndex, excValue); + } + } + break; + + case 2: // 4 byte exceptions + intIndex = COMPRESSED_INDEX + ((excByteOffset + 3) >> 2); // to next multiple of four bytes, in ints. + do { + excValue = compressedBuffer.get(intIndex++); + excIndex = patch(excIndex, excValue); + } while (excIndex < unComprSize); + break; + } + } + + /** The 4 byte header (32 bits) contains: + * + * - 4 bits for the compression method: 0b0001 for PFor + * - 4 bits unused + * + * - 5 bits for (numFrameBits-1) + * - 2 bits for the exception code: 0b00: byte, 0b01: short, 0b10: int, 0b11: long (unused). + * - 1 bit unused + * + * - 8 bits for uncompressed input size - 1, + * + * - 8 bits for the index of the first exception + 1, (0 when no exceptions) + */ + private void encodeHeader(int unComprSize, int firstExceptionIndex) { + assert exceptionCode >= 0; + assert exceptionCode <= 2; // 3 for long, but unused for now. + assert numFrameBits >= 1; + assert numFrameBits <= 32; + assert unComprSize >= 1; + assert unComprSize <= 128; + assert firstExceptionIndex >= -1; + assert firstExceptionIndex < unComprSize; + if (compressedBuffer != null) { + compressedBuffer.put(HEADER_INDEX, + ((firstExceptionIndex+1) << 24) + | ((unComprSize-1) << 16) + | ((exceptionCode & 3) << 13) | ((numFrameBits-1) << 8) + | (PFOR_COMPRESSION << 4)); + } + } + + protected void decodeHeader() { + int header = compressedBuffer.get(); + firstExceptionIndex = ((header >>> 24) & 255) - 1; + //unComprSize = ((header >>> 16) & 255) + 1; + numFrameBits = ((header >>> 8) & 31) + 1; + assert numFrameBits > 0: numFrameBits; + assert numFrameBits <= 32: numFrameBits; + // verify compression method: + assert PFOR_COMPRESSION == ((header >>> 4) & 15); + exceptionCode = (header >>> 13) & 3; + assert exceptionCode <= 2; + } + + /** Decompress from the buffer into output from a given offset. 
*/ + public void decompress() { + super.decompress(); + patchExceptions(); + } + + /** Patch and return index of next exception */ + private int patch(int excIndex, int excValue) { + int nextExceptionIndex = unCompressedData[excIndex] + excIndex + 1; // chain offset + unCompressedData[excIndex + offset] = excValue; // patch + assert nextExceptionIndex > excIndex; + numExceptions++; + return nextExceptionIndex; + } + + /** Determine the number of frame bits to be used for compression. + * Use only after setUnCompressedData(). + * This is done by taking a copy of the input, sorting it and using this + * to determine the compressed size for each possible numbits in a single pass, + * ignoring forced exceptions. + * Finally an estimation of the number of forced exceptions is reduced to + * less than 1 in 32 input numbers by increasing the number of frame bits. + * This implementation works by determining the total number of bytes needed for + * the compressed data, but does take into account alignment of exceptions + * at 2 or 4 byte boundaries. + */ + public int frameBitsForCompression() { + if ((offset + unComprSize) > unCompressedData.length) { + throw new IllegalArgumentException( "(offset " + offset + + " + unComprSize " + unComprSize + + ") > unCompressedData.length " + unCompressedData.length); + } + int copy[] = Arrays.copyOfRange(unCompressedData, offset, offset + unComprSize); + assert copy.length == unComprSize; + Arrays.sort(copy); + int maxValue = copy[copy.length-1]; + if (maxValue <= 1) { + return 1; + } + int bytesPerException = (maxValue < (1 << 8)) ? 1 : (maxValue < (1 << 16)) ? 2 : 4; + int frameBits = 1; + int bytesForFrame = (copy.length * frameBits + 7) / 8; + // initially assume all input is an exception. + int totalBytes = bytesForFrame + copy.length * bytesPerException; // excluding the header. + int bestBytes = totalBytes; + int bestFrameBits = frameBits; + int bestExceptions = copy.length; + for (int i = 0; i < copy.length; i++) { + // determine frameBits so that copy[i] is no more exception + while (copy[i] >= (1 << frameBits)) { + if (frameBits == 30) { // no point to increase further. + return bestFrameBits; + } + ++frameBits; + // increase bytesForFrame and totalBytes to correspond to frameBits + int newBytesForFrame = (copy.length * frameBits + 7) / 8; + totalBytes += newBytesForFrame - bytesForFrame; + bytesForFrame = newBytesForFrame; + } + totalBytes -= bytesPerException; // no more need to store copy[i] as exception + if (totalBytes <= bestBytes) { // <= : prefer fewer exceptions at higher number of frame bits. + bestBytes = totalBytes; + bestFrameBits = frameBits; + bestExceptions = (copy.length - i - 1); + } + } + if (bestExceptions > 0) { // check for forced exceptions. + // This ignores the position of the first exception, for which enough bits are available in the header. + int allowedNumExceptions = bestExceptions + (copy.length >> 5); // 1 in 32 is allowed to be forced. + // (copy.length >> bestFrameBits): Minimum exception chain size including forced ones, + // ignoring the position of the first exception. + while (allowedNumExceptions < (copy.length >> bestFrameBits)) { // Too many forced? + bestFrameBits++; // Reduce forced exceptions and perhaps reduce actual exceptions + // Dilemma: decompression speed reduces with increasing number of frame bits, + // so it may be better to increase no more than once or twice here. 
+ } + } + return bestFrameBits; + } +} diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/gendecompress.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/gendecompress.py Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,121 @@ +""" + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +""" +Generate source code for java classes for FOR decompression. +""" + +USE_SCRATCH = False + +def bitsExpr(i, numFrameBits): + framePos = i * numFrameBits + intValNum = (framePos / 32) + bitPos = framePos % 32 + if USE_SCRATCH: + bitsInInt = "inputInts[" + str(intValNum) + "]" + else: + bitsInInt = "intValue" + str(intValNum) + needBrackets = 0 + if bitPos > 0: + bitsInInt += " >>> " + str(bitPos) + needBrackets = 1 + if bitPos + numFrameBits > 32: + if needBrackets: + bitsInInt = "(" + bitsInInt + ")" + if USE_SCRATCH: + bitsInInt += " | (inputInts[" + str(intValNum+1) + "] << "+ str(32 - bitPos) + ")" + else: + bitsInInt += " | (intValue" + str(intValNum+1) + " << "+ str(32 - bitPos) + ")" + needBrackets = 1 + if bitPos + numFrameBits != 32: + if needBrackets: + bitsInInt = "(" + bitsInInt + ")" + bitsInInt += " & mask" + return bitsInInt + + +def genDecompressClass(numFrameBits): + className = "For" + str(numFrameBits) + "Decompress" + fileName = className + ".java" + imports = "import java.nio.IntBuffer;\n" + f = open(fileName, 'w') + w = f.write + try: + w("package org.apache.lucene.util.pfor;\n") + w("""/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */""") + w("\n/* This program is generated, do not modify. 
See gendecompress.py */\n\n") + w("import java.nio.IntBuffer;\n") + w("class " + className + " extends ForDecompress {\n") + w(" static final int numFrameBits = " + str(numFrameBits) + ";\n") + w(" static final int mask = (int) ((1L<= 32) {\n") + w(' for(int step=0;step<4;step++) {\n') + if USE_SCRATCH: + w(' compressedBuffer.get(inputInts, 0, %d);\n' % numFrameBits) + else: + for i in range(numFrameBits): # declare int vars and init from buffer + w(" int intValue" + str(i) + " = compressedBuffer.get();\n") + + for i in range(32): # set output from int vars + w(" output[" + str(i) + " + outputOffset] = " + bitsExpr(i, numFrameBits) + ";\n") + w(""" // inputSize -= 32; + outputOffset += 32; + } + + //if (inputSize > 0) { + // decodeAnyFrame(compressedBuffer, bufIndex, inputSize, numFrameBits, output, outputOffset); + //} + } +} +""") + finally: f.close() + + + +def genDecompressClasses(): + numFrameBits = 1 + while numFrameBits <= 31: # 32 special case, not generated. + genDecompressClass(numFrameBits) + numFrameBits += 1 + + + +if __name__ == "__main__": + genDecompressClasses() diff -r e4226f9efcdc lucene/src/java/org/apache/lucene/util/pfor/package.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/java/org/apache/lucene/util/pfor/package.html Tue Nov 02 18:19:28 2010 -0400 @@ -0,0 +1,25 @@ + + + + + + + +Classes dealing with (patched) frame of reference compression and decompression. + + diff -r e4226f9efcdc lucene/src/test/org/apache/lucene/TestDemo.java --- a/lucene/src/test/org/apache/lucene/TestDemo.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/test/org/apache/lucene/TestDemo.java Tue Nov 02 18:19:28 2010 -0400 @@ -61,9 +61,10 @@ // Now search the index: IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true + QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fieldname", analyzer); + assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits); // Parse a simple query that searches for "text": - QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fieldname", analyzer); Query query = parser.parse("text"); TopDocs hits = isearcher.search(query, null, 1); assertEquals(1, hits.totalHits); diff -r e4226f9efcdc lucene/src/test/org/apache/lucene/TestExternalCodecs.java --- a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Tue Nov 02 18:19:28 2010 -0400 @@ -339,6 +339,114 @@ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) { return new RAMDocsAndPositionsEnum(ramField.termToDocs.get(current), skipDocs); } + + @Override + public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException { + return new RAMBulkPostingsEnum(ramField.termToDocs.get(current)); + } + } + + static final int BULK_BUFFER_SIZE = 64; + + // Bulk postings API + private static class RAMBulkPostingsEnum extends BulkPostingsEnum { + private final RAMTerm ramTerm; + private final BlockReader docDeltasReader; + private final BlockReader freqsReader; + private final BlockReader posDeltasReader; + + public RAMBulkPostingsEnum(RAMTerm ramTerm) throws IOException { + this.ramTerm = ramTerm; + + int[] docDeltas = new int[10]; + int[] freqs = new int[10]; + int[] posDeltas = new int[10]; + int docUpto = 0; + int posUpto = 0; + int lastDocID = 0; + for(RAMDoc doc : ramTerm.docs) { + if (docDeltas.length == docUpto) { + docDeltas = 
ArrayUtil.grow(docDeltas, 1+docUpto); + freqs = ArrayUtil.grow(freqs, 1+docUpto); + } + docDeltas[docUpto] = doc.docID - lastDocID; + freqs[docUpto] = doc.positions.length; + docUpto++; + lastDocID = doc.docID; + int lastPos = 0; + for(int pos : doc.positions) { + if (posDeltas.length == posUpto) { + posDeltas = ArrayUtil.grow(posDeltas, 1+posUpto); + } + posDeltas[posUpto++] = pos - lastPos; + lastPos = pos; + } + } + docDeltasReader = new SimpleBlockReader(docDeltas, docUpto); + freqsReader = new SimpleBlockReader(freqs, docUpto); + posDeltasReader = new SimpleBlockReader(posDeltas, posUpto); + } + + @Override + public BlockReader getDocDeltasReader() { + return docDeltasReader; + } + + @Override + public BlockReader getFreqsReader() { + return freqsReader; + } + + @Override + public BlockReader getPositionDeltasReader() { + return posDeltasReader; + } + + @Override + public JumpResult jump(int target, int curCount) { + return null; + } + + private static class SimpleBlockReader extends BlockReader { + private final int[] ints; + private final int count; + private boolean done; + + public SimpleBlockReader(int[] ints, int count) { + this.ints = ints; + this.count = count; + } + + @Override + public int[] getBuffer() { + return ints; + } + + @Override + public int fill() { + if (!done) { + done = true; + return count; + } else { + return 0; + } + } + + @Override + public int end() { + return done ? 0 : count; + } + + @Override + public int offset() { + return 0; + } + + @Override + public void setOffset(int offset) { + throw new UnsupportedOperationException(); + } + } } private static class RAMDocsEnum extends DocsEnum { diff -r e4226f9efcdc lucene/src/test/org/apache/lucene/index/TestIndexWriter.java --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Tue Nov 02 18:19:28 2010 -0400 @@ -5311,4 +5311,138 @@ _TestUtil.checkIndex(dir); dir.close(); } + + public void testGrowingGaps() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + //w.w.setInfoStream(System.out); + Document doc = new Document(); + Field f = newField(random, "field", "two", Field.Store.NO, Field.Index.ANALYZED); + doc.add(f); + final int NUM_GAPS = 100; + for(int i=0;i= docDeltaMax) { + docDeltaMax = docDeltasReader.fill(); + } + docID = 0; + for(int i=0;i 0 || i==0); + docID += docDeltas[docDeltaUpto++]; + assertEquals(docID, docIDs[i]); + } + + // nocommit test reuse too + // test jump using BulkPostingsEnum: + boolean didJump = false; + for(int i=0;i= docDeltaMax) { + docDeltaMax = docDeltasReader.fill(); + //System.out.println(" do pre-fill"); + } + for(int j=count;j 0); for(int i=0;i<11777;i++) { - assertEquals(i, r.next()); + assertEquals(i, buffer[pointer++]); + if (pointer == pointerMax) { + pointerMax = r.fill(); + assertTrue(pointerMax > 0); + pointer = 0; + } } in.close(); diff -r e4226f9efcdc lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java --- a/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java Tue Nov 02 16:09:05 2010 +0000 +++ b/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java Tue Nov 02 18:19:28 2010 -0400 @@ -81,11 +81,14 @@ public void seek(long pos) {} public int readBlock() throws IOException { buffer[0] = in.readVInt(); + 
//System.out.println("readBlock in=" + in + " fp=" + in.getFilePointer() + ":\n buffer[0]=" + buffer[0]); final int count = buffer[0] <= 3 ? baseBlockSize-1 : 2*baseBlockSize-1; assert buffer.length >= count: "buffer.length=" + buffer.length + " count=" + count; for(int i=0;i