Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt	(revision 1189604)
+++ lucene/CHANGES.txt	(working copy)
@@ -99,6 +99,9 @@
   FilteredQuery/IndexSearcher added by LUCENE-1536 to Lucene 4.0.
   (Uwe Schindler)
 
+* LUCENE-2205: Very substantial (3-5X) reduction in the RAM required to hold
+  the terms index on opening an IndexReader (Aaron McCurry via Mike McCandless)
+
 Test Cases
 
 * LUCENE-3420: Disable the finalness checks in TokenStream and Analyzer
Index: lucene/src/test/org/apache/lucene/index/TestTermInfosReaderIndex.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestTermInfosReaderIndex.java	(revision 0)
+++ lucene/src/test/org/apache/lucene/index/TestTermInfosReaderIndex.java	(revision 0)
@@ -0,0 +1,171 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestTermInfosReaderIndex extends LuceneTestCase {
+  
+  private static final int NUMBER_OF_DOCUMENTS = 1000;
+  private static final int NUMBER_OF_FIELDS = 100;
+  private TermInfosReaderIndex index;
+  private Directory directory;
+  private SegmentTermEnum termEnum;
+  private int indexDivisor;
+  private int termIndexInterval;
+  private int readBufferSize = 1024;
+  private IndexReader reader;
+  private List<Term> sampleTerms;
+  
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    indexDivisor = _TestUtil.nextInt(random, 1, 10);
+    directory = newDirectory();
+    termIndexInterval = populate(directory);
+    // Look up the name of the index's only segment so its files can be opened by name.
+    SegmentReader r = SegmentReader.getOnlySegmentReader(directory);
+    String segment = r.getSegmentName();
+    r.close();
+    // Open the segment's field infos, terms index file and terms file directly.
+    FieldInfos fieldInfos = new FieldInfos(directory, IndexFileNames.segmentFileName(segment, IndexFileNames.FIELD_INFOS_EXTENSION));
+    String segmentFileName = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_INDEX_EXTENSION);
+    long tiiFileLength = directory.fileLength(segmentFileName);
+    IndexInput input = directory.openInput(segmentFileName, readBufferSize);
+    termEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_EXTENSION), readBufferSize), fieldInfos, false);
+    int totalIndexInterval = termEnum.indexInterval * indexDivisor;
+    // Build the index under test from the terms index enum, then release the enum and its input.
+    SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true);
+    index = new TermInfosReaderIndex(indexEnum, indexDivisor, tiiFileLength, totalIndexInterval);
+    indexEnum.close();
+    input.close();
+    // Open a regular reader and draw up to 1000 of its terms for testRandomSearchPerformance.
+    reader = IndexReader.open(directory);
+    sampleTerms = sample(reader,1000);
+    
+  }
+  
+  @Override
+  public void tearDown() throws Exception {
+    termEnum.close();  // the terms-file enum opened in setUp
+    reader.close();
+    directory.close();  // closed last, after everything that was opened on it
+    super.tearDown();
+  }
+  
+  public void testSeekEnum() throws CorruptIndexException, IOException {
+    int indexPosition = 3;
+    SegmentTermEnum clone = (SegmentTermEnum) termEnum.clone();
+    Term term = findTermThatWouldBeAtIndex(clone, indexPosition);
+    // Seek the same clone through the index; it must land on the term found by scanning.
+    index.seekEnum(clone, indexPosition);
+    assertEquals(term, clone.term());
+    clone.close();  // close only after the last read; the enum must not be used once closed
+  }
+  
+  public void testCompareTo() throws IOException {
+    Term term = new Term("field" + random.nextInt(NUMBER_OF_FIELDS) ,getText());
+    BytesRef termBytesRef = new BytesRef(term.text);
+    for (int i = 0; i < index.length(); i++) {
+      Term t = index.getTerm(i);  // decode entry i back into a Term
+      int compareTo = term.compareTo(t);
+      assertEquals(compareTo, index.compareTo(term, termBytesRef, i));  // NOTE(review): asserts the exact value, not just the sign
+    }
+  }
+  
+  public void testRandomSearchPerformance() throws CorruptIndexException, IOException {  // nothing is timed here; only hit counts are checked
+    IndexSearcher searcher = new IndexSearcher(reader);
+    for (Term t : sampleTerms) {
+      TermQuery query = new TermQuery(t);
+      TopDocs topDocs = searcher.search(query, 10);
+      assertTrue(topDocs.totalHits > 0);  // sampleTerms were drawn from this same reader in setUp
+    }
+    searcher.close();
+  }
+
+  /** Returns up to <code>size</code> terms of the reader, picked and shuffled with the test's shared random. */
+  private List<Term> sample(IndexReader reader, int size) throws IOException {
+    List<Term> sample = new ArrayList<Term>();
+    TermEnum terms = reader.terms();
+    while (terms.next()) {
+      if (sample.size() >= size) {
+        int pos = random.nextInt(size);  // sample is full: overwrite a random slot
+        sample.set(pos, terms.term());
+      } else {
+        sample.add(terms.term());
+      }
+    }
+    terms.close();
+    Collections.shuffle(sample, random);  // same random as setUp uses, so a failing run can be replayed
+    return sample;
+  }
+
+  private Term findTermThatWouldBeAtIndex(SegmentTermEnum termEnum, int index) throws IOException {
+    int termPosition = index * termIndexInterval * indexDivisor;  // number of next() calls to make on the enum
+    for (int i = 0; i < termPosition; i++) {
+      if (!termEnum.next()) {
+        fail("Should not have run out of terms.");
+      }
+    }
+    return termEnum.term();  // the term the enum rests on after termPosition advances
+  }
+
+  private int populate(Directory directory) throws CorruptIndexException, LockObtainFailedException, IOException {
+    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, 
+        new MockAnalyzer(random, MockTokenizer.KEYWORD, false));
+    // turn off compound file, this test will open some index files directly.
+    LogMergePolicy mp = newLogMergePolicy();
+    mp.setUseCompoundFile(false);
+    config.setMergePolicy(mp);
+    // every document gets one random value from getText() in each of the NUMBER_OF_FIELDS fields
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory, config);
+    for (int i = 0; i < NUMBER_OF_DOCUMENTS; i++) {
+      Document document = new Document();
+      for (int f = 0; f < NUMBER_OF_FIELDS; f++) {
+        document.add(newField("field" + f,getText(),Store.NO,Index.NOT_ANALYZED_NO_NORMS));
+      }
+      writer.addDocument(document);
+    }
+    writer.optimize();  // presumably leaves the single segment that setUp's getOnlySegmentReader expects - verify
+    writer.close();
+    return config.getTermIndexInterval();  // the interval from the config the writer was created with
+  }
+  
+  private String getText() {  // random term text: a random long rendered in the largest supported radix
+    return Long.toString(random.nextLong(),Character.MAX_RADIX);
+  }
+}

Property changes on: lucene/src/test/org/apache/lucene/index/TestTermInfosReaderIndex.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/test/org/apache/lucene/util/TestPagedBytes.java
===================================================================
--- lucene/src/test/org/apache/lucene/util/TestPagedBytes.java	(revision 0)
+++ lucene/src/test/org/apache/lucene/util/TestPagedBytes.java	(revision 0)
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util;
+
+import java.util.Arrays;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+
+public class TestPagedBytes extends LuceneTestCase {
+
+  /** Round-trips random bytes through PagedBytes' DataOutput/DataInput, mixing single-byte and bulk calls. */
+  public void testDataInputOutput() throws Exception {
+    for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
+      final PagedBytes p = new PagedBytes(_TestUtil.nextInt(random, 1, 20));
+      final DataOutput out = p.getDataOutput();
+      final int numBytes = random.nextInt(10000000);
+      final byte[] answer = new byte[numBytes];
+      random.nextBytes(answer);
+      int written = 0;
+      while(written < numBytes) {
+        if (random.nextInt(10) == 7) {
+          out.writeByte(answer[written++]);
+        } else {
+          int chunk = Math.min(random.nextInt(1000), numBytes - written);  // never past the end of answer
+          out.writeBytes(answer, written, chunk);
+          written += chunk;
+        }
+      }
+      // freeze before reading; the flag is picked at random so both settings get exercised
+      p.freeze(random.nextBoolean());
+
+      final DataInput in = p.getDataInput();
+
+      final byte[] verify = new byte[numBytes];
+      int read = 0;
+      while(read < numBytes) {
+        if (random.nextInt(10) == 7) {
+          verify[read++] = in.readByte();
+        } else {
+          int chunk = Math.min(random.nextInt(1000), numBytes - read);  // never past the end of verify
+          in.readBytes(verify, read, chunk);
+          read += chunk;
+        }
+      }
+      assertTrue(Arrays.equals(answer, verify));
+    }
+  }
+}

Property changes on: lucene/src/test/org/apache/lucene/util/TestPagedBytes.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/index/TermInfosReaderIndex.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/TermInfosReaderIndex.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/index/TermInfosReaderIndex.java	(revision 0)
@@ -0,0 +1,251 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.util.BitUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes.PagedBytesDataInput;
+import org.apache.lucene.util.PagedBytes.PagedBytesDataOutput;
+import org.apache.lucene.util.PagedBytes;
+import org.apache.lucene.util.packed.GrowableWriter;
+import org.apache.lucene.util.packed.PackedInts;
+
+/**
+ * This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in an
+ * index segment. Pairs are accessed either by Term or by ordinal position in
+ * the set. The Terms and TermInfos are serialized and stored in paged byte
+ * blocks, and the offset of each entry is stored in a packed int array.
+ */
+class TermInfosReaderIndex {
+
+  private static final int MAX_PAGE_BITS = 18; // 256 KB block
+  private Term[] fields;
+  private int totalIndexInterval;
+  private Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUTF16Comparator();
+  private final PagedBytesDataInput dataInput;
+  private final PackedInts.Reader indexToDataOffset;
+  private final int indexSize;
+  private final int skipInterval;
+
+  /**
+   * Reads the terms index enum, keeping every indexDivisor'th entry, and
+   * serializes each kept term and its TermInfo into an in-memory buffer.
+   * @param indexEnum
+   *          the term enum over the terms index; it is read until exhausted.
+   * @param indexDivisor
+   *          the index divisor; entries in between kept ones are skipped.
+   * @param tiiFileLength
+   *          the size of the tii file, used to estimate the buffer's page size.
+   * @param totalIndexInterval
+   *          the total index interval; seekEnum uses it to compute positions.
+   * @throws IOException declared by the underlying reads and writes.
+   */
+  TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval) throws IOException {
+    this.totalIndexInterval = totalIndexInterval;
+    indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor;
+    skipInterval = indexEnum.skipInterval;
+    // this is only an initial size estimate; it is used solely to choose the page size below
+    long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor;
+    PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize));
+    PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();
+    // indexToTerms maps each kept entry's ordinal to the offset of its serialized form
+    GrowableWriter indexToTerms = new GrowableWriter(4, indexSize, false);
+    String currentField = null;
+    List<String> fieldStrs = new ArrayList<String>();
+    int fieldCounter = -1;
+    for (int i = 0; indexEnum.next(); i++) {
+      Term term = indexEnum.term();
+      if (currentField != term.field) {  // identity comparison; presumably relies on interned field names - verify
+        currentField = term.field;
+        fieldStrs.add(currentField);
+        fieldCounter++;
+      }
+      TermInfo termInfo = indexEnum.termInfo();
+      indexToTerms.set(i, dataOutput.getPosition());  // remember where entry i starts
+      dataOutput.writeVInt(fieldCounter);  // field is stored as an id into fieldStrs
+      dataOutput.writeString(term.text());
+      dataOutput.writeVInt(termInfo.docFreq);
+      if (termInfo.docFreq >= skipInterval) {  // skipOffset is stored only in this case; seekEnum mirrors the check
+        dataOutput.writeVInt(termInfo.skipOffset);
+      }
+      dataOutput.writeVLong(termInfo.freqPointer);
+      dataOutput.writeVLong(termInfo.proxPointer);
+      dataOutput.writeVLong(indexEnum.indexPointer);
+      for (int j = 1; j < indexDivisor; j++) {  // skip the entries dropped by indexDivisor
+        if (!indexEnum.next()) {
+          break;
+        }
+      }
+    }
+    // one Term per distinct field; seekEnum and getTerm call createTerm on these
+    fields = new Term[fieldStrs.size()];
+    for (int i = 0; i < fields.length; i++) {
+      fields[i] = new Term(fieldStrs.get(i));
+    }
+    // NOTE(review): freeze(true) presumably trims the last block - confirm against PagedBytes
+    dataPagedBytes.freeze(true);
+    dataInput = dataPagedBytes.getDataInput();
+    indexToDataOffset = indexToTerms.getMutable();
+  }
+
+  private static int estimatePageBits(long estSize) {  // result is always within [4, MAX_PAGE_BITS]
+    return Math.max(Math.min(64 - BitUtil.nlz(estSize), MAX_PAGE_BITS), 4);
+  }
+
+  /** Decodes the entry at <code>indexOffset</code> and seeks <code>enumerator</code> to it. */
+  void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
+    PagedBytesDataInput input = (PagedBytesDataInput) dataInput.clone();
+    input.setPosition(indexToDataOffset.get(indexOffset));
+
+    // read the term
+    int fieldId = input.readVInt();
+    Term field = fields[fieldId];
+    Term term = field.createTerm(input.readString());
+
+    // read the terminfo
+    TermInfo termInfo = new TermInfo();
+    termInfo.docFreq = input.readVInt();
+    if (termInfo.docFreq >= skipInterval) {  // same condition the constructor used when writing
+      termInfo.skipOffset = input.readVInt();
+    } else {
+      termInfo.skipOffset = 0;
+    }
+    termInfo.freqPointer = input.readVLong();
+    termInfo.proxPointer = input.readVLong();
+
+    long pointer = input.readVLong();  // the indexPointer written last for this entry
+
+    // perform the seek
+    enumerator.seek(pointer, ((long) indexOffset * totalIndexInterval) - 1, term, termInfo);
+  }
+
+  /**
+   * Binary search for the given term.  Returns the position of an equal entry,
+   * or the position just below the insertion point (may be -1) if none is equal.
+   * @param term the term to locate.
+   * @param termBytesRef the bytes compared against the stored term text.
+   * @throws IOException declared by the underlying reads.
+   */
+  int getIndexOffset(Term term, BytesRef termBytesRef) throws IOException {
+    int lo = 0;
+    int hi = indexSize - 1;
+    PagedBytesDataInput input = (PagedBytesDataInput) dataInput.clone();  // one clone shared by all probes
+    BytesRef scratch = new BytesRef();  // reused for the stored text of each probed entry
+    while (hi >= lo) {
+      int mid = (lo + hi) >>> 1;
+      int delta = compareTo(term, termBytesRef, mid, input, scratch);
+      if (delta < 0)
+        hi = mid - 1;
+      else if (delta > 0)
+        lo = mid + 1;
+      else
+        return mid;
+    }
+    return hi;  // no exact match: hi ended up one below lo
+  }
+
+  /**
+   * Gets the term at the given position by decoding it from the data.  For testing.
+   * 
+   * @param termIndex
+   *          the position in the index of the entry whose term is read.
+   * @return the term built from the entry's field and text.
+   * @throws IOException declared by the underlying reads.
+   */
+  Term getTerm(int termIndex) throws IOException {
+    PagedBytesDataInput input = (PagedBytesDataInput) dataInput.clone();
+    input.setPosition(indexToDataOffset.get(termIndex));
+
+    // read the term
+    int fieldId = input.readVInt();
+    Term field = fields[fieldId];
+    return field.createTerm(input.readString());
+  }
+
+  /**
+   * Returns the number of index entries kept after applying the index divisor;
+   * this is not the number of terms in the segment.
+   * @return the entry count computed in the constructor.
+   */
+  int length() {
+    return indexSize;
+  }
+
+  /**
+   * Compares the given term against the term stored at the given position of
+   * the index, i.e. it returns a negative value when term is less than the
+   * index term, zero when they are equal, and a positive value otherwise.
+   * 
+   * @param term the given term; its field is compared first.
+   * @param termBytesRef the bytes compared against the stored term text.
+   * @param termIndex the position in the index of the term to compare.
+   * @return the comparison result.
+   * @throws IOException declared by the underlying reads.
+   */
+  int compareTo(Term term, BytesRef termBytesRef, int termIndex) throws IOException {
+    return compareTo(term, termBytesRef, termIndex, (PagedBytesDataInput) dataInput.clone(), new BytesRef());
+  }
+
+  /**
+   * Compares the fields of the two terms first and returns that result when
+   * they differ; otherwise reads the stored term text into <code>reuse</code>
+   * and compares <code>termBytesRef</code> against it.
+   * 
+   * @param term the term to compare; only its field is read here.
+   * @param termBytesRef the bytes compared against the stored term text.
+   * @param termIndex the position in the index of the term to compare.
+   * @param input the data input; it is repositioned by this call.
+   * @param reuse scratch buffer that is overwritten with the stored term text.
+   * @return the field comparison if non-zero, else the text comparison.
+   * @throws IOException declared by the underlying reads.
+   */
+  private int compareTo(Term term, BytesRef termBytesRef, int termIndex, PagedBytesDataInput input, BytesRef reuse) throws IOException {
+    // compare the fields first; compareField leaves input just past the entry's field id,
+    // so when the fields are equal the stored text can be read straight from input
+    int c = compareField(term, termIndex, input);
+    if (c == 0) {
+      reuse.length = input.readVInt();  // length prefix of the stored text
+      reuse.grow(reuse.length);  // make sure reuse.bytes can hold it
+      input.readBytes(reuse.bytes, 0, reuse.length);
+      return comparator.compare(termBytesRef, reuse);
+    }
+    return c;
+  }
+
+  /**
+   * Compares the field of the given term with the field of the stored entry,
+   * leaving the input positioned just after the entry's field id.
+   * @param term
+   *          the given term.
+   * @param termIndex
+   *          the position in the index of the stored entry.
+   * @param input
+   *          the data input; it is repositioned to the entry by this call.
+   * @return the result of comparing the two field names as Strings.
+   * @throws IOException declared by the underlying read.
+   */
+  private int compareField(Term term, int termIndex, PagedBytesDataInput input) throws IOException {
+    input.setPosition(indexToDataOffset.get(termIndex));
+    return term.field.compareTo(fields[input.readVInt()].field);
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/index/TermInfosReaderIndex.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/index/TermInfosReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/TermInfosReader.java	(revision 1189604)
+++ lucene/src/java/org/apache/lucene/index/TermInfosReader.java	(working copy)
@@ -21,6 +21,7 @@
 import java.io.IOException;
 
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.DoubleBarrelLRUCache;
 import org.apache.lucene.util.CloseableThreadLocal;
 
@@ -37,9 +38,8 @@
   private final SegmentTermEnum origEnum;
   private final long size;
 
-  private final Term[] indexTerms;
-  private final TermInfo[] indexInfos;
-  private final long[] indexPointers;
+  private final TermInfosReaderIndex index;
+  private final int indexLength;
   
   private final int totalIndexInterval;
 
@@ -109,34 +109,20 @@
       if (indexDivisor != -1) {
         // Load terms index
         totalIndexInterval = origEnum.indexInterval * indexDivisor;
-        final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_INDEX_EXTENSION),
+        final String indexFileName = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_INDEX_EXTENSION);
+        final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(indexFileName,
                                                                                   readBufferSize), fieldInfos, true);
-
         try {
-          int indexSize = 1+((int)indexEnum.size-1)/indexDivisor;  // otherwise read index
-
-          indexTerms = new Term[indexSize];
-          indexInfos = new TermInfo[indexSize];
-          indexPointers = new long[indexSize];
-        
-          for (int i = 0; indexEnum.next(); i++) {
-            indexTerms[i] = indexEnum.term();
-            indexInfos[i] = indexEnum.termInfo();
-            indexPointers[i] = indexEnum.indexPointer;
-        
-            for (int j = 1; j < indexDivisor; j++)
-              if (!indexEnum.next())
-                break;
-          }
+          index = new TermInfosReaderIndex(indexEnum, indexDivisor, dir.fileLength(indexFileName), totalIndexInterval);  // length stays a long, as the constructor expects
+          indexLength = index.length();
         } finally {
           indexEnum.close();
         }
       } else {
         // Do not load terms index:
         totalIndexInterval = -1;
-        indexTerms = null;
-        indexInfos = null;
-        indexPointers = null;
+        index = null;
+        indexLength = -1;
       }
       success = true;
     } finally {
@@ -180,38 +166,14 @@
     return resources;
   }
 
-
-  /** Returns the offset of the greatest index entry which is less than or equal to term.*/
-  private final int getIndexOffset(Term term) {
-    int lo = 0;					  // binary search indexTerms[]
-    int hi = indexTerms.length - 1;
-
-    while (hi >= lo) {
-      int mid = (lo + hi) >>> 1;
-      int delta = term.compareTo(indexTerms[mid]);
-      if (delta < 0)
-	hi = mid - 1;
-      else if (delta > 0)
-	lo = mid + 1;
-      else
-	return mid;
-    }
-    return hi;
-  }
-
-  private final void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
-    enumerator.seek(indexPointers[indexOffset],
-                   ((long) indexOffset * totalIndexInterval) - 1,
-                   indexTerms[indexOffset], indexInfos[indexOffset]);
-  }
-
   /** Returns the TermInfo for a Term in the set, or null. */
   TermInfo get(Term term) throws IOException {
-    return get(term, false);
+    BytesRef termBytesRef = new BytesRef(term.text);
+    return get(term, false, termBytesRef);
   }
   
   /** Returns the TermInfo for a Term in the set, or null. */
-  private TermInfo get(Term term, boolean mustSeekEnum) throws IOException {
+  private TermInfo get(Term term, boolean mustSeekEnum, BytesRef termBytesRef) throws IOException {
     if (size == 0) return null;
 
     ensureIndexIsRead();
@@ -231,8 +193,8 @@
 	&& ((enumerator.prev() != null && term.compareTo(enumerator.prev())> 0)
 	    || term.compareTo(enumerator.term()) >= 0)) {
       int enumOffset = (int)(enumerator.position/totalIndexInterval)+1;
-      if (indexTerms.length == enumOffset	  // but before end of block
-    || term.compareTo(indexTerms[enumOffset]) < 0) {
+      if (indexLength == enumOffset    // but before end of block
+    || index.compareTo(term,termBytesRef,enumOffset) < 0) {
        // no need to seek
 
         final TermInfo ti;
@@ -267,10 +229,10 @@
       indexPos = (int) (tiOrd.termOrd / totalIndexInterval);
     } else {
       // Must do binary search:
-      indexPos = getIndexOffset(term);
+      indexPos = index.getIndexOffset(term,termBytesRef);
     }
 
-    seekEnum(enumerator, indexPos);
+    index.seekEnum(enumerator, indexPos);
     enumerator.scanTo(term);
     final TermInfo ti;
     if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
@@ -307,7 +269,7 @@
   }
 
   private void ensureIndexIsRead() {
-    if (indexTerms == null) {
+    if (index == null) {
       throw new IllegalStateException("terms index was not loaded when this reader was created");
     }
   }
@@ -317,10 +279,11 @@
     if (size == 0) return -1;
 
     ensureIndexIsRead();
-    int indexOffset = getIndexOffset(term);
+    BytesRef termBytesRef = new BytesRef(term.text);
+    int indexOffset = index.getIndexOffset(term,termBytesRef);
     
     SegmentTermEnum enumerator = getThreadResources().termEnum;
-    seekEnum(enumerator, indexOffset);
+    index.seekEnum(enumerator, indexOffset);
 
     while(term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
 
@@ -337,7 +300,8 @@
 
   /** Returns an enumeration of terms starting at or after the named term. */
   public SegmentTermEnum terms(Term term) throws IOException {
-    get(term, true);
+    BytesRef termBytesRef = new BytesRef(term.text);
+    get(term, true, termBytesRef);
     return (SegmentTermEnum)getThreadResources().termEnum.clone();
   }
 }
Index: lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java
===================================================================
--- lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java	(revision 1189604)
+++ lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java	(working copy)
@@ -46,7 +46,7 @@
   public int getPosition() {
     return pos;
   }
-
+  
   public void reset(byte[] bytes, int offset, int len) {
     this.bytes = bytes;
     pos = offset;
Index: lucene/src/java/org/apache/lucene/util/PagedBytes.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/PagedBytes.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/util/PagedBytes.java	(revision 0)
@@ -0,0 +1,550 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexInput;
+
+/** Represents a logical byte[] as a series of pages.  You
+ *  can write-once into the logical byte[] (append only),
+ *  using copy, and then retrieve slices (BytesRef) into it
+ *  using fill.
+ *
+ * @lucene.internal
+ **/
+public final class PagedBytes {
+  private final List<byte[]> blocks = new ArrayList<byte[]>();
+  private final List<Integer> blockEnd = new ArrayList<Integer>();
+  private final int blockSize;
+  private final int blockBits;
+  private final int blockMask;
+  private boolean didSkipBytes;
+  private boolean frozen;
+  private int upto;
+  private byte[] currentBlock;
+
+  private static final byte[] EMPTY_BYTES = new byte[0];
+
+  public final static class Reader implements Closeable {
+    private final byte[][] blocks;
+    private final int[] blockEnds;
+    private final int blockBits;
+    private final int blockMask;
+    private final int blockSize;
+    private final CloseableThreadLocal<byte[]> threadBuffers = new CloseableThreadLocal<byte[]>();
+
+    public Reader(PagedBytes pagedBytes) {
+      blocks = new byte[pagedBytes.blocks.size()][];
+      for(int i=0;i<blocks.length;i++) {
+        blocks[i] = pagedBytes.blocks.get(i);
+      }
+      blockEnds = new int[blocks.length];
+      for(int i=0;i< blockEnds.length;i++) {
+        blockEnds[i] = pagedBytes.blockEnd.get(i);
+      }
+      blockBits = pagedBytes.blockBits;
+      blockMask = pagedBytes.blockMask;
+      blockSize = pagedBytes.blockSize;
+    }
+
+    /**
+     * Gets a slice out of {@link PagedBytes} starting at <i>start</i> with a
+     * given length. Iff the slice spans across a block border this method will
+     * allocate sufficient resources and copy the paged data.
+     * <p>
+     * Slices spanning more than one block are not supported.
+     * </p>
+     * @lucene.internal 
+     **/
+    public BytesRef fillSlice(BytesRef b, long start, int length) {
+      assert length >= 0: "length=" + length;
+      final int index = (int) (start >> blockBits);
+      final int offset = (int) (start & blockMask);
+      b.length = length;
+      if (blockSize - offset >= length) {
+        // Within block
+        b.bytes = blocks[index];
+        b.offset = offset;
+      } else {
+        // Split
+        byte[] buffer = threadBuffers.get();
+        if (buffer == null) {
+          buffer = new byte[length];
+          threadBuffers.set(buffer);
+        } else if (buffer.length < length) {
+          buffer = ArrayUtil.grow(buffer, length);
+          threadBuffers.set(buffer);
+        }
+        b.bytes = buffer;
+        b.offset = 0;
+        System.arraycopy(blocks[index], offset, buffer, 0, blockSize-offset);
+        System.arraycopy(blocks[1+index], 0, buffer, blockSize-offset, length-(blockSize-offset));
+      }
+      return b;
+    }
+    
+    /**
+     * Decodes the 1 or 2 byte length prefix stored at <i>start</i> and points
+     * <code>b</code> at the bytes that follow it inside the same block.
+     * <p>
+     * <b>Note:</b> slices spanning across block borders are not supported by
+     * this method; a set high bit in the first prefix byte marks a 2 byte prefix.
+     * </p>
+     * 
+     * @return the given {@link BytesRef}
+     * 
+     * @lucene.internal
+     **/
+    public BytesRef fill(BytesRef b, long start) {
+      final int prefixPos = (int) (start & blockMask);
+      final byte[] page = blocks[(int) (start >> blockBits)];
+      b.bytes = page;
+      final int firstByte = page[prefixPos];
+      if ((firstByte & 128) != 0) {
+        b.length = ((firstByte & 0x7f) << 8) | (page[1+prefixPos] & 0xff);
+        b.offset = prefixPos+2;
+        assert b.length > 0;
+      } else {
+        b.length = firstByte;
+        b.offset = prefixPos+1;
+      }
+      return b;
+    }
+
+    /**
+     * Reads length as 1 or 2 byte vInt prefix, starting at <i>start</i>. *
+     * <p>
+     * <b>Note:</b> this method does not support slices spanning across block
+     * borders.
+     * </p>
+     * 
+     * @return the internal block number of the slice.
+     * @lucene.internal
+     **/
+    public int fillAndGetIndex(BytesRef b, long start) {
+      final int index = (int) (start >> blockBits);
+      final int offset = (int) (start & blockMask);
+      final byte[] block = b.bytes = blocks[index];
+
+      if ((block[offset] & 128) == 0) {
+        b.length = block[offset];
+        b.offset = offset+1;
+      } else {
+        b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
+        b.offset = offset+2;
+        assert b.length > 0;
+      }
+      return index;
+    }
+
+    /**
+     * Reads length as 1 or 2 byte vInt prefix, starting at <i>start</i> and
+     * returns the start offset of the next part, suitable as start parameter on
+     * next call to sequentially read all {@link BytesRef}.
+     * 
+     * <p>
+     * <b>Note:</b> this method does not support slices spanning across block
+     * borders.
+     * </p>
+     * 
+     * @return the start offset of the next part, suitable as start parameter on
+     *         next call to sequentially read all {@link BytesRef}.
+     * @lucene.internal
+     **/
+    public long fillAndGetStart(BytesRef b, long start) {
+      final int index = (int) (start >> blockBits);
+      final int offset = (int) (start & blockMask);
+      final byte[] block = b.bytes = blocks[index];
+
+      if ((block[offset] & 128) == 0) {
+        b.length = block[offset];
+        b.offset = offset+1;
+        start += 1L + b.length;
+      } else {
+        b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
+        b.offset = offset+2;
+        start += 2L + b.length;
+        assert b.length > 0;
+      }
+      return start;
+    }
+    
+  
+    /**
+     * Gets a slice out of {@link PagedBytes} starting at <i>start</i>, the
+     * length is read as 1 or 2 byte vInt prefix. Iff the slice spans across a
+     * block border this method will allocate sufficient resources and copy the
+     * paged data.
+     * <p>
+     * Slices spanning more than one block are not supported.
+     * </p>
+     * 
+     * @lucene.internal
+     **/
+    public BytesRef fillSliceWithPrefix(BytesRef b, long start) {
+      final int index = (int) (start >> blockBits);
+      int offset = (int) (start & blockMask);
+      final byte[] block = blocks[index];
+      final int length;
+      if ((block[offset] & 128) == 0) {
+        length = block[offset];
+        offset = offset+1;
+      } else {
+        length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
+        offset = offset+2;
+        assert length > 0;
+      }
+      assert length >= 0: "length=" + length;
+      b.length = length;
+      if (blockSize - offset >= length) {
+        // Within block
+        b.offset = offset;
+        b.bytes = blocks[index];
+      } else {
+        // Split
+        byte[] buffer = threadBuffers.get();
+        if (buffer == null) {
+          buffer = new byte[length];
+          threadBuffers.set(buffer);
+        } else if (buffer.length < length) {
+          buffer = ArrayUtil.grow(buffer, length);
+          threadBuffers.set(buffer);
+        }
+        b.bytes = buffer;
+        b.offset = 0;
+        System.arraycopy(blocks[index], offset, buffer, 0, blockSize-offset);
+        System.arraycopy(blocks[1+index], 0, buffer, blockSize-offset, length-(blockSize-offset));
+      }
+      return b;
+    }
+
+    /** @lucene.internal */
+    public byte[][] getBlocks() {
+      return blocks;
+    }
+
+    /** @lucene.internal */
+    public int[] getBlockEnds() {
+      return blockEnds;
+    }
+
+    public void close() {
+      threadBuffers.close();
+    }
+  }
+
+  /** 1<<blockBits must be bigger than biggest single
+   *  BytesRef slice that will be pulled */
+  public PagedBytes(int blockBits) {
+    this.blockSize = 1 << blockBits;
+    this.blockBits = blockBits;
+    blockMask = blockSize-1;
+    upto = blockSize;
+  }
+
+  /** Read this many bytes from in */
+  public void copy(IndexInput in, long byteCount) throws IOException {
+    while (byteCount > 0) {
+      int left = blockSize - upto;
+      if (left == 0) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+        left = blockSize;
+      }
+      if (left < byteCount) {
+        in.readBytes(currentBlock, upto, left, false);
+        upto = blockSize;
+        byteCount -= left;
+      } else {
+        in.readBytes(currentBlock, upto, (int) byteCount, false);
+        upto += byteCount;
+        break;
+      }
+    }
+  }
+
+  /** Copy BytesRef in */
+  public void copy(BytesRef bytes) throws IOException {
+    int byteCount = bytes.length;
+    int bytesUpto = bytes.offset;
+    while (byteCount > 0) {
+      int left = blockSize - upto;
+      if (left == 0) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+        left = blockSize;
+      }
+      if (left < byteCount) {
+        System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, left);
+        upto = blockSize;
+        byteCount -= left;
+        bytesUpto += left;
+      } else {
+        System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, byteCount);
+        upto += byteCount;
+        break;
+      }
+    }
+  }
+
+  /** Copy BytesRef in, setting BytesRef out to the result.
+   * Do not use this if you will use freeze(true).
+   * This only supports bytes.length <= blockSize */
+  public void copy(BytesRef bytes, BytesRef out) throws IOException {
+    int left = blockSize - upto;
+    if (bytes.length > left || currentBlock==null) {
+      if (currentBlock != null) {
+        blocks.add(currentBlock);
+        blockEnd.add(upto);
+        didSkipBytes = true;
+      }
+      currentBlock = new byte[blockSize];
+      upto = 0;
+      left = blockSize;
+      assert bytes.length <= blockSize;
+      // TODO: we could also support variable block sizes
+    }
+
+    out.bytes = currentBlock;
+    out.offset = upto;
+    out.length = bytes.length;
+
+    System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
+    upto += bytes.length;
+  }
+
+  /** Commits final byte[], trimming it if necessary and if trim=true; returns a Reader over the blocks. */
+  public Reader freeze(boolean trim) {
+    if (frozen) {
+      throw new IllegalStateException("already frozen");
+    }
+    if (didSkipBytes) {
+      throw new IllegalStateException("cannot freeze when copy(BytesRef, BytesRef) was used");
+    }
+    if (trim && upto < blockSize) { // current block is only partially filled
+      final byte[] newBlock = new byte[upto];
+      System.arraycopy(currentBlock, 0, newBlock, 0, upto);
+      currentBlock = newBlock;
+    }
+    if (currentBlock == null) { // no block was ever allocated
+      currentBlock = EMPTY_BYTES;
+    }
+    blocks.add(currentBlock);
+    blockEnd.add(upto); 
+    frozen = true;
+    currentBlock = null;
+    return new Reader(this);
+  }
+
+  /** Returns the current write position: completed blocks times blockSize plus the
+   *  offset in the current block, or 0 when no block has been allocated yet. */
+  public long getPointer() {
+    final long completedBlocks = blocks.size();
+    // completedBlocks is a long, so the multiplication below cannot overflow int
+    return currentBlock == null ? 0 : completedBlocks * blockSize + upto;
+  }
+
+  /** Copy bytes in, writing the length as a 1 or 2 byte prefix, and return the
+   *  pointer of that prefix. Throws IllegalArgumentException if bytes cannot be stored. */
+  public long copyUsingLengthPrefix(BytesRef bytes) throws IOException {
+    if (bytes.length + 2 > blockSize) {
+      throw new IllegalArgumentException("block size " + blockSize + " is too small to store length " + bytes.length + " bytes");
+    }
+    if (bytes.length > 32767) { // the 2 byte prefix only has 15 bits for the length
+      throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")");
+    }
+    if (upto + bytes.length + 2 > blockSize) { // does not fit: start a new block
+      if (currentBlock != null) {
+        blocks.add(currentBlock);
+        blockEnd.add(upto);
+      }
+      currentBlock = new byte[blockSize];
+      upto = 0;
+    }
+    // Taken after any block switch so that it addresses the first prefix byte.
+    final long pointer = getPointer();
+    if (bytes.length < 128) {
+      currentBlock[upto++] = (byte) bytes.length;
+    } else {
+      currentBlock[upto++] = (byte) (0x80 | (bytes.length >> 8));
+      currentBlock[upto++] = (byte) (bytes.length & 0xff);
+    }
+    System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
+    upto += bytes.length;
+    return pointer;
+  }
+
+  public final class PagedBytesDataInput extends DataInput {
+    private int currentBlockIndex;
+    private int currentBlockUpto;
+    private byte[] currentBlock;
+
+    PagedBytesDataInput() {
+      currentBlock = blocks.get(0);
+    }
+
+    @Override
+    public Object clone() {
+      PagedBytesDataInput clone = getDataInput();
+      clone.setPosition(getPosition());
+      return clone;
+    }
+
+    /** Returns the current byte position (widened to long before multiplying to avoid int overflow). */
+    public long getPosition() {
+      return ((long) currentBlockIndex) * blockSize + currentBlockUpto;
+    }
+  
+    /** Seek to a position previously obtained from
+     *  {@link #getPosition}. */
+    public void setPosition(long pos) {
+      currentBlockIndex = (int) (pos >> blockBits);
+      currentBlock = blocks.get(currentBlockIndex);
+      currentBlockUpto = (int) (pos & blockMask);
+    }
+
+    @Override
+    public byte readByte() {
+      if (currentBlockUpto == blockSize) {
+        nextBlock();
+      }
+      return currentBlock[currentBlockUpto++];
+    }
+
+    @Override
+    public void readBytes(byte[] b, int offset, int len) {
+      final int offsetEnd = offset + len;
+      while (true) {
+        final int blockLeft = blockSize - currentBlockUpto;
+        final int left = offsetEnd - offset;
+        if (blockLeft < left) {
+          System.arraycopy(currentBlock, currentBlockUpto,
+                           b, offset,
+                           blockLeft);
+          nextBlock();
+          offset += blockLeft;
+        } else {
+          // Last block
+          System.arraycopy(currentBlock, currentBlockUpto,
+                           b, offset,
+                           left);
+          currentBlockUpto += left;
+          break;
+        }
+      }
+    }
+
+    private void nextBlock() {
+      currentBlockIndex++;
+      currentBlockUpto = 0;
+      currentBlock = blocks.get(currentBlockIndex);
+    }
+  }
+
+  public final class PagedBytesDataOutput extends DataOutput {
+    @Override
+    public void writeByte(byte b) {
+      if (upto == blockSize) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+      }
+      currentBlock[upto++] = b;
+    }
+
+    @Override
+    public void writeBytes(byte[] b, int offset, int length) throws IOException {
+      if (length == 0) {
+        return;
+      }
+
+      if (upto == blockSize) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+      }
+          
+      final int offsetEnd = offset + length;
+      while(true) {
+        final int left = offsetEnd - offset;
+        final int blockLeft = blockSize - upto;
+        if (blockLeft < left) {
+          System.arraycopy(b, offset, currentBlock, upto, blockLeft);
+          blocks.add(currentBlock);
+          blockEnd.add(blockSize);
+          currentBlock = new byte[blockSize];
+          upto = 0;
+          offset += blockLeft;
+        } else {
+          // Last block
+          System.arraycopy(b, offset, currentBlock, upto, left);
+          upto += left;
+          break;
+        }
+      }
+    }
+
+    /** Return the current byte position (0 before any block exists); computed in long arithmetic. */
+    public long getPosition() {
+      if (currentBlock == null) {
+        return 0;
+      } else {
+        return ((long) blocks.size()) * blockSize + upto;
+      }
+    }
+  }
+
+  /** Returns a DataInput to read values from this
+   *  PagedBytes instance. */
+  public PagedBytesDataInput getDataInput() {
+    if (!frozen) {
+      throw new IllegalStateException("must call freeze() before getDataInput");
+    }
+    return new PagedBytesDataInput();
+  }
+
+  /** Returns a DataOutput that you may use to write into
+   *  this PagedBytes instance.  If you do this, you should
+   *  not call the other writing methods (eg, copy);
+   *  results are undefined. */
+  public PagedBytesDataOutput getDataOutput() {
+    if (frozen) {
+      throw new IllegalStateException("cannot get DataOutput after freeze()");
+    }
+    return new PagedBytesDataOutput();
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/util/PagedBytes.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/util/packed/GrowableWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/packed/GrowableWriter.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/util/packed/GrowableWriter.java	(revision 0)
@@ -0,0 +1,93 @@
+package org.apache.lucene.util.packed;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**     
+ * Implements {@link PackedInts.Mutable}, but grows the
+ * bit count of the underlying packed ints on-demand.
+ *
+ * <p>@lucene.internal</p>
+ */
+
+public class GrowableWriter implements PackedInts.Mutable {
+
+  private long currentMaxValue;
+  private PackedInts.Mutable current;
+  private final boolean roundFixedSize;
+
+  public GrowableWriter(int startBitsPerValue, int valueCount, boolean roundFixedSize) {
+    this.roundFixedSize = roundFixedSize;
+    current = PackedInts.getMutable(valueCount, getSize(startBitsPerValue));
+    currentMaxValue = PackedInts.maxValue(current.getBitsPerValue());
+  }
+
+  /** Maps a requested bit width to the width to allocate: rounded through
+   *  PackedInts.getNextFixedSize when roundFixedSize is set, otherwise unchanged. */
+  private final int getSize(int bpv) {
+    return roundFixedSize
+        ? PackedInts.getNextFixedSize(bpv)
+        : bpv;
+  }
+
+  public long get(int index) {
+    return current.get(index);
+  }
+
+  public int size() {
+    return current.size();
+  }
+
+  public int getBitsPerValue() {
+    return current.getBitsPerValue();
+  }
+
+  public PackedInts.Mutable getMutable() {
+    return current;
+  }
+
+  public void set(int index, long value) {
+    // Grow only when value does not fit; value == currentMaxValue still fits.
+    if (value > currentMaxValue) {
+      // value is positive here, so its exact width is at most 63 bits. Computing it
+      // directly avoids doubling currentMaxValue, which overflowed for huge values.
+      final int bpv = 64 - Long.numberOfLeadingZeros(value);
+      final int valueCount = size();
+      final PackedInts.Mutable next = PackedInts.getMutable(valueCount, getSize(bpv));
+      // Carry every existing value over into the wider structure.
+      for(int i=0;i<valueCount;i++) {
+        next.set(i, current.get(i));
+      }
+      current = next;
+      currentMaxValue = PackedInts.maxValue(current.getBitsPerValue());
+    }
+    current.set(index, value);
+  }
+
+  public void clear() {
+    current.clear();
+  }
+
+  /** Returns a new writer for newSize values holding the first min(size(), newSize) values of this one. */
+  public GrowableWriter resize(int newSize) {
+    final GrowableWriter resized = new GrowableWriter(getBitsPerValue(), newSize, roundFixedSize);
+    for (int idx = 0, copyCount = Math.min(size(), newSize); idx < copyCount; idx++) {
+      resized.set(idx, get(idx));
+    }
+    return resized;
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/util/packed/GrowableWriter.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/util/packed/Direct32.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/packed/Direct32.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/util/packed/Direct32.java	(revision 0)
@@ -0,0 +1,87 @@
+package org.apache.lucene.util.packed;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Direct wrapping of 32 bit values to a backing array of ints.
+ * @lucene.internal
+ */
+
+public class Direct32 extends PackedInts.ReaderImpl
+        implements PackedInts.Mutable {
+  private int[] values;
+  private static final int BITS_PER_VALUE = 32;
+
+  public Direct32(int valueCount) {
+    super(valueCount, BITS_PER_VALUE);
+    values = new int[valueCount];
+  }
+
+  /** Reads valueCount ints from in, then reads and discards one extra int when valueCount is odd. */
+  public Direct32(DataInput in, int valueCount) throws IOException {
+    super(valueCount, BITS_PER_VALUE);
+    final int[] loaded = new int[valueCount];
+    int read = 0;
+    while (read < valueCount) {
+      loaded[read++] = in.readInt();
+    }
+    if ((valueCount % 2) != 0) {
+      in.readInt(); // result intentionally ignored
+    }
+    values = loaded;
+  }
+
+  /**
+   * Creates an array backed by the given values.
+   * </p><p>
+   * Note: The values are used directly, so changes to the given values will
+   * affect the structure.
+   * @param values   used as the internal backing array.
+   */
+  public Direct32(int[] values) {
+    super(values.length, BITS_PER_VALUE);
+    this.values = values;
+  }
+
+  public int[] getArray() {
+    return values;
+  }
+
+  public long get(final int index) {
+    return 0xFFFFFFFFL & values[index];
+  }
+
+  public void set(final int index, final long value) {
+    values[index] = (int)(value & 0xFFFFFFFFL); // long mask; the int literal 0xFFFFFFFF would widen to -1L
+  }
+
+  public long ramBytesUsed() {
+    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER +
+            values.length * RamUsageEstimator.NUM_BYTES_INT;
+  }
+
+  public void clear() {
+    Arrays.fill(values, 0);
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/util/packed/Direct32.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/util/packed/Packed64.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/packed/Packed64.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/util/packed/Packed64.java	(revision 0)
@@ -0,0 +1,217 @@
+package org.apache.lucene.util.packed;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Space optimized random access capable array of values with a fixed number of
+ * bits. For 32 bits/value and less, performance on 32 bit machines is not
+ * optimal. Consider using {@link Packed32} for such a setup.
+ * </p><p>
+ * The implementation strives to avoid conditionals and expensive operations,
+ * sacrificing code clarity to achieve better performance.
+ */
+
+class Packed64 extends PackedInts.ReaderImpl implements PackedInts.Mutable {
+  static final int BLOCK_SIZE = 64; // 32 = int, 64 = long
+  static final int BLOCK_BITS = 6; // The #bits representing BLOCK_SIZE
+  static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
+
+  private static final int ENTRY_SIZE = BLOCK_SIZE + 1;
+  private static final int FAC_BITPOS = 3;
+
+  /*
+   * In order to make an efficient value-getter, conditionals should be
+   * avoided. A value can be positioned inside of a block, requiring shifting
+   * left or right or it can span two blocks, requiring a left-shift on the
+   * first block and a right-shift on the right block.
+   * </p><p>
+   * By always shifting the first block both left and right, we get exactly
+   * the right bits. By always shifting the second block right and applying
+   * a mask, we get the right bits there. After that, we | the two bitsets.
+  */
+  private static final int[][] SHIFTS =
+          new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
+          //new int[BLOCK_SIZE+1][BLOCK_SIZE][BLOCK_SIZE+1];
+  private static final long[][] MASKS = new long[ENTRY_SIZE][ENTRY_SIZE];
+
+  static { // Generate shifts
+      for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
+          for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
+              int[] currentShifts = SHIFTS[elementBits];
+              int base = bitPos * FAC_BITPOS;
+              currentShifts[base    ] = bitPos;
+              currentShifts[base + 1] = BLOCK_SIZE - elementBits;
+              if (bitPos <= BLOCK_SIZE - elementBits) { // Single block
+                  currentShifts[base + 2] = 0;
+                  MASKS[elementBits][bitPos] = 0;
+              } else { // Two blocks
+                  int rBits = elementBits - (BLOCK_SIZE - bitPos);
+                  currentShifts[base + 2] = BLOCK_SIZE - rBits;
+                  MASKS[elementBits][bitPos] = ~(~0L << rBits);
+              }
+          }
+      }
+  }
+
+  /*
+   * The setter requires more masking than the getter.
+  */
+  private static final long[][] WRITE_MASKS =
+          new long[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
+  static {
+      for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
+          long elementPosMask = ~(~0L << elementBits);
+          int[] currentShifts = SHIFTS[elementBits];
+          long[] currentMasks = WRITE_MASKS[elementBits];
+          for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
+              int base = bitPos * FAC_BITPOS;
+              currentMasks[base  ] =~((elementPosMask
+                                 << currentShifts[base + 1])
+                                >>> currentShifts[base]);
+              if (bitPos <= BLOCK_SIZE - elementBits) { // Second block not used
+                currentMasks[base+1] = ~0; // Keep all bits
+                currentMasks[base+2] = 0;  // Or with 0
+              } else {
+                currentMasks[base+1] = ~(elementPosMask
+                                         << currentShifts[base + 2]);
+                currentMasks[base+2] = currentShifts[base + 2] == 0 ? 0 : ~0;
+              }
+          }
+      }
+  }
+
+  /* The bits */
+  private long[] blocks;
+
+  // Cached calculations
+  private int maxPos;      // blocks.length * BLOCK_SIZE / elementBits - 1
+  private int[] shifts;    // The shifts for the current elementBits
+  private long[] readMasks;
+  private long[] writeMasks;
+
+  /**
+   * Creates an array with the internal structures adjusted for the given
+   * limits and initialized to 0.
+   * @param valueCount   the number of elements.
+   * @param bitsPerValue the number of bits available for any given value.
+   */
+  public Packed64(int valueCount, int bitsPerValue) {
+    // TODO: Test for edge-cases (2^31 values, 63 bitsPerValue)
+    // +2 due to the avoid-conditionals-trick. The last entry is always 0
+    this(new long[(int)((long)valueCount * bitsPerValue / BLOCK_SIZE + 2)],
+            valueCount, bitsPerValue);
+  }
+
+
+  /**
+   * Creates an array backed by the given blocks.
+   * <p>
+   * Note: The blocks are used directly, so changes to the given blocks will
+   * affect the Packed64-structure.
+   * @param blocks   used as the internal backing array. Note that the last
+   *                 element cannot be addressed directly.
+   * @param valueCount the number of values.
+   * @param bitsPerValue the number of bits available for any given value.
+   */
+  public Packed64(long[] blocks, int valueCount, int bitsPerValue) {
+    super(valueCount, bitsPerValue);
+    this.blocks = blocks;
+    updateCached();
+  }
+
+  /**
+   * Creates an array with content retrieved from the given DataInput.
+   * @param in       a DataInput, positioned at the start of Packed64-content.
+   * @param valueCount  the number of elements.
+   * @param bitsPerValue the number of bits available for any given value.
+   * @throws java.io.IOException if the values for the backing array could not
+   *                             be retrieved.
+   */
+  public Packed64(DataInput in, int valueCount, int bitsPerValue)
+                                                            throws IOException {
+    super(valueCount, bitsPerValue);
+    int size = size(valueCount, bitsPerValue);
+    blocks = new long[size+1]; // +1 due to non-conditional tricks
+    // TODO: find a faster way to bulk-read longs...
+    for(int i=0;i<size;i++) {
+      blocks[i] = in.readLong();
+    }
+    updateCached();
+  }
+
+  private static int size(int valueCount, int bitsPerValue) {
+    final long totBitCount = (long) valueCount * bitsPerValue;
+    return (int)(totBitCount/64 + ((totBitCount % 64 == 0 ) ? 0:1));
+  }
+
+  private void updateCached() {
+    readMasks = MASKS[bitsPerValue];
+    shifts = SHIFTS[bitsPerValue];
+    writeMasks = WRITE_MASKS[bitsPerValue];
+    maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2);
+  }
+
+  /**
+   * Reads one value; two adjacent blocks are always read so no branch is needed.
+   * @param index the position of the value.
+   * @return the value at the given index.
+   */
+  public long get(final int index) {
+    final long majorBitPos = (long)index * bitsPerValue; // absolute bit offset of the value
+    final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
+    final int bitPos =     (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
+    final int base = bitPos * FAC_BITPOS; // first of the FAC_BITPOS shift entries for bitPos
+    assert elementPos < blocks.length : "elementPos: " + elementPos + "; blocks.len: " + blocks.length;
+    return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) |
+            ((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]);
+  }
+
+  /** Stores value at index: each of the two touched blocks is ANDed with a write mask, then ORed with the shifted value. */
+  public void set(final int index, final long value) {
+    final long majorBitPos = (long)index * bitsPerValue;
+    final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
+    final int bitPos =     (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
+    final int base = bitPos * FAC_BITPOS;
+    blocks[elementPos  ] = (blocks[elementPos  ] & writeMasks[base])
+                           | (value << shifts[base + 1] >>> shifts[base]);
+    blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1])
+                           | ((value << shifts[base + 2]) & writeMasks[base+2]);
+  }
+
+  @Override
+  public String toString() {
+    return "Packed64(bitsPerValue=" + bitsPerValue + ", size="
+            + size() + ", maxPos=" + maxPos
+            + ", elements.length=" + blocks.length + ")";
+  }
+
+  public long ramBytesUsed() {
+    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+            + blocks.length * RamUsageEstimator.NUM_BYTES_LONG;
+  }
+
+  public void clear() {
+    Arrays.fill(blocks, 0L);
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/util/packed/Packed64.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/util/packed/Direct16.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/packed/Direct16.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/util/packed/Direct16.java	(revision 0)
@@ -0,0 +1,91 @@
+package org.apache.lucene.util.packed;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Direct wrapping of 16 bit values to a backing array of shorts.
+ * @lucene.internal
+ */
+
+public class Direct16 extends PackedInts.ReaderImpl
+        implements PackedInts.Mutable {
+  private short[] values; // one short per value; get() reads it back unsigned
+  private static final int BITS_PER_VALUE = 16;
+
+  public Direct16(int valueCount) { // creates valueCount values, all 0
+    super(valueCount, BITS_PER_VALUE);
+    values = new short[valueCount];
+  }
+
+  public Direct16(DataInput in, int valueCount) throws IOException {
+    super(valueCount, BITS_PER_VALUE);
+    short[] values = new short[valueCount];
+    for(int i=0;i<valueCount;i++) {
+      values[i] = in.readShort();
+    }
+    final int mod = valueCount % 4;
+    if (mod != 0) {
+      final int pad = 4-mod;
+      // skip the padding shorts so that a whole number of 8-byte longs is consumed
+      for(int i=0;i<pad;i++) {
+        in.readShort();
+      }
+    }
+
+    this.values = values;
+  }
+
+  /**
+   * Creates an array backed by the given values.
+   * <p>
+   * Note: The values are used directly, so changes to the values will
+   * affect the structure.
+   * @param values   used as the internal backing array.
+   */
+  public Direct16(short[] values) {
+    super(values.length, BITS_PER_VALUE);
+    this.values = values;
+  }
+
+  public short[] getArray() { // the live backing array, not a copy
+    return values;
+  }
+
+  public long get(final int index) {
+    return 0xFFFFL & values[index]; // mask away the sign extension of the short
+  }
+
+  public void set(final int index, final long value) {
+    values[index] = (short)(value & 0xFFFF); // only the low 16 bits are kept
+  }
+
+  public long ramBytesUsed() {
+    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER +
+            (long) values.length * RamUsageEstimator.NUM_BYTES_SHORT; // widen first so the product is computed in long
+  }
+
+  public void clear() {
+    Arrays.fill(values, (short)0);
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/util/packed/Direct16.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/util/packed/Direct8.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/packed/Direct8.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/util/packed/Direct8.java	(revision 0)
@@ -0,0 +1,91 @@
+package org.apache.lucene.util.packed;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Direct wrapping of 8 bit values to a backing array of bytes.
+ * @lucene.internal
+ */
+
+public class Direct8 extends PackedInts.ReaderImpl
+        implements PackedInts.Mutable {
+  private byte[] values; // one byte per value; get() reads it back unsigned
+  private static final int BITS_PER_VALUE = 8;
+
+  public Direct8(int valueCount) { // creates valueCount values, all 0
+    super(valueCount, BITS_PER_VALUE);
+    values = new byte[valueCount];
+  }
+
+  public Direct8(DataInput in, int valueCount)
+          throws IOException {
+    super(valueCount, BITS_PER_VALUE);
+    byte[] values = new byte[valueCount];
+    for(int i=0;i<valueCount;i++) {
+      values[i] = in.readByte();
+    }
+    final int mod = valueCount % 8;
+    if (mod != 0) {
+      final int pad = 8-mod;
+      // skip the padding bytes so that a whole number of 8-byte longs is consumed
+      for(int i=0;i<pad;i++) {
+        in.readByte();
+      }
+    }
+
+    this.values = values;
+  }
+
+  /**
+   * Creates an array backed by the given values.
+   * <p>
+   * Note: The values are used directly, so changes to the given values will
+   * affect the structure.
+   * @param values used as the internal backing array.
+   */
+  public Direct8(byte[] values) {
+    super(values.length, BITS_PER_VALUE);
+    this.values = values;
+  }
+
+  public byte[] getArray() { // the live backing array, not a copy
+    return values;
+  }
+
+  public long get(final int index) {
+    return 0xFFL & values[index]; // mask away the sign extension of the byte
+  }
+
+  public void set(final int index, final long value) {
+    values[index] = (byte)(value & 0xFF); // only the low 8 bits are kept
+  }
+
+  public long ramBytesUsed() { // one byte per element; sum computed in long
+    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) values.length;
+  }
+
+  public void clear() {
+    Arrays.fill(values, (byte)0);
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/util/packed/Direct8.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/util/packed/PackedWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/packed/PackedWriter.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/util/packed/PackedWriter.java	(revision 0)
@@ -0,0 +1,114 @@
+package org.apache.lucene.util.packed;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.DataOutput;
+
+import java.io.IOException;
+
+// Packs high order byte first, to match
+// IndexOutput.writeInt/Long/Short byte order
+
+/**
+ * Generic writer for space-optimal packed values. The resulting bits can be
+ * used directly by Packed32, Packed64 and Direct* and will always be
+ * long-aligned.
+ */
+
+class PackedWriter extends PackedInts.Writer {
+  private long pending; // bits collected for the next long to be written
+  private int pendingBitPos; // number of low-order bits of pending that are still free
+
+  // masks[n-1] masks for bottom n bits
+  private final long[] masks;
+  private int written = 0; // number of add() calls so far
+
+  public PackedWriter(DataOutput out, int valueCount, int bitsPerValue)
+                                                            throws IOException {
+    super(out, valueCount, bitsPerValue);
+
+    pendingBitPos = 64; // nothing pending yet: all 64 bits are free
+    masks = new long[bitsPerValue - 1];
+
+    long v = 1;
+    for (int i = 0; i < bitsPerValue - 1; i++) {
+      v *= 2;
+      masks[i] = v - 1; // 2^(i+1) - 1
+    }
+  }
+
+  /**
+   * Appends v, which must be >= 0 and <= maxValue(bitsPerValue). Do not call this after finish.
+   */
+  @Override
+  public void add(long v) throws IOException {
+    assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
+            + " maxValue=" + PackedInts.maxValue(bitsPerValue);
+    assert v >= 0;
+    //System.out.println("    packedw add v=" + v + " pendingBitPos=" + pendingBitPos);
+
+    // Does the whole value fit into the free bits of the pending long?
+    if (pendingBitPos >= bitsPerValue) {
+      // not split
+
+      // write-once, so we can |= w/o first masking to 0s
+      pending |= v << (pendingBitPos - bitsPerValue);
+      if (pendingBitPos == bitsPerValue) {
+        // the pending long is now full: flush it and start an empty one
+        out.writeLong(pending);
+        pending = 0;
+        pendingBitPos = 64;
+      } else {
+        pendingBitPos -= bitsPerValue;
+      }
+
+    } else {
+      // split
+
+      // write top pendingBitPos bits of value into bottom bits of pending
+      pending |= (v >> (bitsPerValue - pendingBitPos)) & masks[pendingBitPos - 1];
+      //System.out.println("      part1 (v >> " + (bitsPerValue - pendingBitPos) + ") & " + masks[pendingBitPos-1]);
+
+      // flush
+      out.writeLong(pending);
+
+      // write bottom (bitsPerValue - pendingBitPos) bits of value into top bits of pending
+      pendingBitPos = 64 - bitsPerValue + pendingBitPos;
+      //System.out.println("      part2 v << " + pendingBitPos);
+      pending = (v << pendingBitPos);
+    }
+    written++;
+  }
+
+  @Override
+  public void finish() throws IOException {
+    while (written < valueCount) {
+      add(0L); // pad the values not yet added with 0; add() flushes every full long
+    }
+
+    if (pendingBitPos != 64) { // a partially filled long is still pending
+      out.writeLong(pending);
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "PackedWriter(written " + written + "/" + valueCount + " with "
+            + bitsPerValue + " bits/value)";
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/util/packed/PackedWriter.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/util/packed/Direct64.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/packed/Direct64.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/util/packed/Direct64.java	(revision 0)
@@ -0,0 +1,83 @@
+package org.apache.lucene.util.packed;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Direct wrapping of 64 bit values to a backing array of longs.
+ * @lucene.internal
+ */
+
+public class Direct64 extends PackedInts.ReaderImpl
+        implements PackedInts.Mutable {
+  private long[] values; // one long per value, stored and returned as-is
+  private static final int BITS_PER_VALUE = 64;
+
+  public Direct64(int valueCount) { // creates valueCount values, all 0
+    super(valueCount, BITS_PER_VALUE);
+    values = new long[valueCount];
+  }
+
+  public Direct64(DataInput in, int valueCount) throws IOException {
+    super(valueCount, BITS_PER_VALUE);
+    long[] values = new long[valueCount];
+    for(int i=0;i<valueCount;i++) {
+      values[i] = in.readLong();
+    }
+    // no padding to skip: every value is already a whole long
+    this.values = values;
+  }
+
+  public long[] getArray() { // the live backing array, not a copy
+    return values;
+  }
+
+  /**
+   * Creates an array backed by the given values.
+   * <p>
+   * Note: The values are used directly, so changes to the given values will
+   * affect the structure.
+   * @param values   used as the internal backing array.
+   */
+  public Direct64(long[] values) {
+    super(values.length, BITS_PER_VALUE);
+    this.values = values;
+  }
+
+  public long get(final int index) {
+    return values[index];
+  }
+
+  public void set(final int index, final long value) {
+    values[index] = value;
+  }
+
+  public long ramBytesUsed() {
+    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER +
+            (long) values.length * RamUsageEstimator.NUM_BYTES_LONG; // widen first so the product is computed in long
+  }
+
+  public void clear() {
+    Arrays.fill(values, 0L);
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/util/packed/Direct64.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/util/packed/PackedInts.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/packed/PackedInts.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/util/packed/PackedInts.java	(revision 0)
@@ -0,0 +1,266 @@
+package org.apache.lucene.util.packed;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.Constants;
+
+import java.io.IOException;
+
+/**
+ * Simplistic compression for array of unsigned long values.
+ * Each value is >= 0 and <= a specified maximum value.  The
+ * values are stored as packed ints, with each value
+ * consuming a fixed number of bits.
+ *
+ * @lucene.internal
+ */
+
+public class PackedInts {
+
+  private final static String CODEC_NAME = "PackedInts";
+  private final static int VERSION_START = 0;
+  private final static int VERSION_CURRENT = VERSION_START;
+
+  /**
+   * A read-only random access array of non-negative integers.
+   * @lucene.internal
+   */
+  public static interface Reader {
+    /**
+     * @param index the position of the wanted value.
+     * @return the value at the stated index.
+     */
+    long get(int index);
+
+    /**
+     * @return the number of bits used to store any given value.
+     *         Note: This does not imply that memory usage is
+     *         {@code bitsPerValue * #values} as implementations are free to
+     *         use non-space-optimal packing of bits.
+     */
+    int getBitsPerValue();
+
+    /**
+     * @return the number of values.
+     */
+    int size();
+  }
+
+  /**
+   * A packed integer array that can be modified.
+   * @lucene.internal
+   */
+  public static interface Mutable extends Reader {
+    /**
+     * Set the value at the given index in the array.
+     * @param index where the value should be positioned.
+     * @param value a value conforming to the constraints set by the array.
+     */
+    void set(int index, long value);
+
+    /**
+     * Sets all values to 0.
+     */
+
+    void clear();
+  }
+
+  /**
+   * A simple base for Readers that keeps track of valueCount and bitsPerValue.
+   * @lucene.internal
+   */
+  public static abstract class ReaderImpl implements Reader {
+    protected final int bitsPerValue;
+    protected final int valueCount;
+
+    protected ReaderImpl(int valueCount, int bitsPerValue) {
+      this.bitsPerValue = bitsPerValue;
+      assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
+      this.valueCount = valueCount;
+    }
+
+    public int getBitsPerValue() {
+      return bitsPerValue;
+    }
+
+    public int size() {
+      return valueCount;
+    }
+
+    public long getMaxValue() { // Convenience method
+      return maxValue(bitsPerValue);
+    }
+  }
+
+  /** A write-once Writer.
+   * @lucene.internal
+   */
+  public static abstract class Writer {
+    protected final DataOutput out;
+    protected final int bitsPerValue;
+    protected final int valueCount;
+
+    protected Writer(DataOutput out, int valueCount, int bitsPerValue)
+      throws IOException {
+      assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
+      // the header written below is what getReader(DataInput) reads back
+      this.out = out;
+      this.valueCount = valueCount;
+      this.bitsPerValue = bitsPerValue;
+      CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
+      out.writeVInt(bitsPerValue);
+      out.writeVInt(valueCount);
+    }
+
+    public abstract void add(long v) throws IOException;
+    public abstract void finish() throws IOException;
+  }
+
+  /**
+   * Retrieve PackedInt data from the DataInput and return a packed int
+   * structure based on it.
+   * @param in positioned at the beginning of a stored packed int structure.
+   * @return a read only random access capable array of non-negative integers.
+   * @throws IOException if the structure could not be retrieved.
+   * @lucene.internal
+   */
+  public static Reader getReader(DataInput in) throws IOException {
+    CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
+    final int bitsPerValue = in.readVInt();
+    assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
+    final int valueCount = in.readVInt();
+
+    switch (bitsPerValue) {
+    case 8:
+      return new Direct8(in, valueCount);
+    case 16:
+      return new Direct16(in, valueCount);
+    case 32:
+      return new Direct32(in, valueCount);
+    case 64:
+      return new Direct64(in, valueCount);
+    default:
+      if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
+        return new Packed64(in, valueCount, bitsPerValue);
+      } else {
+        return new Packed32(in, valueCount, bitsPerValue);
+      }
+    }
+  }
+
+  /**
+   * Create a packed integer array with the given amount of values initialized
+   * to 0. the valueCount and the bitsPerValue cannot be changed after creation.
+   * All Mutables known by this factory are kept fully in RAM.
+   * @param valueCount   the number of elements.
+   * @param bitsPerValue the number of bits available for any given value.
+   * @return a mutable packed integer array.
+   *         Widths of 8, 16, 32 and 64 bits/value get a Direct* array; any other
+   *         width gets a bit-packed array (Packed64 on 64 bit JREs or for widths
+   *         of 32 bits or more, Packed32 otherwise).
+   * @lucene.internal
+   */
+  public static Mutable getMutable(
+         int valueCount, int bitsPerValue) {
+    switch (bitsPerValue) {
+    case 8:
+      return new Direct8(valueCount);
+    case 16:
+      return new Direct16(valueCount);
+    case 32:
+      return new Direct32(valueCount);
+    case 64:
+      return new Direct64(valueCount);
+    default:
+      if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
+        return new Packed64(valueCount, bitsPerValue);
+      } else {
+        return new Packed32(valueCount, bitsPerValue);
+      }
+    }
+  }
+
+  /**
+   * Create a packed integer array writer for the given number of values at the
+   * given bits/value. Writers append to the given DataOutput and have very
+   * low memory overhead.
+   * @param out          the destination for the produced bits.
+   * @param valueCount   the number of elements.
+   * @param bitsPerValue the number of bits available for any given value.
+   * @return a Writer ready for receiving values.
+   * @throws IOException if bits could not be written to out.
+   * @lucene.internal
+   */
+  public static Writer getWriter(DataOutput out, int valueCount, int bitsPerValue)
+    throws IOException {
+    return new PackedWriter(out, valueCount, bitsPerValue);
+  }
+
+  /** Returns how many bits are required to hold values up
+   *  to and including maxValue
+   * @param maxValue the maximum value that should be representable.
+   * @return the amount of bits needed to represent values from 0 to maxValue (at least 1).
+   * @lucene.internal
+   */
+  public static int bitsRequired(long maxValue) {
+    // Exact integer arithmetic. The former log()-based formula went through
+    // double, which cannot tell 2^60 from 2^60+1, so maxValue=2^60-1 and
+    // maxValue=2^60 got the same answer although they need 60 and 61 bits.
+    if (maxValue <= 0) {
+      // 0 still occupies one bit; negative (invalid) input keeps the former result
+      return 1;
+    }
+    return 64 - Long.numberOfLeadingZeros(maxValue);
+  }
+
+  /**
+   * Calculates the maximum unsigned long that can be expressed with the given
+   * number of bits.
+   * @param bitsPerValue the number of bits available for any given value.
+   * @return the maximum value for the given bits (Long.MAX_VALUE for 64 bits).
+   * @lucene.internal
+   */
+  public static long maxValue(int bitsPerValue) {
+    return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue);
+  }
+
+  /** Rounds bitsPerValue up to 8, 16, 32 or 64. */
+  public static int getNextFixedSize(int bitsPerValue) {
+    if (bitsPerValue <= 8) {
+      return 8;
+    } else if (bitsPerValue <= 16) {
+      return 16;
+    } else if (bitsPerValue <= 32) {
+      return 32;
+    } else {
+      return 64;
+    }
+  }
+
+  /** Possibly wastes some storage in exchange for faster lookups */
+  public static int getRoundedFixedSize(int bitsPerValue) {
+    if (bitsPerValue > 58 || (bitsPerValue < 32 && bitsPerValue > 29)) { // 10% space-waste is ok
+      return getNextFixedSize(bitsPerValue);
+    } else {
+      return bitsPerValue;
+    }
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/util/packed/PackedInts.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/util/packed/Packed32.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/packed/Packed32.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/util/packed/Packed32.java	(revision 0)
@@ -0,0 +1,227 @@
+package org.apache.lucene.util.packed;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Space optimized random access capable array of values with a fixed number of
+ * bits. The maximum number of bits/value is 31. Use {@link Packed64} for higher
+ * numbers.
+ * </p><p>
+ * The implementation strives to avoid conditionals and expensive operations,
+ * sacrificing code clarity to achieve better performance.
+ */
+
+class Packed32 extends PackedInts.ReaderImpl implements PackedInts.Mutable {
+  static final int BLOCK_SIZE = 32; // 32 = int, 64 = long
+  static final int BLOCK_BITS = 5; // The #bits representing BLOCK_SIZE
+  static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
+
+  private static final int ENTRY_SIZE = BLOCK_SIZE + 1; // tables are indexed directly by bits per value
+  private static final int FAC_BITPOS = 3; // table slots per bit position
+
+  /*
+   * In order to make an efficient value-getter, conditionals should be
+   * avoided. A value can be positioned inside of a block, requiring shifting
+   * left or right or it can span two blocks, requiring a left-shift on the
+   * first block and a right-shift on the second block.
+   *
+   * By always shifting the first block both left and right, we get exactly
+   * the right bits. By always shifting the second block right and applying
+   * a mask, we get the right bits there. After that, we | the two bitsets.
+  */
+  private static final int[][] SHIFTS =
+          new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
+  private static final int[][] MASKS = new int[ENTRY_SIZE][ENTRY_SIZE];
+
+  static { // Generate shifts
+    for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
+      for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
+        int[] currentShifts = SHIFTS[elementBits];
+        int base = bitPos * FAC_BITPOS;
+        currentShifts[base    ] = bitPos; // get(): left shift of the first block
+        currentShifts[base + 1] = BLOCK_SIZE - elementBits; // get(): right shift of the first block
+        if (bitPos <= BLOCK_SIZE - elementBits) { // Single block
+          currentShifts[base + 2] = 0;
+          MASKS[elementBits][bitPos] = 0; // nothing is taken from the second block
+        } else { // Two blocks
+          int rBits = elementBits - (BLOCK_SIZE - bitPos); // bits living in the second block
+          currentShifts[base + 2] = BLOCK_SIZE - rBits; // get(): right shift of the second block
+          MASKS[elementBits][bitPos] = ~(~0 << rBits);
+        }
+      }
+    }
+  }
+
+  /*
+   * The setter requires more masking than the getter.
+  */
+  private static final int[][] WRITE_MASKS =
+          new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS];
+  static {
+    for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) {
+      int elementPosMask = ~(~0 << elementBits);
+      int[] currentShifts = SHIFTS[elementBits];
+      int[] currentMasks = WRITE_MASKS[elementBits];
+      for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) {
+        int base = bitPos * FAC_BITPOS;
+        currentMasks[base  ] =~((elementPosMask
+                << currentShifts[base + 1])
+                >>> currentShifts[base]);
+        if (bitPos <= BLOCK_SIZE - elementBits) { // Second block not used
+          currentMasks[base+1] = ~0; // Keep all bits
+          currentMasks[base+2] = 0;  // Or with 0
+        } else {
+          currentMasks[base+1] = ~(elementPosMask
+                                   << currentShifts[base + 2]);
+          currentMasks[base+2] = currentShifts[base + 2] == 0 ? 0 : ~0;
+        }
+      }
+    }
+  }
+
+  /* The bits */
+  private int[] blocks;
+
+  // Cached calculations
+  private int maxPos;      // blocks.length * BLOCK_SIZE / bitsPerValue - 2
+  private int[] shifts;    // The shifts for the current bitsPerValue
+  private int[] readMasks;
+  private int[] writeMasks;
+
+  /**
+   * Creates an array with the internal structures adjusted for the given
+   * limits and initialized to 0.
+   * @param valueCount   the number of elements.
+   * @param bitsPerValue the number of bits available for any given value.
+   *        Note: bitsPerValue >31 is not supported by this implementation.
+   */
+  public Packed32(int valueCount, int bitsPerValue) {
+    this(new int[(int)(((long)valueCount) * bitsPerValue / BLOCK_SIZE + 2)],
+            valueCount, bitsPerValue);
+  }
+
+  /**
+   * Creates an array with content retrieved from the given DataInput.
+   * @param in       a DataInput, positioned at the start of Packed32-content.
+   * @param valueCount  the number of elements.
+   * @param bitsPerValue the number of bits available for any given value.
+   * @throws java.io.IOException if the values for the backing array could not
+   *                             be retrieved.
+   */
+  public Packed32(DataInput in, int valueCount, int bitsPerValue)
+                                                            throws IOException {
+    super(valueCount, bitsPerValue);
+    int size = size(bitsPerValue, valueCount);
+    blocks = new int[size + 1]; // +1 due to non-conditional tricks
+    // TODO: find a faster way to bulk-read ints...
+    for(int i = 0 ; i < size ; i++) {
+      blocks[i] = in.readInt();
+    }
+    if (size % 2 == 1) {
+      in.readInt(); // Align to long
+    }
+    updateCached();
+  }
+
+  private static int size(int bitsPerValue, int valueCount) { // ints needed for all values, rounded up
+    final long totBitCount = (long) valueCount * bitsPerValue;
+    return (int) (totBitCount/32 + ((totBitCount % 32 == 0 ) ? 0:1));
+  }
+
+
+  /**
+   * Creates an array backed by the given blocks.
+   * <p>
+   * Note: The blocks are used directly, so changes to the given block will
+   * affect the Packed32-structure.
+   * @param blocks   used as the internal backing array.
+   * @param valueCount   the number of values.
+   * @param bitsPerValue the number of bits available for any given value.
+   *        Note: bitsPerValue >31 is not supported by this implementation.
+   */
+  public Packed32(int[] blocks, int valueCount, int bitsPerValue) {
+    // TODO: Check that blocks.length is sufficient for holding length values
+    super(valueCount, bitsPerValue);
+    if (bitsPerValue > 31) {
+      throw new IllegalArgumentException(String.format(
+              "This array only supports values of 31 bits or less. The "
+                      + "required number of bits was %d. The Packed64 "
+                      + "implementation allows values with more than 31 bits",
+              bitsPerValue));
+    }
+    this.blocks = blocks;
+    updateCached();
+  }
+
+  private void updateCached() { // selects the table rows for the current bitsPerValue
+    readMasks = MASKS[bitsPerValue];
+    maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2);
+    shifts = SHIFTS[bitsPerValue];
+    writeMasks = WRITE_MASKS[bitsPerValue];
+  }
+
+  /**
+   * @param index the position of the value.
+   * @return the value at the given index.
+   */
+  public long get(final int index) {
+    final long majorBitPos = (long)index * bitsPerValue;
+    final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
+    final int bitPos =     (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE
+
+    final int base = bitPos * FAC_BITPOS;
+    // both blocks are always read, without branching on whether the value spans two
+    return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) |
+            ((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]);
+  }
+
+  public void set(final int index, final long value) {
+    final int intValue = (int)value; // only the low 32 bits of value are used
+    final long majorBitPos = (long)index * bitsPerValue;
+    final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
+    final int bitPos =     (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE
+    final int base = bitPos * FAC_BITPOS;
+    // both blocks are always rewritten, without branching on whether the value spans two
+    blocks[elementPos  ] = (blocks[elementPos  ] & writeMasks[base])
+            | (intValue << shifts[base + 1] >>> shifts[base]);
+    blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1])
+            | ((intValue << shifts[base + 2])
+            & writeMasks[base+2]);
+  }
+
+  public void clear() {
+    Arrays.fill(blocks, 0);
+  }
+
+  @Override
+  public String toString() {
+    return "Packed32(bitsPerValue=" + bitsPerValue + ", maxPos=" + maxPos
+            + ", elements.length=" + blocks.length + ")";
+  }
+
+  public long ramBytesUsed() {
+    return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+            + (long) blocks.length * RamUsageEstimator.NUM_BYTES_INT; // widen first so the product is computed in long
+  }
+}

Property changes on: lucene/src/java/org/apache/lucene/util/packed/Packed32.java
___________________________________________________________________
Added: svn:eol-style
   + native

