Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java	(revision 959077)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java	(working copy)
@@ -95,9 +95,19 @@
         
         // Initialize the map with the default fields.
         fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyStore, bodyIndex, termVector));
-        fields.put(TITLE_FIELD, new Field(TITLE_FIELD, "", store, index, termVector));
+        final Field f = new Field(TITLE_FIELD, "", store, index, termVector);
+        // nocommit need explicit per-field control
+        if (store == Field.Store.YES) {
+          f.setIndexValues(Field.Values.BYTES_VAR_SORTED);
+        }
+        fields.put(TITLE_FIELD, f);
         fields.put(DATE_FIELD, new Field(DATE_FIELD, "", store, index, termVector));
-        fields.put(ID_FIELD, new Field(ID_FIELD, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
+        final Field f2 = new Field(ID_FIELD, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
+        // nocommit need explicit per-field control
+        if (store == Field.Store.YES) {
+          f2.setIndexValues(Field.Values.BYTES_VAR_SORTED);
+        }
+        fields.put(ID_FIELD, f2);
         fields.put(NAME_FIELD, new Field(NAME_FIELD, "", store, index, termVector));
         
         doc = new Document();
@@ -114,12 +124,29 @@
      */
     Field getField(String name, Store store, Index index, TermVector termVector) {
       if (!reuseFields) {
-        return new Field(name, "", store, index, termVector);
+        final Field f = new Field(name, "", store, index, termVector);
+        if (store == Field.Store.YES) {
+          // nocommit need explicit per-field control
+          if (name.equals("sort_field")) {
+            f.setIndexValues(Field.Values.PACKED_INTS_FIXED);
+          } else {
+            f.setIndexValues(Field.Values.BYTES_VAR_SORTED);
+          }
+        }
+        return f;
       }
       
       Field f = fields.get(name);
       if (f == null) {
         f = new Field(name, "", store, index, termVector);
+        if (store == Field.Store.YES) {
+          // nocommit need explicit per-field control
+          if (name.equals("sort_field")) {
+            f.setIndexValues(Field.Values.PACKED_INTS_FIXED);
+          } else {
+            f.setIndexValues(Field.Values.BYTES_VAR_SORTED);
+          }
+        }
         fields.put(name, f);
       }
       return f;
@@ -235,7 +262,7 @@
       }
     }
     
-    //System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
+    // System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
     return doc;
   }
 
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java	(revision 959077)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java	(working copy)
@@ -139,7 +139,7 @@
           for(int i=0;i<hits.scoreDocs.length;i++) {
             final int docID = hits.scoreDocs[i].doc;
             final Document doc = reader.document(docID);
-            System.out.println("  " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
+            System.out.println("  " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + "=" + doc.get(printHitsField));
           }
         }
 
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java	(revision 959077)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java	(working copy)
@@ -19,6 +19,7 @@
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
 import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.FieldComparatorSource;
 import org.apache.lucene.search.SortField;
 
 /**
@@ -75,8 +76,7 @@
         } else {
           throw new RuntimeException("You must specify the sort type ie page:int,subject:string");
         }
-        int type = getType(typeString);
-        sortField0 = new SortField(fieldName, type);
+        sortField0 = getSortField(fieldName, typeString);
       }
       sortFields[upto++] = sortField0;
     }
@@ -89,9 +89,23 @@
     this.sort = new Sort(sortFields);
   }
 
-  private int getType(String typeString) {
-    int type;
-    if (typeString.equals("float")) {
+  private SortField getSortField(String fieldName, String typeString) {
+    boolean useIndexValues = false;
+    int type = -1;
+
+    if (typeString.equals("intvalues")) {
+      useIndexValues = true;
+      type = SortField.INT;
+    } else if (typeString.equals("floatvalues")) {
+      useIndexValues = true;
+      type = SortField.FLOAT;
+    } else if (typeString.equals("stringvalues")) {
+      useIndexValues = true;
+      type = SortField.STRING;
+    } else if (typeString.equals("bytesvalues")) {
+      useIndexValues = true;
+      type = SortField.BYTES;
+    } else if (typeString.equals("float")) {
       type = SortField.FLOAT;
     } else if (typeString.equals("double")) {
       type = SortField.DOUBLE;
@@ -110,7 +124,10 @@
     } else {
       throw new RuntimeException("Unrecognized sort field type " + typeString);
     }
-    return type;
+
+    SortField f = new SortField(fieldName, type);
+    f.setUseIndexValues(useIndexValues);
+    return f;
   }
 
   @Override
Index: src/java/org/apache/lucene/document/AbstractField.java
===================================================================
--- src/java/org/apache/lucene/document/AbstractField.java	(revision 959077)
+++ src/java/org/apache/lucene/document/AbstractField.java	(working copy)
@@ -15,10 +15,13 @@
  * limitations under the License.
  */
 
+import java.util.Comparator;
+
 import org.apache.lucene.search.PhraseQuery; // for javadocs
 import org.apache.lucene.search.spans.SpanQuery; // for javadocs
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.StringHelper; // for javadocs
 
 
@@ -235,6 +238,58 @@
     return lazy;
   }
 
+
+  /** Controls whether per-field values are stored into
+   *  index.  This storage is non-sparse, so it's best to
+   *  use this when all docs have the field, and loads all
+   *  values into RAM, exposing a random access API, when
+   *  loaded.
+   *
+   * <p><b>NOTE</b>: This feature is experimental and the
+   * API is free to change in non-backwards-compatible ways.  */
+  public static enum Values {
+
+    /** Integral value is stored as packed ints.  The bit
+     *  precision is fixed across the segment, and
+     *  determined by the min/max values in the field. */
+    PACKED_INTS,
+    PACKED_INTS_FIXED,
+    SIMPLE_FLOAT_4BYTE,
+    SIMPLE_FLOAT_8BYTE,
+
+    // nocommit -- shouldn't lucene decide/detect straight vs
+    // deref, as well fixed vs var?
+    BYTES_FIXED_STRAIGHT,
+    BYTES_FIXED_DEREF,
+    BYTES_FIXED_SORTED,
+
+    BYTES_VAR_STRAIGHT,
+    BYTES_VAR_DEREF,
+    BYTES_VAR_SORTED
+
+    // nocommit -- need STRING variants as well
+  }
+
+  Values values;
+  public void setIndexValues(Values values) {
+    this.values = values;
+  }
+
+  public Values getIndexValues() {
+    return values;
+  }
+  
+  private Comparator<BytesRef> bytesComparator;
+
+  public void setBytesComparator(Comparator<BytesRef> c) {
+    this.bytesComparator = c;
+  }
+
+  public Comparator<BytesRef> getBytesComparator() {
+    return bytesComparator;
+  }
+
+
   /** Prints a Field for human consumption. */
   @Override
   public final String toString() {
@@ -281,6 +336,9 @@
     if (lazy){
       result.append(",lazy");
     }
+    if (values != null) {
+      result.append(",values=" + values);
+    }
     result.append('<');
     result.append(name);
     result.append(':');
Index: src/java/org/apache/lucene/index/ByteBlockPool.java
===================================================================
--- src/java/org/apache/lucene/index/ByteBlockPool.java	(revision 959077)
+++ src/java/org/apache/lucene/index/ByteBlockPool.java	(working copy)
@@ -39,12 +39,12 @@
 import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
 import org.apache.lucene.util.ArrayUtil;
 
-final class ByteBlockPool {
+public final class ByteBlockPool {
 
-  abstract static class Allocator {
-    abstract void recycleByteBlocks(byte[][] blocks, int start, int end);
-    abstract void recycleByteBlocks(List<byte[]> blocks);
-    abstract byte[] getByteBlock();
+  public abstract static class Allocator {
+    public abstract void recycleByteBlocks(byte[][] blocks, int start, int end);
+    public abstract void recycleByteBlocks(List<byte[]> blocks);
+    public abstract byte[] getByteBlock();
   }
 
   public byte[][] buffers = new byte[10][];
Index: src/java/org/apache/lucene/index/CompoundFileReader.java
===================================================================
--- src/java/org/apache/lucene/index/CompoundFileReader.java	(revision 959077)
+++ src/java/org/apache/lucene/index/CompoundFileReader.java	(working copy)
@@ -24,6 +24,7 @@
 import org.apache.lucene.store.Lock;
 
 import java.util.HashMap;
+import java.util.Set;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 
@@ -157,7 +158,7 @@
             throw new IOException("Stream closed");
         
         id = IndexFileNames.stripSegmentName(id);
-        FileEntry entry = entries.get(id);
+        final FileEntry entry = entries.get(id);
         if (entry == null)
             throw new IOException("No sub-file with id " + id + " found");
 
Index: src/java/org/apache/lucene/index/CompoundFileWriter.java
===================================================================
--- src/java/org/apache/lucene/index/CompoundFileWriter.java	(revision 959077)
+++ src/java/org/apache/lucene/index/CompoundFileWriter.java	(working copy)
@@ -48,9 +48,13 @@
  */
 final class CompoundFileWriter {
 
-    private static final class FileEntry {
+    static final class FileEntry {
+
+        FileEntry(String file) {
+            this.file = file;
+        }
         /** source file */
-        String file;
+        final String file;
 
         /** temporary holder for the start of directory entry for this file */
         long directoryOffset;
@@ -127,10 +131,7 @@
         if (! ids.add(file))
             throw new IllegalArgumentException(
                 "File " + file + " already added");
-
-        FileEntry entry = new FileEntry();
-        entry.file = file;
-        entries.add(entry);
+        entries.add(new FileEntry(file));
     }
 
     /** Merge files with the extensions added up to now.
Index: src/java/org/apache/lucene/index/DirectoryReader.java
===================================================================
--- src/java/org/apache/lucene/index/DirectoryReader.java	(revision 959077)
+++ src/java/org/apache/lucene/index/DirectoryReader.java	(working copy)
@@ -36,6 +36,9 @@
 import org.apache.lucene.store.Lock;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.values.Bytes;
+import org.apache.lucene.index.values.Floats;
+import org.apache.lucene.index.values.Ints;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.BytesRef;
@@ -1014,6 +1017,33 @@
 
     return commits;
   }
+  
+  public Ints.Reader getIndexValuesInts(String field) {
+    ensureOpen();
+    if (subReaders.length == 1) {
+      return subReaders[0].getIndexValuesInts(field);
+    } else {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  public Floats.Reader getIndexValuesFloats(String field) {
+    ensureOpen();
+    if (subReaders.length == 1) {
+      return subReaders[0].getIndexValuesFloats(field);
+    } else {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  public Bytes.Reader getIndexValuesBytes(String field) {
+    ensureOpen();
+    if (subReaders.length == 1) {
+      return subReaders[0].getIndexValuesBytes(field);
+    } else {
+      throw new UnsupportedOperationException();
+    }
+  }
 
   private static final class ReaderCommit extends IndexCommit {
     private String segmentsFileName;
Index: src/java/org/apache/lucene/index/DocFieldProcessor.java
===================================================================
--- src/java/org/apache/lucene/index/DocFieldProcessor.java	(revision 959077)
+++ src/java/org/apache/lucene/index/DocFieldProcessor.java	(working copy)
@@ -17,8 +17,23 @@
  * limitations under the License.
  */
 
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.values.Floats;
+import org.apache.lucene.index.values.SimpleFloatsImpl;
+import org.apache.lucene.index.values.Ints;
+import org.apache.lucene.index.values.PackedIntsImpl;
+import org.apache.lucene.index.values.Bytes;
+import org.apache.lucene.index.values.BytesBaseImpl;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.AbstractField;
+import org.apache.lucene.document.NumericField;
+
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Comparator;
 import java.util.Map;
 import java.util.HashMap;
 
@@ -37,6 +52,193 @@
   final FieldInfos fieldInfos = new FieldInfos();
   final DocFieldConsumer consumer;
   final StoredFieldsWriter fieldsWriter;
+  final private Map<String,IndexValuesProcessor> indexValues = new HashMap<String,IndexValuesProcessor>();
+
+  synchronized IndexValuesProcessor getProcessor(Directory dir, String segment, Fieldable field, FieldInfo fieldInfo)
+    throws IOException {
+    IndexValuesProcessor p = indexValues.get(field.name());
+    if (p == null) {
+      if (field instanceof AbstractField) {
+        final AbstractField f = (AbstractField) field;
+        Field.Values v = f.getIndexValues();
+        if (v == null) {
+          indexValues.put(field.name(), null);
+          return null;
+        }
+        final String id = segment + "_" + fieldInfo.number;
+        switch(v) {
+        case PACKED_INTS:
+          p = new IntValuesProcessor(dir, id, false);
+          break;
+        case PACKED_INTS_FIXED:
+          p = new IntValuesProcessor(dir, id, true);
+          break;
+        case SIMPLE_FLOAT_4BYTE:
+          p = new FloatValuesProcessor(dir, id, 4);
+          break;
+        case SIMPLE_FLOAT_8BYTE:
+          p = new FloatValuesProcessor(dir, id, 8);
+          break;
+        case BYTES_FIXED_STRAIGHT:
+          p = new BytesValuesProcessor(dir, id, true, null, BytesBaseImpl.Mode.STRAIGHT);
+          break;
+        case BYTES_FIXED_DEREF:
+          p = new BytesValuesProcessor(dir, id, true, null, BytesBaseImpl.Mode.DEREF);
+          break;
+        case BYTES_FIXED_SORTED:
+          p = new BytesValuesProcessor(dir, id, true, f.getBytesComparator(), BytesBaseImpl.Mode.SORTED);
+          break;
+        case BYTES_VAR_STRAIGHT:
+          p = new BytesValuesProcessor(dir, id, false, null, BytesBaseImpl.Mode.STRAIGHT);
+          break;
+        case BYTES_VAR_DEREF:
+          p = new BytesValuesProcessor(dir, id, false, null, BytesBaseImpl.Mode.DEREF);
+          break;
+        case BYTES_VAR_SORTED:
+          p = new BytesValuesProcessor(dir, id, false, f.getBytesComparator(), BytesBaseImpl.Mode.SORTED);
+          break;
+        }
+        fieldInfo.setIndexValues(v);
+        indexValues.put(field.name(), p);
+      } else {
+        indexValues.put(field.name(), null);
+        return null;
+      }
+    }
+
+    return p;
+  }
+
+  static abstract class IndexValuesProcessor {
+    public abstract void add(int docID, Fieldable field) throws IOException;
+    public abstract void finish(int docCount) throws IOException;
+    public abstract void files(Collection<String> files) throws IOException;
+  }
+
+  static class FloatValuesProcessor extends IndexValuesProcessor {
+    private final int precision;
+    private final Floats.Writer writer;
+    private final String id;
+
+    public FloatValuesProcessor(Directory dir, String id, int precision) throws IOException {
+      this.precision = precision;
+      this.id = id;
+      writer = SimpleFloatsImpl.getWriter(dir, id, precision);
+    }
+
+    @Override
+    public void add(int docID, Fieldable field) throws IOException {
+      if (field instanceof NumericField) {
+        final Number n = ((NumericField) field).getNumericValue();
+        writer.add(docID, n.doubleValue());
+        return;
+      } else {
+        final String s = field.stringValue();
+        if (s != null) {
+          writer.add(docID, Double.parseDouble(s));
+          return;
+        }
+      }
+
+      throw new IllegalArgumentException("could not extract float/double from field " + field);
+    }
+
+    @Override
+    public void finish(int docCount) throws IOException {
+      writer.finish(docCount);
+    }
+
+    @Override
+    public void files(Collection<String> files) {
+      SimpleFloatsImpl.files(id, files);
+    }
+  }
+
+  static class IntValuesProcessor extends IndexValuesProcessor {
+    private final Ints.Writer writer;
+    private final String id;
+
+    public IntValuesProcessor(Directory dir, String id, boolean fixedArray) throws IOException {
+      this.id = id;
+      writer = PackedIntsImpl.getWriter(dir, id, fixedArray);
+    }
+
+    @Override
+    public void add(int docID, Fieldable field) throws IOException {
+      if (field instanceof NumericField) {
+        final Number n = ((NumericField) field).getNumericValue();
+        writer.add(docID, n.longValue());
+        return;
+      } else {
+        final String s = field.stringValue();
+        if (s != null) {
+          writer.add(docID, Long.parseLong(s));
+          return;
+        }
+      }
+
+      throw new IllegalArgumentException("could not extract int/long from field " + field);
+    }
+
+    @Override
+    public void finish(int docCount) throws IOException {
+      writer.finish(docCount);
+    }
+
+    @Override
+    public void files(Collection<String> files) throws IOException {
+      PackedIntsImpl.files(id, files);
+    }
+  }
+
+  static class BytesValuesProcessor extends IndexValuesProcessor {
+    private final Bytes.Writer writer;
+    private final String id;
+    private final Directory dir;
+
+    public BytesValuesProcessor(Directory dir, String id, boolean fixedSize, Comparator<BytesRef> comp, BytesBaseImpl.Mode mode) throws IOException {
+      this.id = id;
+      writer = BytesBaseImpl.getWriter(dir, id, mode, comp, fixedSize);
+      this.dir = dir;
+    }
+
+    private final BytesRef bytesRef = new BytesRef();
+
+    // nocommit -- make this thread private and not sync'd
+    @Override
+    public synchronized void add(int docID, Fieldable field) throws IOException {
+      // nocommit -- should we insist on upfront binding, ie,
+      // "this field will be binary" or "this field will be
+      // String", and enforce here?  as is, one could flip
+      // back and forth per document
+      // nocommit -- also, we could use this to
+      // differentiate best default Comparator
+      if (field.isBinary()) {
+        bytesRef.bytes = field.getBinaryValue();
+        bytesRef.length = field.getBinaryLength();
+        bytesRef.offset = field.getBinaryOffset();
+        writer.add(docID, bytesRef);
+        return;
+      }
+      final String v = field.stringValue();
+      if (v != null) {
+        UnicodeUtil.UTF16toUTF8(v, 0, v.length(), bytesRef);
+        writer.add(docID, bytesRef);
+        return;
+      }
+      throw new IllegalArgumentException("could not extract byte[] from field " + field);
+    }
+
+    @Override
+    public void finish(int docCount) throws IOException {
+      writer.finish(docCount);
+    }
+
+    @Override
+    public void files(Collection<String> files) throws IOException {
+      BytesBaseImpl.files(dir, id, files);
+    }
+  }
 
   public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) {
     this.docWriter = docWriter;
@@ -63,6 +265,14 @@
     fieldsWriter.flush(state);
     consumer.flush(childThreadsAndFields, state);
 
+    for(IndexValuesProcessor p : indexValues.values()) {
+      if (p != null) {
+        p.finish(state.numDocs);
+        p.files(state.flushedFiles);
+      }
+    }
+    indexValues.clear();
+
     // Important to save after asking consumer to flush so
     // consumer can alter the FieldInfo* if necessary.  EG,
     // FreqProxTermsWriter does this with
Index: src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
===================================================================
--- src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java	(revision 959077)
+++ src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java	(working copy)
@@ -244,9 +244,16 @@
     // here.
     quickSort(fields, 0, fieldCount-1);
 
-    for(int i=0;i<fieldCount;i++)
-      fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);
-
+    for(int i=0;i<fieldCount;i++) {
+      final DocFieldProcessorPerField perField = fields[i];
+      // TODO(simonw): shall we clean up the fields in processFields? make sure we get the fieldable first here!!
+      final Fieldable fieldable = perField.fields[0];
+      perField.consumer.processFields(perField.fields, perField.fieldCount);
+      final DocFieldProcessor.IndexValuesProcessor processor = docFieldProcessor.getProcessor(docState.docWriter.directory,
+          docState.docWriter.segment, fieldable, perField.fieldInfo);
+      if (processor != null)
+        processor.add(docState.docID, fieldable);
+    }
     if (docState.maxTermPrefix != null && docState.infoStream != null) {
       docState.infoStream.println("WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'"); 
       docState.maxTermPrefix = null;
Index: src/java/org/apache/lucene/index/DocumentsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DocumentsWriter.java	(revision 959077)
+++ src/java/org/apache/lucene/index/DocumentsWriter.java	(working copy)
@@ -1316,7 +1316,7 @@
     
     /* Allocate another byte[] from the shared pool */
     @Override
-    byte[] getByteBlock() {
+    public byte[] getByteBlock() {
       synchronized(DocumentsWriter.this) {
         final int size = freeByteBlocks.size();
         final byte[] b;
@@ -1332,7 +1332,7 @@
     /* Return byte[]'s to the pool */
 
     @Override
-    void recycleByteBlocks(byte[][] blocks, int start, int end) {
+    public void recycleByteBlocks(byte[][] blocks, int start, int end) {
       synchronized(DocumentsWriter.this) {
         for(int i=start;i<end;i++) {
           freeByteBlocks.add(blocks[i]);
@@ -1342,7 +1342,7 @@
     }
 
     @Override
-    void recycleByteBlocks(List<byte[]> blocks) {
+    public void recycleByteBlocks(List<byte[]> blocks) {
       synchronized(DocumentsWriter.this) {
         final int size = blocks.size();
         for(int i=0;i<size;i++) {
Index: src/java/org/apache/lucene/index/FieldInfo.java
===================================================================
--- src/java/org/apache/lucene/index/FieldInfo.java	(revision 959077)
+++ src/java/org/apache/lucene/index/FieldInfo.java	(working copy)
@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import org.apache.lucene.document.Field;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -22,6 +24,8 @@
   public String name;
   public boolean isIndexed;
   public int number;
+  Field.Values indexValues;
+
 
   // true if term vector for this field should be stored
   boolean storeTermVector;
@@ -88,4 +92,18 @@
       }
     }
   }
+
+  void setIndexValues(Field.Values v) {
+    if (indexValues != null) {
+      if (indexValues != v) {
+        throw new IllegalArgumentException("indexValues is already set to " + indexValues + "; cannot change to " + v);
+      }
+    } else {
+      indexValues = v;
+    }
+  }
+
+  Field.Values getIndexValues() {
+    return indexValues;
+  }
 }
Index: src/java/org/apache/lucene/index/FieldInfos.java
===================================================================
--- src/java/org/apache/lucene/index/FieldInfos.java	(revision 959077)
+++ src/java/org/apache/lucene/index/FieldInfos.java	(working copy)
@@ -18,6 +18,7 @@
  */
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
@@ -39,7 +40,10 @@
   // First used in 2.9; prior to 2.9 there was no format header
   public static final int FORMAT_START = -2;
 
-  static final int CURRENT_FORMAT = FORMAT_START;
+  // Records index values for this field
+  public static final int FORMAT_INDEX_VALUES = -3;
+
+  static final int CURRENT_FORMAT = FORMAT_INDEX_VALUES;
   
   static final byte IS_INDEXED = 0x1;
   static final byte STORE_TERMVECTOR = 0x2;
@@ -298,9 +302,51 @@
       if (fi.omitNorms) bits |= OMIT_NORMS;
       if (fi.storePayloads) bits |= STORE_PAYLOADS;
       if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;
-      
+
       output.writeString(fi.name);
       output.writeByte(bits);
+
+      final byte b;
+
+      if (fi.indexValues == null) {
+        b = 0;
+      } else {
+        switch(fi.indexValues) {
+        case PACKED_INTS:
+          b = 1;
+          break;
+        case SIMPLE_FLOAT_4BYTE:
+          b = 2;
+          break;
+        case SIMPLE_FLOAT_8BYTE:
+          b = 3;
+          break;
+        case BYTES_FIXED_STRAIGHT:
+          b = 4;
+          break;
+        case BYTES_FIXED_DEREF:
+          b = 5;
+          break;
+        case BYTES_FIXED_SORTED:
+          b = 6;
+          break;
+        case BYTES_VAR_STRAIGHT:
+          b = 7;
+          break;
+        case BYTES_VAR_DEREF:
+          b = 8;
+          break;
+        case BYTES_VAR_SORTED:
+          b = 9;
+          break;
+        case PACKED_INTS_FIXED:
+          b = 10;
+          break;
+        default:
+          throw new IllegalStateException("unhandled indexValues type " + fi.indexValues);
+        }
+      }
+      output.writeByte(b);
     }
   }
 
@@ -324,7 +370,49 @@
       boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
       boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
       
-      addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+      FieldInfo fi = addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+
+      if (format <= FORMAT_INDEX_VALUES) {
+        final byte b = input.readByte();
+
+        switch(b) {
+        case 0:
+          fi.indexValues = null;
+          break;
+        case 1:
+          fi.indexValues = Field.Values.PACKED_INTS;
+          break;
+        case 2:
+          fi.indexValues = Field.Values.SIMPLE_FLOAT_4BYTE;
+          break;
+        case 3:
+          fi.indexValues = Field.Values.SIMPLE_FLOAT_8BYTE;
+          break;
+        case 4:
+          fi.indexValues = Field.Values.BYTES_FIXED_STRAIGHT;
+          break;
+        case 5:
+          fi.indexValues = Field.Values.BYTES_FIXED_DEREF;
+          break;
+        case 6:
+          fi.indexValues = Field.Values.BYTES_FIXED_SORTED;
+          break;
+        case 7:
+          fi.indexValues = Field.Values.BYTES_VAR_STRAIGHT;
+          break;
+        case 8:
+          fi.indexValues = Field.Values.BYTES_VAR_DEREF;
+          break;
+        case 9:
+          fi.indexValues = Field.Values.BYTES_VAR_SORTED;
+          break;
+        case 10:
+          fi.indexValues = Field.Values.PACKED_INTS_FIXED;
+          break;
+        default:
+          throw new IllegalStateException("unhandled indexValues type " + b);
+        }
+      }
     }
 
     if (input.getFilePointer() != input.length()) {
Index: src/java/org/apache/lucene/index/IndexFileDeleter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileDeleter.java	(revision 959077)
+++ src/java/org/apache/lucene/index/IndexFileDeleter.java	(working copy)
@@ -106,7 +106,8 @@
 
   /** Change to true to see details of reference counts when
    *  infoStream != null */
-  public static boolean VERBOSE_REF_COUNTS = false;
+  // NOTE(review): restored to false per the author's nocommit; was temporarily true for debugging
+  public static boolean VERBOSE_REF_COUNTS = false;
 
   void setInfoStream(PrintStream infoStream) {
     this.infoStream = infoStream;
Index: src/java/org/apache/lucene/index/IndexFileNames.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileNames.java	(revision 959077)
+++ src/java/org/apache/lucene/index/IndexFileNames.java	(working copy)
@@ -78,6 +78,12 @@
 
   /** Extension of separate norms */
   public static final String SEPARATE_NORMS_EXTENSION = "s";
+  
+  /** Extension of Column-Stride Field data files */
+  public static final String CSF_DATA_EXTENSION = "dat";
+  
+  /** Extension of Column-Stride Field index files */
+  public static final String CSF_INDEX_EXTENSION = "idx";
 
   /**
    * This array contains all filename extensions used by
@@ -98,6 +104,8 @@
     GEN_EXTENSION,
     NORMS_EXTENSION,
     COMPOUND_FILE_STORE_EXTENSION,
+    CSF_DATA_EXTENSION,
+    CSF_INDEX_EXTENSION
   };
 
   public static final String[] STORE_INDEX_EXTENSIONS = new String[] {
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java	(revision 959077)
+++ src/java/org/apache/lucene/index/IndexReader.java	(working copy)
@@ -21,6 +21,10 @@
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.values.Bytes;
+import org.apache.lucene.index.values.Cache;
+import org.apache.lucene.index.values.Floats;
+import org.apache.lucene.index.values.Ints;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -1292,6 +1296,25 @@
     throw new UnsupportedOperationException("This reader does not support this method.");
   }
 
+  // nocommit -- these defaults just throw; concrete readers must override
+  public Ints.Reader getIndexValuesInts(String field) {
+    throw new UnsupportedOperationException();
+  }
+
+  public Floats.Reader getIndexValuesFloats(String field) {
+    throw new UnsupportedOperationException();
+  }
+
+  public Bytes.Reader getIndexValuesBytes(String field) {
+    throw new UnsupportedOperationException();
+  }
+
+  private final Cache indexValuesCache = new Cache(this);
+
+  // nocommit -- don't expose readers if we have this?
+  public Cache getIndexValuesCache() {
+    return indexValuesCache;
+  }
 
   private Fields fields;
 
Index: src/java/org/apache/lucene/index/SegmentInfo.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentInfo.java	(revision 959077)
+++ src/java/org/apache/lucene/index/SegmentInfo.java	(working copy)
@@ -473,7 +473,11 @@
     if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) {
       fileSet.add(delFileName);
     }
-
+    //nocommit - need something cleaner
+      for(int i=0;i<10;i++) {
+	     addIfExists(fileSet,  IndexFileNames.fileNameFromGeneration(name,"dat", i));
+	     addIfExists(fileSet,  IndexFileNames.fileNameFromGeneration(name, "idx", i));
+      }
     if (normGen != null) {
       for (int i = 0; i < normGen.length; i++) {
         long gen = normGen[i];
Index: src/java/org/apache/lucene/index/SegmentMerger.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentMerger.java	(revision 959077)
+++ src/java/org/apache/lucene/index/SegmentMerger.java	(working copy)
@@ -25,12 +25,19 @@
 import java.util.List;
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader.FieldOption;
 import org.apache.lucene.index.MergePolicy.MergeAbortedException;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.MergeState;
 import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.values.Bytes;
+import org.apache.lucene.index.values.BytesBaseImpl;
+import org.apache.lucene.index.values.Floats;
+import org.apache.lucene.index.values.Ints;
+import org.apache.lucene.index.values.PackedIntsImpl;
+import org.apache.lucene.index.values.SimpleFloatsImpl;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -157,6 +164,8 @@
     if (mergeDocStores && fieldInfos.hasVectors())
       mergeVectors();
 
+    mergeIndexValues();
+
     return mergedDocs;
   }
 
@@ -183,19 +192,28 @@
                              !ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
         fileSet.add(IndexFileNames.segmentFileName(segment, "", ext));
     }
-
     codec.files(directory, info, fileSet);
     
     // Fieldable norm files
     int numFIs = fieldInfos.size();
     for (int i = 0; i < numFIs; i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
+      if(fi.indexValues != null) {
+	      fileSet.add(IndexFileNames.segmentFileName(segment,Integer.toString(fi.number), IndexFileNames.CSF_DATA_EXTENSION));
+        final String idxFile = IndexFileNames.segmentFileName(segment, Integer
+            .toString(fi.number), IndexFileNames.CSF_INDEX_EXTENSION);
+        if (directory.fileExists(idxFile)) {
+          fileSet.add(idxFile);
+        }
+      }
+   
       if (fi.isIndexed && !fi.omitNorms) {
         fileSet.add(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
         break;
       }
     }
 
+    
     // Vector files
     if (fieldInfos.hasVectors() && mergeDocStores) {
       for (String ext : IndexFileNames.VECTOR_EXTENSIONS) {
@@ -288,10 +306,18 @@
         int numReaderFieldInfos = readerFieldInfos.size();
         for (int j = 0; j < numReaderFieldInfos; j++) {
           FieldInfo fi = readerFieldInfos.fieldInfo(j);
-          fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector,
-              fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
-              !reader.hasNorms(fi.name), fi.storePayloads,
-              fi.omitTermFreqAndPositions);
+          FieldInfo merged = fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector,
+                                            fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
+                                            !reader.hasNorms(fi.name), fi.storePayloads,
+                                            fi.omitTermFreqAndPositions);
+          final Field.Values fiIndexValues = fi.indexValues;
+          final Field.Values mergedIndexValues = merged.indexValues;
+          if (mergedIndexValues == null) {
+            merged.setIndexValues(fiIndexValues);
+          } else if (mergedIndexValues != fiIndexValues) {
+            // nocommit -- what to do?
+            throw new IllegalStateException("cannot merge field " + fi.name + " indexValues changed from " + mergedIndexValues + " to " + fiIndexValues);
+          }
         }
       } else {
         addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
@@ -302,6 +328,8 @@
         addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
         addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false);
         fieldInfos.add(reader.getFieldNames(FieldOption.UNINDEXED), false);
+
+        // nocommit -- how should we handle index values here?
       }
     }
     fieldInfos.write(directory, segment + ".fnm");
@@ -362,6 +390,144 @@
     return docCount;
   }
 
+  private void mergeIndexValues() throws IOException {
+    final int numFields = fieldInfos.size();
+    for(int i=0;i<numFields;i++) {
+      final FieldInfo fieldInfo = fieldInfos.fieldInfo(i);
+
+      final Field.Values v = fieldInfo.indexValues;
+
+      if (v != null) {
+
+        int docBase = 0;
+
+        List<Ints.Writer.MergeState> intsMergeStates = null;
+        List<Floats.Writer.MergeState> floatsMergeStates = null;
+        List<Bytes.Writer.MergeState> bytesMergeStates = null;
+
+        for(IndexReader reader : readers) {
+          Ints.Reader ints = reader.getIndexValuesInts(fieldInfo.name);
+          if (ints != null) {
+            if (intsMergeStates == null) {
+              intsMergeStates = new ArrayList<Ints.Writer.MergeState>();
+            }
+            final Ints.Writer.MergeState state = new Ints.Writer.MergeState();
+            state.intsReader = ints;
+            state.reader = reader;
+            state.docBase = docBase;
+            state.docCount = reader.maxDoc();
+            intsMergeStates.add(state);
+          }
+
+          Floats.Reader floats = reader.getIndexValuesFloats(fieldInfo.name);
+          if (floats != null) {
+            if (floatsMergeStates == null) {
+              floatsMergeStates = new ArrayList<Floats.Writer.MergeState>();
+            }
+            final Floats.Writer.MergeState state = new Floats.Writer.MergeState();
+            state.floatsReader = floats;
+            state.reader = reader;
+            state.docBase = docBase;
+            state.docCount = reader.maxDoc();
+            floatsMergeStates.add(state);
+          }
+
+          Bytes.Reader bytes = reader.getIndexValuesBytes(fieldInfo.name);
+          if (bytes != null) {
+            if (bytesMergeStates == null) {
+              bytesMergeStates = new ArrayList<Bytes.Writer.MergeState>();
+            }
+            final Bytes.Writer.MergeState state = new Bytes.Writer.MergeState();
+            state.bytesReader = bytes;
+            state.reader = reader;
+            state.docBase = docBase;
+            state.docCount = reader.maxDoc();
+            bytesMergeStates.add(state);
+          }
+
+          docBase += reader.numDocs();
+        }
+
+        final String id = segment + "_" + fieldInfo.number;
+
+        switch(v) {
+        case PACKED_INTS:
+          {
+            Ints.Writer writer = PackedIntsImpl.getWriter(directory, id, false);
+            writer.add(intsMergeStates);
+            writer.finish(mergedDocs);
+            break;
+          }
+        case PACKED_INTS_FIXED:
+          {
+            Ints.Writer writer = PackedIntsImpl.getWriter(directory, id, true);
+            writer.add(intsMergeStates);
+            writer.finish(mergedDocs);
+            break;
+          }
+        case SIMPLE_FLOAT_4BYTE:
+          {
+            Floats.Writer writer = SimpleFloatsImpl.getWriter(directory, id, 4);
+            writer.add(floatsMergeStates);
+            writer.finish(mergedDocs);
+            break;
+          }
+        case SIMPLE_FLOAT_8BYTE:
+          {
+            Floats.Writer writer = SimpleFloatsImpl.getWriter(directory, id, 8);
+            writer.add(floatsMergeStates);
+            writer.finish(mergedDocs);
+            break;
+          }
+        case BYTES_FIXED_STRAIGHT:
+          {
+            Bytes.Writer writer = BytesBaseImpl.getWriter(directory, id, BytesBaseImpl.Mode.STRAIGHT, null, true);
+            writer.add(bytesMergeStates);
+            writer.finish(mergedDocs);
+            break;
+          }
+        case BYTES_FIXED_DEREF:
+          {
+            Bytes.Writer writer = BytesBaseImpl.getWriter(directory, id, BytesBaseImpl.Mode.DEREF, null, true);
+            writer.add(bytesMergeStates);
+            writer.finish(mergedDocs);
+            break;
+          }
+        case BYTES_FIXED_SORTED:
+          {
+            // nocommit -- enable setting Comparator
+            Bytes.Writer writer = BytesBaseImpl.getWriter(directory, id, BytesBaseImpl.Mode.SORTED, null, true);
+            writer.add(bytesMergeStates);
+            writer.finish(mergedDocs);
+            break;
+          }
+        case BYTES_VAR_STRAIGHT:
+          {
+            Bytes.Writer writer = BytesBaseImpl.getWriter(directory, id, BytesBaseImpl.Mode.STRAIGHT, null, false);
+            writer.add(bytesMergeStates);
+            writer.finish(mergedDocs);
+            break;
+          }
+        case BYTES_VAR_DEREF:
+          {
+            Bytes.Writer writer = BytesBaseImpl.getWriter(directory, id, BytesBaseImpl.Mode.DEREF, null, false);
+            writer.add(bytesMergeStates);
+            writer.finish(mergedDocs);
+            break;
+          }
+        case BYTES_VAR_SORTED:
+          {
+            // nocommit -- enable setting Comparator
+            Bytes.Writer writer = BytesBaseImpl.getWriter(directory, id, BytesBaseImpl.Mode.SORTED, null, false);
+            writer.add(bytesMergeStates);
+            writer.finish(mergedDocs);
+            break;
+          }
+        }
+      }
+    }
+  }
+
   private int copyFieldsWithDeletions(final FieldsWriter fieldsWriter, final IndexReader reader,
                                       final FieldsReader matchingFieldsReader)
     throws IOException, MergeAbortedException, CorruptIndexException {
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentReader.java	(revision 959077)
+++ src/java/org/apache/lucene/index/SegmentReader.java	(working copy)
@@ -30,6 +30,7 @@
 
 import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.BufferedIndexInput;
@@ -41,8 +42,15 @@
 import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.values.Bytes;
+import org.apache.lucene.index.values.BytesBaseImpl;
+import org.apache.lucene.index.values.Floats;
+import org.apache.lucene.index.values.Ints;
+import org.apache.lucene.index.values.PackedIntsImpl;
+import org.apache.lucene.index.values.SimpleFloatsImpl;
 import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * @lucene.experimental
@@ -134,7 +142,7 @@
         // Ask codec for its Fields
         fields = si.getCodec().fieldsProducer(new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor));
         assert fields != null;
-
+        openIndexValuesReaders(cfsDir, si);
         success = true;
       } finally {
         if (!success) {
@@ -149,6 +157,56 @@
       this.origInstance = origInstance;
     }
 
+    final Map<String,Ints.Reader> intValues = new HashMap<String,Ints.Reader>();
+    final Map<String,Floats.Reader> floatValues = new HashMap<String,Floats.Reader>();
+    final Map<String,Bytes.Reader> bytesValues = new HashMap<String,Bytes.Reader>();
+
+    // Only opens files... doesn't actually load any values
+    private void openIndexValuesReaders(Directory dir, SegmentInfo si) throws IOException {
+      final int numFields = fieldInfos.size();
+      for(int i=0;i<numFields;i++) {
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(i);
+        final Field.Values v = fieldInfo.getIndexValues();
+        if (v != null) {
+          final String field = fieldInfo.name;
+          final String id = IndexFileNames.segmentFileName(segment, Integer
+              .toString(fieldInfo.number), "");
+          switch(v) {
+          case PACKED_INTS:
+          case PACKED_INTS_FIXED:
+            intValues.put(field, PackedIntsImpl.getReader(dir, id));
+            break;
+          case SIMPLE_FLOAT_4BYTE:
+            floatValues.put(field, SimpleFloatsImpl.getReader(dir, id, si.docCount));
+            break;
+          case SIMPLE_FLOAT_8BYTE:
+            floatValues.put(field, SimpleFloatsImpl.getReader(dir, id, si.docCount));
+            break;
+          case BYTES_FIXED_STRAIGHT:
+            bytesValues.put(field, BytesBaseImpl.getReader(dir, id, BytesBaseImpl.Mode.STRAIGHT, true, si.docCount));
+            break;
+          case BYTES_FIXED_DEREF:
+            bytesValues.put(field, BytesBaseImpl.getReader(dir, id, BytesBaseImpl.Mode.DEREF, true, si.docCount));
+            break;
+          case BYTES_FIXED_SORTED:
+            bytesValues.put(field, BytesBaseImpl.getReader(dir, id, BytesBaseImpl.Mode.SORTED, true, si.docCount));
+            break;
+          case BYTES_VAR_STRAIGHT:
+            bytesValues.put(field, BytesBaseImpl.getReader(dir, id, BytesBaseImpl.Mode.STRAIGHT, false, si.docCount));
+            break;
+          case BYTES_VAR_DEREF:
+            bytesValues.put(field, BytesBaseImpl.getReader(dir, id, BytesBaseImpl.Mode.DEREF, false, si.docCount));
+            break;
+          case BYTES_VAR_SORTED:
+            bytesValues.put(field, BytesBaseImpl.getReader(dir, id, BytesBaseImpl.Mode.SORTED, false, si.docCount));
+            break;
+          default:
+            throw new IllegalStateException("unrecognized index values mode " + v);
+          }
+        }
+      }
+    }
+
     synchronized TermVectorsReader getTermVectorsReaderOrig() {
       return termVectorsReaderOrig;
     }
@@ -166,9 +224,7 @@
     }
 
     synchronized void decRef() throws IOException {
-
       if (ref.decrementAndGet() == 0) {
-
         if (fields != null) {
           fields.close();
         }
@@ -197,9 +253,23 @@
         if (origInstance != null) {
           FieldCache.DEFAULT.purge(origInstance);
         }
+        closeIndexValuesReaders();
       }
     }
 
+	private void closeIndexValuesReaders() throws IOException {
+	    for (Ints.Reader reader : intValues.values()) {
+		reader.close();
+	    }
+	    for (Floats.Reader reader : floatValues.values()) {
+		reader.close();
+	    }
+	    for (Bytes.Reader reader : bytesValues.values()) {
+		reader.close();
+	    }
+	}
+
+
     synchronized void openDocStores(SegmentInfo si) throws IOException {
 
       assert si.name.equals(segment);
@@ -1228,4 +1298,19 @@
   public int getTermInfosIndexDivisor() {
     return core.termsIndexDivisor;
   }
+
+  @Override
+  public Ints.Reader getIndexValuesInts(String field) {
+    return core.intValues.get(field);
+  }
+
+  @Override
+  public Floats.Reader getIndexValuesFloats(String field) {
+    return core.floatValues.get(field);
+  }
+
+  @Override
+  public Bytes.Reader getIndexValuesBytes(String field) {
+    return core.bytesValues.get(field);
+  }
 }
Index: src/java/org/apache/lucene/index/values/Bytes.java
===================================================================
--- src/java/org/apache/lucene/index/values/Bytes.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/Bytes.java	(revision 0)
@@ -0,0 +1,116 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.ConsumesRAM;
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+import java.io.Closeable;
+import java.util.Comparator;
+import java.util.List;
+
+/** Provides concrete Writer/Reader impls for byte[] value
+ *  per document.  There are 6 package-private impls of
+ *  this, for all combinations of STRAIGHT/DEREF/SORTED X
+ *  fixed/not fixed.
+ *
+ * <p>NOTE: The total amount of byte[] data stored (across a
+ *    single segment) cannot exceed 2GB. </p>
+ * <p>NOTE: Each byte[] must be &lt;= 32768 bytes in length</p> */
+
+public class Bytes {
+
+  public static abstract class Source implements ConsumesRAM {
+    public abstract BytesRef get(int docID);
+
+    /** Returns number of unique values.  Some impls may
+     * throw UnsupportedOperationException. */
+    public abstract int getValueCount();
+  }
+
+  public static abstract class SortedSource extends Source {
+
+    @Override
+    public BytesRef get(int docID) {
+      return getByOrd(ord(docID));
+    }
+
+    /** Returns ord for specified docID.  If this docID had
+     *  not been added to the Writer, the ord is 0.  Ord is
+     *  dense, ie, starts at 0, then increments by 1 for the
+     *  next (as defined by {@link Comparator}) value.  */
+    public abstract int ord(int docID);
+
+    /** Returns value for specified ord. */
+    public abstract BytesRef getByOrd(int ord);
+
+    public static class LookupResult {
+      public boolean found;
+      public int ord;
+    }
+
+    /** Finds the largest ord whose value is &lt;= the
+     *  requested value.  If {@link LookupResult#found} is
+     *  true, then ord is an exact match.  The returned
+     *  {@link LookupResult} may be reused across calls. */
+    public abstract LookupResult getByValue(BytesRef value);
+  }
+
+
+  /** Writes one segment's worth of values */
+  public static abstract class Writer implements ConsumesRAM {
+
+    /** Records the specified value for the docID */
+    public abstract void add(int docID, BytesRef value) throws IOException;
+
+    /** Finish writing, close any files */
+    public abstract void finish(int docCount) throws IOException;
+
+    public static class MergeState {
+      public Reader bytesReader;
+      public IndexReader reader;                         // TODO: change to Bits with flex
+      public int docBase;
+      public int docCount;
+    }
+
+    public void add(List<MergeState> states) throws IOException {
+      // TODO: replace with more efficient, non RAM resident merging
+      for(MergeState state : states) {
+        final Source s = state.bytesReader.load();
+        int docID = state.docBase;
+        for(int i=0;i<state.docCount;i++) {
+          if (!state.reader.isDeleted(i)) {
+            add(docID++, s.get(i));
+          }
+        }
+      }
+    }
+  }
+
+  public static abstract class Reader implements Closeable {
+    public abstract Source load() throws IOException;
+
+    /** NOTE: the comparator provided here must match the
+     *  one used at indexing. */
+    public SortedSource loadSorted(Comparator<BytesRef> comp) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+  }
+}

Property changes on: src/java/org/apache/lucene/index/values/Bytes.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/BytesBaseImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/BytesBaseImpl.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/BytesBaseImpl.java	(revision 0)
@@ -0,0 +1,171 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Base class for specific Bytes Reader/Writer implementations */
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+import java.util.Collection;
+import java.util.Comparator;
+import java.io.IOException;
+
+public final class BytesBaseImpl {
+
+  // don't instantiate!
+  private BytesBaseImpl() {}
+
+
+  public static enum Mode {STRAIGHT, DEREF, SORTED};
+
+  public static void files(Directory dir, String id, Collection<String> files) throws IOException {
+    files.add(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION));
+    final String idxFile = IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_INDEX_EXTENSION);
+    if (dir.fileExists(idxFile)) {
+      files.add(idxFile);
+    }
+  }
+
+  // nocommit -- i shouldn't have to specify fixed?  can
+  // track itself & do the right thing at write time?
+  public static Bytes.Writer getWriter(Directory dir, String id, Mode mode, Comparator<BytesRef> comp, boolean fixedSize)
+    throws IOException {
+
+    if (comp == null) {
+      // nocommit
+      comp = BytesRef.getByteOrderComparator();
+    }
+
+    if (fixedSize) {
+      if (mode == Mode.STRAIGHT) {
+        return new FixedStraightBytesImpl.Writer(dir, id);
+      } else if (mode == Mode.DEREF) {
+        return new FixedDerefBytesImpl.Writer(dir, id);
+      } else if (mode == Mode.SORTED) {
+        return new FixedSortedBytesImpl.Writer(dir, id, comp);
+      }
+    } else {
+      if (mode == Mode.STRAIGHT) {
+        return new VarStraightBytesImpl.Writer(dir, id);
+      } else if (mode == Mode.DEREF) {
+        return new VarDerefBytesImpl.Writer(dir, id);
+      } else if (mode == Mode.SORTED) {
+        return new VarSortedBytesImpl.Writer(dir, id, comp);
+      }
+    }
+
+    throw new IllegalArgumentException("");
+  }
+
+  // nocommit -- I can peek @ header to determine fixed/mode?
+  public static Bytes.Reader getReader(Directory dir, String id, Mode mode, boolean fixedSize, int maxDoc)
+    throws IOException {
+    if (fixedSize) {
+      if (mode == Mode.STRAIGHT) {
+        return new FixedStraightBytesImpl.Reader(dir, id, maxDoc);
+      } else if (mode == Mode.DEREF) {
+        return new FixedDerefBytesImpl.Reader(dir, id, maxDoc);
+      } else if (mode == Mode.SORTED) {
+        return new FixedSortedBytesImpl.Reader(dir, id, maxDoc);
+      }
+    } else {
+      if (mode == Mode.STRAIGHT) {
+        return new VarStraightBytesImpl.Reader(dir, id, maxDoc);
+      } else if (mode == Mode.DEREF) {
+        return new VarDerefBytesImpl.Reader(dir, id, maxDoc);
+      } else if (mode == Mode.SORTED) {
+        return new VarSortedBytesImpl.Reader(dir, id, maxDoc);
+      }
+    }
+
+    throw new IllegalArgumentException("");
+  }
+
+  public static abstract class Writer extends Bytes.Writer {
+
+    protected final IndexOutput idxOut;
+    protected final IndexOutput datOut;
+
+    protected Writer(Directory dir, String id,  String codecName, int version, boolean doIndex) throws IOException {
+      // nocommit -- should we create only when really
+      // necessary?  many impls don't write anything until finish
+      datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION));
+      CodecUtil.writeHeader(datOut, codecName, version);
+      assert datOut.getFilePointer() == CodecUtil.headerLength(codecName);
+      if (doIndex) {
+        idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_INDEX_EXTENSION));
+        CodecUtil.writeHeader(idxOut, codecName, version);
+      } else {
+        idxOut = null;
+      }
+    }
+
+    /** Must be called only with increasing docIDs. It's OK
+     *  for some docIDs to be skipped; they will be filled
+     *  with 0 bytes. */
+    @Override
+    public abstract void add(int docID, BytesRef bytes) throws IOException;
+
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      datOut.close();
+      if (idxOut != null) {
+        idxOut.close();
+      }
+    }
+  }
+
+  /** Opens all necessary files, but does not read any data
+   *  in until you call {@link #load}. */
+  public static abstract class Reader extends Bytes.Reader {
+
+    protected final IndexInput idxIn;
+    protected final IndexInput datIn;
+    protected final int version;
+    protected final String id;
+
+    protected Reader(Directory dir, String id, String codecName, int maxVersion, boolean doIndex) throws IOException {
+      this.id = id;
+      datIn = dir.openInput(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION));
+      version = CodecUtil.checkHeader(datIn, codecName, maxVersion);
+
+      if (doIndex) {
+        idxIn = dir.openInput(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_INDEX_EXTENSION));
+        final int version2 = CodecUtil.checkHeader(idxIn, codecName, maxVersion);
+        assert version == version2;
+      } else {
+        idxIn = null;
+      }
+    }
+
+    @Override
+    public abstract Bytes.Source load() throws IOException;
+
+    public void close() throws IOException {
+      if (datIn != null) {
+        datIn.close();
+      }
+      if (idxIn != null) {
+        idxIn.close();
+      }
+    }
+  }
+}
\ No newline at end of file

Property changes on: src/java/org/apache/lucene/index/values/BytesBaseImpl.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/Cache.java
===================================================================
--- src/java/org/apache/lucene/index/values/Cache.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/Cache.java	(revision 0)
@@ -0,0 +1,96 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.BytesRef;
+
+import java.util.Comparator;
+import java.util.Map;
+import java.util.HashMap;
+import java.io.IOException;
+
+public class Cache {
+  final IndexReader r;
+  final Map<String,Ints.Source> ints = new HashMap<String,Ints.Source>();
+  final Map<String,Floats.Source> floats = new HashMap<String,Floats.Source>();
+  final Map<String,Bytes.Source> bytes = new HashMap<String,Bytes.Source>();
+  final Map<String,Bytes.SortedSource> sortedBytes = new HashMap<String,Bytes.SortedSource>();
+
+  public Cache(IndexReader r) {
+    this.r = r;
+  }
+
+  synchronized public Ints.Source getInts(String id) throws IOException {
+    Ints.Source s = ints.get(id);
+    if (s == null) {
+      s = r.getIndexValuesInts(id).load();
+      ints.put(id, s);
+    }
+
+    return s;
+  }
+
+  synchronized public Floats.Source getFloats(String id) throws IOException {
+    Floats.Source s = floats.get(id);
+    if (s == null) {
+      s = r.getIndexValuesFloats(id).load();
+      floats.put(id, s);
+    }
+
+    return s;
+  }
+
+  synchronized public Bytes.SortedSource getSortedBytes(String id, Comparator<BytesRef> comp) throws IOException {
+    Bytes.SortedSource s = sortedBytes.get(id);
+    if (s == null) {
+      s = r.getIndexValuesBytes(id).loadSorted(comp);
+      sortedBytes.put(id, s);
+    } else {
+      // TODO(simonw): verify comp is the same!
+    }
+
+    return s;
+  }
+
+  synchronized public Bytes.Source getBytes(String id) throws IOException {
+    Bytes.Source s = bytes.get(id);
+    if (s == null) {
+      s = r.getIndexValuesBytes(id).load();
+      bytes.put(id, s);
+    }
+
+    return s;
+  }
+
+  public void purgeInts(String id) {
+    ints.remove(id);
+  }
+
+  public void purgeFloats(String id) {
+    floats.remove(id);
+  }
+
+  public void purgeBytes(String id) {
+    bytes.remove(id);
+  }
+
+  public void purgeSortedBytes(String id) {
+    sortedBytes.remove(id);
+  }
+}

Property changes on: src/java/org/apache/lucene/index/values/Cache.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java	(revision 0)
@@ -0,0 +1,171 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesHash;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.CodecUtil;
+import java.io.IOException;
+
+// Stores fixed-length byte[] by deref, ie when two docs
+// have the same value, they store only 1 byte[]
+
+class FixedDerefBytesImpl {
+
+  static final String CODEC_NAME = "FixedDerefBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class Writer extends BytesBaseImpl.Writer {
+    private int size = -1;
+    private int idUpto = 1;
+    private int[] docToID;
+
+    final class Entry extends BytesHash.Entry {
+      int id;
+      public Entry() {
+        id = idUpto++;
+      }
+    }
+
+    private final BytesHash<Entry> hash = new BytesHash<Entry>(Entry.class) {
+      @Override
+      protected FixedDerefBytesImpl.Writer.Entry newEntry() {
+        return new FixedDerefBytesImpl.Writer.Entry();
+      }
+      @Override
+      public long bytesPerEntry() {
+        return super.bytesPerEntry() + RamUsageEstimator.NUM_BYTES_INT;
+      }
+    };
+
+    public Writer(Directory dir, String id) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, true);
+      docToID = new int[1];
+    }
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if (size == -1) {
+        size = bytes.length;
+        datOut.writeInt(size);
+      } else if (bytes.length != size) {
+        throw new IllegalArgumentException("expected bytes size=" + size + " but got " + bytes.length);
+      }
+      final int idUptoStart = idUpto;
+      final Entry e = hash.add(bytes);
+
+      if (e.id == idUptoStart) {
+        // new added entry
+        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+      }
+
+      if (docID >= docToID.length) {
+        docToID = ArrayUtil.grow(docToID, 1+docID);
+      }
+      docToID[docID] = e.id;
+    }
+
+    synchronized public long ramBytesUsed() {
+      return RamUsageEstimator.NUM_BYTES_INT * docToID.length + hash.ramBytesUsed();
+    }
+
+    // Important that we get docCount, in case there were
+    // some last docs that we didn't see
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      idxOut.writeInt(idUpto-1);
+      // write index
+      // nocommit -- optionally use force bits:
+      final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(idUpto-1));
+      final int limit = docCount > docToID.length? docToID.length: docCount;
+      for(int i=0;i<limit;i++) {
+        w.add(docToID[i]);
+      }
+      // fill up remaining doc with zeros
+      for(int i=limit;i<docCount;i++) {
+        w.add(0);
+      }
+      w.finish();
+      super.finish(docCount);
+    }
+  }
+
+  public static class Reader extends BytesBaseImpl.Reader {
+    private final int size;
+
+    Reader(Directory dir, String id, int maxDoc)
+      throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+
+      size = datIn.readInt();
+    }
+
+    @Override
+    public Bytes.Source load() throws IOException {
+      return new Source();
+    }
+
+    private class Source extends Bytes.Source {
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final PackedInts.Reader index;
+      private final int numValue;
+
+      public Source() throws IOException {
+        datIn.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
+        idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
+
+        numValue = idxIn.readInt();
+        data = new byte[size*numValue];
+        datIn.readBytes(data, 0, size*numValue);
+
+        index = PackedInts.getReader(idxIn);
+        bytesRef.bytes = data;
+        bytesRef.length = size;
+      }
+
+      @Override
+      public BytesRef get(int docID) {
+        final int id = (int) index.get(docID);
+        if (id == 0) {
+          return null;
+        } else {
+          bytesRef.offset = (int) ((id-1) * size);
+          return bytesRef;
+        }
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move ram calculation to PackedInts?!
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length +
+               (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index.getBitsPerValue() * index.size());
+      }
+
+      @Override
+      public int getValueCount() {
+        return numValue;
+      }
+    }
+  }
+}

Property changes on: src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java	(revision 0)
@@ -0,0 +1,235 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesHash;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.CodecUtil;
+import java.io.IOException;
+
+import java.util.Comparator;
+
+// Stores fixed-length byte[] in sorted order, deduped: when two docs
+// have the same value, only one byte[] is stored, addressed by ordinal
+
+class FixedSortedBytesImpl {
+
+  static final String CODEC_NAME = "FixedSortedBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class Writer extends BytesBaseImpl.Writer {
+    private int size = -1;
+    private Entry[] docToEntry;
+    private final Comparator<BytesRef> comp;
+
+    final static class Entry extends BytesHash.Entry {
+      int address;
+    }
+
+    private final BytesHash<Entry> hash = new BytesHash<Entry>(Entry.class) {
+      @Override
+      protected FixedSortedBytesImpl.Writer.Entry newEntry() {
+        return new FixedSortedBytesImpl.Writer.Entry();
+      }
+      @Override
+      public long bytesPerEntry() {
+        return super.bytesPerEntry() + RamUsageEstimator.NUM_BYTES_INT;
+      }
+    };
+
+    public Writer(Directory dir, String id,  Comparator<BytesRef> comp) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, true);
+      docToEntry = new Entry[1];
+      this.comp = comp;
+    }
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if (size == -1) {
+        size = bytes.length;
+        datOut.writeInt(size);
+      } else if (bytes.length != size) {
+        throw new IllegalArgumentException("expected bytes size=" + size + " but got " + bytes.length);
+      }
+      if (docID >= docToEntry.length) {
+        Entry[] newArray = new Entry[ArrayUtil.oversize(1+docID, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+        System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
+        docToEntry = newArray;
+      }
+      docToEntry[docID] = hash.add(bytes);
+    }
+
+    // Approximate RAM used: one object ref per doc slot plus the hash's own accounting.
+    // Fixed: NUM_BYTES_OBJ_REF does not exist on RamUsageEstimator; the correct
+    // constant (used elsewhere in this same class, see the add() growth path) is
+    // NUM_BYTES_OBJECT_REF.
+    synchronized public long ramBytesUsed() {
+      return RamUsageEstimator.NUM_BYTES_OBJECT_REF * docToEntry.length + hash.ramBytesUsed();
+    }
+
+    // Important that we get docCount, in case there were
+    // some last docs that we didn't see
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+
+      Entry[] sortedEntries = hash.sort(comp);
+      final int count = hash.size();
+
+      // first dump bytes data, recording address as we go
+      for(int i=0;i<count;i++) {
+        final Entry e = sortedEntries[i];
+        final BytesRef bytes = hash.getBytes(e);
+        assert bytes.length == size;
+        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+        e.address = 1+i;
+      }
+
+      idxOut.writeInt(count);
+
+      // nocommit -- allow forcing bit size (not just -1)
+      // for better perf
+      // next write index
+      PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(count));
+      final int limit;
+      if (docCount > docToEntry.length) {
+        limit = docToEntry.length;
+      } else {
+        limit = docCount;
+      }
+      for(int i=0;i<limit;i++) {
+        final Entry e = docToEntry[i];
+        if (e == null) {
+          // null is encoded as zero
+          w.add(0);
+        } else {
+          w.add(e.address);
+        }
+      }
+
+      for(int i=limit;i<docCount;i++) {
+        w.add(0);
+      }
+      w.finish();
+
+      super.finish(docCount);
+    }
+  }
+  
+  public static class Reader extends BytesBaseImpl.Reader {
+    // nocommit -- allow/require byte[] paging here?
+    private final int size;
+
+    public Reader(Directory dir, String id, int maxDoc)
+      throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+      size = datIn.readInt();
+    }
+
+    @Override
+    public Bytes.Source load() throws IOException {
+      return loadSorted(null);
+    }
+
+    @Override
+    public Bytes.SortedSource loadSorted(Comparator<BytesRef> comp) throws IOException {
+      return new Source(comp);
+    }
+
+    private class Source extends Bytes.SortedSource {
+
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final PackedInts.Reader index;
+      private final LookupResult lookupResult = new LookupResult();
+      private final int numValue;
+      private final Comparator<BytesRef> comp;
+
+      public Source(Comparator<BytesRef> comp) throws IOException {
+        datIn.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
+        idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
+
+        numValue = idxIn.readInt();
+        data = new byte[size*numValue];
+        datIn.readBytes(data, 0, size*numValue);
+        datIn.close();
+
+        index = PackedInts.getReader(idxIn);
+        idxIn.close();
+
+        bytesRef.bytes = data;
+        bytesRef.length = size;
+        // default byte sort order 
+        this.comp = comp==null?BytesRef.getByteOrderComparator():comp;
+      }
+
+      @Override
+      public int ord(int docID) {
+        return (int) index.get(docID);
+      }
+
+      @Override
+      public BytesRef getByOrd(int ord) {
+        if (ord == 0) {
+          return null;
+        } else {
+          bytesRef.offset = (int) ((ord-1) * size);
+          return bytesRef;
+        }
+      }
+
+      @Override
+      public LookupResult getByValue(BytesRef bytes) {
+        return binarySearch(bytes, 0, numValue-1);
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move ram calculation to PackedInts?
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length +
+            (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index.getBitsPerValue() * index.size());
+      }
+
+      @Override
+      public int getValueCount() {
+        return numValue;
+      }
+
+      private LookupResult binarySearch(BytesRef b, int low, int high) {
+        
+        while (low <= high) {
+          int mid = (low + high) >>> 1;
+          bytesRef.offset = mid * size;
+          int cmp = comp.compare(bytesRef, b);
+          if (cmp < 0) {
+            low = mid + 1;
+          } else if (cmp > 0) {
+            high = mid - 1;
+          } else {
+            lookupResult.ord = mid+1;
+            lookupResult.found = true;
+            return lookupResult;
+          }
+        }
+        lookupResult.ord = low;
+        lookupResult.found = false;
+        return lookupResult;
+      }
+    }
+  }
+}

Property changes on: src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java	(revision 0)
@@ -0,0 +1,128 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+import java.io.IOException;
+
+// Simplest storage: stores fixed length byte[] per
+// document, with no dedup and no sorting.
+
+class FixedStraightBytesImpl {
+
+  static final String CODEC_NAME = "FixedStraightBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class Writer extends BytesBaseImpl.Writer {
+    private int size = -1;
+    private int lastDocID;
+
+    protected Writer(Directory dir, String id) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, false);
+    }
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if (size == -1) {
+        size = bytes.length;
+        datOut.writeInt(size);
+      } else if (bytes.length != size) {
+        throw new IllegalArgumentException("expected bytes size=" + size + " but got " + bytes.length);
+      }
+      fill(docID);
+      datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+    }
+
+    // Fills up to but not including this docID
+    private void fill(int docID) throws IOException {
+      final int fill = size*(docID - lastDocID - 1);
+      for(int i=0;i<fill;i++) {
+        datOut.writeByte((byte) 0);
+      }
+      lastDocID = docID;
+    }
+
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      fill(docCount);
+      super.finish(docCount);
+    }
+
+    synchronized public long ramBytesUsed() {
+      return 0;
+    }
+  }
+
+  public static class Reader extends BytesBaseImpl.Reader {
+    private final int size;
+    private final int maxDoc;
+
+    Reader(Directory dir, String id, int maxDoc)
+      throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, false);
+
+      size = datIn.readInt();
+      this.maxDoc = maxDoc;
+    }
+
+    @Override
+    public Bytes.Source load() throws IOException {
+      return new Source();
+    }
+
+    @Override
+    public void close() throws IOException {
+      datIn.close();
+    }
+
+    private class Source extends Bytes.Source {
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+
+      public Source() throws IOException {
+        datIn.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
+
+        data = new byte[size*maxDoc];
+        datIn.readBytes(data, 0, size*maxDoc);
+
+        bytesRef.bytes = data;
+        bytesRef.length = size;
+      }
+
+      @Override
+      public BytesRef get(int docID) {
+        bytesRef.offset = docID * size;
+        return bytesRef;
+      }
+
+      public long ramBytesUsed() {
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length;
+      }
+
+      @Override
+      public int getValueCount() {
+        throw new UnsupportedOperationException();
+      }
+    }
+  }
+}

Property changes on: src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/Floats.java
===================================================================
--- src/java/org/apache/lucene/index/values/Floats.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/Floats.java	(revision 0)
@@ -0,0 +1,74 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.ConsumesRAM;
+
+import java.io.IOException;
+import java.io.Closeable;
+import java.util.List;
+
+/** Represents writers & readers of a single float value per
+ *  field X document. */
+
+public final class Floats {
+
+  /** Source of floating point number (returned as java
+   *  double), per document.  The underlying implementation
+   *  may use different numbers of bits per value; double is
+   *  only used since it can handle all precisions.  */
+  public static abstract class Source implements ConsumesRAM {
+    public abstract double get(int docID);
+  }
+
+  /** Writes one segment's worth of values */
+  public static abstract class Writer implements ConsumesRAM {
+    /** Records the specified value for the docID */
+    public abstract void add(int docID, double value) throws IOException;
+
+    /** Finish writing, close any files */
+    public abstract void finish(int docCount) throws IOException;
+
+    public static class MergeState {
+      public Reader floatsReader;
+      public IndexReader reader;                         // TODO: change to Bits with flex
+      public int docBase;
+      public int docCount;
+    }
+
+    public void add(List<MergeState> states) throws IOException {
+      // TODO: replace with more efficient, non RAM resident merging
+      for(MergeState state : states) {
+        final Source s = state.floatsReader.load();
+        int docID = state.docBase;
+        for(int i=0;i<state.docCount;i++) {
+          if (!state.reader.isDeleted(i)) {
+            add(docID++, s.get(i));
+          }
+        }
+      }
+    }
+  }
+
+  /** Opens files but does not read them until load() is
+   *  called. */
+  public static abstract class Reader implements Closeable {
+    public abstract Source load() throws IOException;
+  }
+}

Property changes on: src/java/org/apache/lucene/index/values/Floats.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/Ints.java
===================================================================
--- src/java/org/apache/lucene/index/values/Ints.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/Ints.java	(revision 0)
@@ -0,0 +1,75 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.ConsumesRAM;
+
+import java.io.IOException;
+import java.io.Closeable;
+import java.util.List;
+
+/** Represents writers & readers of a single int value per
+ *  field X document. */
+
+public final class Ints {
+
+  /** Source of integer (returned as java long), per
+   *  document.  The underlying implementation may use
+   *  different numbers of bits per value; long is only
+   *  used since it can handle all precisions.  */
+  public static abstract class Source implements ConsumesRAM {
+    public abstract long get(int docID);
+  }
+
+  /** Writes one segment's worth of values */
+  public static abstract class Writer implements ConsumesRAM {
+
+    /** Records the specified value for the docID */
+    public abstract void add(int docID, long value) throws IOException;
+
+    /** Finish writing, close any files */
+    public abstract void finish(int docCount) throws IOException;
+
+    public static class MergeState {
+      public Reader intsReader;
+      public IndexReader reader;                         // TODO: change to Bits with flex
+      public int docBase;
+      public int docCount;
+    }
+
+    public void add(List<MergeState> states) throws IOException {
+      // TODO: replace with more efficient, non RAM resident merging
+      for(MergeState state : states) {
+        final Source s = state.intsReader.load();
+        int docID = state.docBase;
+        for(int i=0;i<state.docCount;i++) {
+          if (!state.reader.isDeleted(i)) {
+            add(docID++, s.get(i));
+          }
+        }
+      }
+    }
+  }
+
+  /** Opens files but does not read them until load() is
+   *  called. */
+  public static abstract class Reader implements Closeable {
+    public abstract Source load() throws IOException;
+  }
+}

Property changes on: src/java/org/apache/lucene/index/values/Ints.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/PackedIntsImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/PackedIntsImpl.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/PackedIntsImpl.java	(revision 0)
@@ -0,0 +1,162 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.util.Collection;
+import java.io.IOException;
+
+/** Stores ints packed with fixed-bit precision. */
+public class PackedIntsImpl {
+
+  private static final String CODEC_NAME = "PackedInts";
+
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  public static void files(String id, Collection<String> files) throws IOException {
+    files.add(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION));
+  }
+
+  public static Writer getWriter(Directory dir, String id, boolean useFixedArray) throws IOException {
+    return new Writer(dir, id, useFixedArray);
+  }
+
+  public static Reader getReader(Directory dir, String id) throws IOException {
+    return new Reader(dir, id);
+  }
+
+  private static class Writer extends Ints.Writer {
+
+    private long[] docToValue;
+    private long minValue;
+    private long maxValue;
+    private boolean started;
+    private final Directory dir;
+    private final String id;
+
+    protected Writer(Directory dir, String id, boolean useFixedArray) throws IOException {
+      this.dir = dir;
+      this.id = id;
+      docToValue = new long[1];
+    }
+
+    @Override
+    synchronized public void add(int docID, long v) throws IOException {
+      if (!started) {
+        minValue = maxValue = v;
+        started = true;
+      } else {
+        if (v < minValue) {
+          minValue = v;
+        } else if (v > maxValue) {
+          maxValue = v;
+        }
+      }
+      if (docID >= docToValue.length) {
+        docToValue = ArrayUtil.grow(docToValue, 1+docID);
+      }
+      docToValue[docID] = v;
+    }
+
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      final IndexOutput datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION));
+      CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
+
+      // nocommit -- long can't work right since it's signed
+
+      datOut.writeLong(minValue);
+      PackedInts.Writer w = PackedInts.getWriter(datOut, docCount, PackedInts.bitsRequired(maxValue-minValue));
+
+      final int limit;
+      if (docCount > docToValue.length) {
+        limit = docToValue.length;
+      } else {
+        limit = docCount;
+      }
+      for(int i=0;i<limit;i++) {
+        w.add(docToValue[i] - minValue);
+      }
+      for(int i=limit;i<docCount;i++) {
+        w.add(0);
+      }
+      w.finish();
+
+      datOut.close();
+    }
+
+    public long ramBytesUsed() {
+      return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToValue.length * RamUsageEstimator.NUM_BYTES_LONG;
+    }
+  }
+
+  /** Opens all necessary files, but does not read any data
+   *  in until you call {@link #load}. */
+  private static class Reader extends Ints.Reader {
+    private final IndexInput datIn;
+
+    protected Reader(Directory dir, String id) throws IOException {
+      datIn = dir.openInput(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION));
+      CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START);
+    }
+
+    /** Loads the actual values.  You may call this more
+     *  than once, eg if you already previously loaded but
+     *  then discarded the Source. */
+    @Override
+    public Source load() throws IOException {
+      return new Source();
+    }
+
+    private class Source extends Ints.Source {
+      private final long minValue;
+      private final PackedInts.Reader values;
+
+      public Source() throws IOException {
+        datIn.seek(CodecUtil.headerLength(CODEC_NAME));
+        minValue = datIn.readLong();
+        values = PackedInts.getReader(datIn);
+      }
+
+      @Override
+      public long get(int docID) {
+        // nocommit -- can we somehow avoid 2X method calls
+        // on each get?  must push minValue down, and make
+        // PackedInts implement Ints.Source
+        return minValue + values.get(docID);
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move that to PackedInts?
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.getBitsPerValue() * values.size();
+      }
+    }
+
+    public void close() throws IOException {
+      datIn.close();
+    }
+  }
+}
\ No newline at end of file

Property changes on: src/java/org/apache/lucene/index/values/PackedIntsImpl.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/SimpleFloatsImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/SimpleFloatsImpl.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/SimpleFloatsImpl.java	(revision 0)
@@ -0,0 +1,208 @@
+package org.apache.lucene.index.values;
+
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.DoubleBuffer;
+import java.nio.FloatBuffer;
+import java.util.Collection;
+
+/** Exposes writer/reader for floating point values.  You
+ *  can specify 4 (java float) or 8 (java double) byte
+ *  precision. */
+
+public class SimpleFloatsImpl {
+  private static final String CODEC_NAME = "SimpleFloats";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  /** Adds the single data file written for {@code id} to {@code files}. */
+  public static void files(String id, Collection<String> files) {
+    files.add(id + "." + IndexFileNames.CSF_DATA_EXTENSION);
+  }
+
+  /** Returns a writer storing each value in {@code precisionBytes}
+   *  bytes (4 = java float, 8 = java double).
+   *  @throws IllegalArgumentException if precisionBytes is not 4 or 8 */
+  public static Floats.Writer getWriter(Directory dir, String id, int precisionBytes) throws IOException {
+    if (precisionBytes != 4 && precisionBytes != 8) {
+      throw new IllegalArgumentException("precisionBytes must be 4 or 8; got " + precisionBytes);
+    }
+    if (precisionBytes == 4) {
+      return new Float4Writer(dir, id);
+    } else {
+      return new Float8Writer(dir, id);
+    }
+  }
+
+  /** Opens a reader; the stored precision byte decides float vs double. */
+  public static Reader getReader(Directory dir, String id, int maxDoc) throws IOException {
+    return new Reader(dir, id, maxDoc);
+  }
+
+  // Writes 4 bytes (float) per value
+  public static class Float4Writer extends Floats.Writer {
+
+    private float[] docToValue;
+    private final Directory dir;
+    private final String id;
+
+    protected Float4Writer(Directory dir, String id) throws IOException {
+      this.dir = dir;
+      this.id = id;
+      docToValue = new float[1];
+    }
+
+    /** Buffers the value for docID; the backing array grows on demand.
+     *  The double is narrowed to float precision. */
+    @Override
+    synchronized public void add(int docID, double v) throws IOException {
+      if (docID >= docToValue.length) {
+        float[] newArray = new float[ArrayUtil.oversize(1+docID, RamUsageEstimator.NUM_BYTES_FLOAT)];
+        System.arraycopy(docToValue, 0, newArray, 0, docToValue.length);
+        docToValue = newArray;
+      }
+      docToValue[docID] = (float) v;
+    }
+
+    /** Writes codec header, precision byte, then one float per doc;
+     *  docs never passed to {@link #add} are written as 0. */
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      final IndexOutput datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION));
+      CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
+      assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME);
+      datOut.writeByte((byte) 4);
+
+      // docs at or beyond docToValue.length were never added
+      final int limit = Math.min(docCount, docToValue.length);
+      for(int i=0;i<limit;i++) {
+        // docToValue[i] is already a float; no narrowing cast needed
+        datOut.writeInt(Float.floatToRawIntBits(docToValue[i]));
+      }
+      for(int i=limit;i<docCount;i++) {
+        datOut.writeInt(0);
+      }
+
+      datOut.close();
+    }
+
+    public long ramBytesUsed() {
+      return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToValue.length * RamUsageEstimator.NUM_BYTES_FLOAT;
+    }
+  }
+
+  // Writes 8 bytes (double) per value
+  public static class Float8Writer extends Floats.Writer {
+
+    private double[] docToValue;
+
+    private final Directory dir;
+    private final String id;
+
+    protected Float8Writer(Directory dir, String id) throws IOException {
+      this.dir = dir;
+      this.id = id;
+      docToValue = new double[1];
+    }
+
+    /** Buffers the value for docID; the backing array grows on demand. */
+    @Override
+    synchronized public void add(int docID, double v) throws IOException {
+      if (docID >= docToValue.length) {
+        double[] newArray = new double[ArrayUtil.oversize(1+docID, RamUsageEstimator.NUM_BYTES_DOUBLE)];
+        System.arraycopy(docToValue, 0, newArray, 0, docToValue.length);
+        docToValue = newArray;
+      }
+      docToValue[docID] = v;
+    }
+
+    /** Writes codec header, precision byte, then one double per doc;
+     *  docs never passed to {@link #add} are written as 0. */
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      final IndexOutput datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION));
+      CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
+      assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME);
+      datOut.writeByte((byte) 8);
+
+      // docs at or beyond docToValue.length were never added
+      final int limit = Math.min(docCount, docToValue.length);
+      for(int i=0;i<limit;i++) {
+        datOut.writeLong(Double.doubleToRawLongBits(docToValue[i]));
+      }
+      for(int i=limit;i<docCount;i++) {
+        datOut.writeLong(0);
+      }
+
+      datOut.close();
+    }
+
+    public long ramBytesUsed() {
+      return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToValue.length * RamUsageEstimator.NUM_BYTES_DOUBLE;
+    }
+  }
+
+  /** Opens all necessary files, but does not read any data
+   *  in until you call {@link #load}. */
+  public static class Reader extends Floats.Reader {
+
+    private final IndexInput datIn;
+    private final int precisionBytes;
+    // TODO(simonw) is ByteBuffer the way to go here?
+    private final ByteBuffer buffer;
+
+    protected Reader(Directory dir, String id, int maxDoc) throws IOException {
+      datIn = dir.openInput(IndexFileNames.segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION));
+      CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START);
+      precisionBytes = datIn.readByte();
+      assert precisionBytes == 4 || precisionBytes == 8;
+      buffer = ByteBuffer.allocate(precisionBytes * maxDoc);
+    }
+    
+    /** Loads the actual values.  You may call this more
+     *  than once, eg if you already previously loaded but
+     *  then discarded the Source.
+     *  NOTE(review): reloading refills the shared buffer, so any
+     *  previously returned Source silently sees the new bytes --
+     *  confirm callers never hold two Sources at once. */
+    @Override
+    public Floats.Source load() throws IOException {
+      datIn.seek(CodecUtil.headerLength(CODEC_NAME));
+      // skip precision:
+      datIn.readByte();
+      assert buffer.hasArray(): "Buffer must support Array";
+      final byte[] arr = buffer.array();
+      datIn.readBytes(arr, 0, arr.length);
+      if (precisionBytes == 4) {
+        return new Source4();
+      } else {
+        return new Source8();
+      }
+    }
+
+    /** Float (4-byte) view over the shared buffer. */
+    private class Source4 extends Floats.Source {
+      private final FloatBuffer values = buffer.asFloatBuffer();
+
+      @Override
+      public double get(int docID) {
+        return values.get(docID);
+      }
+
+      public long ramBytesUsed() {
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.limit() * RamUsageEstimator.NUM_BYTES_FLOAT;
+      }
+    }
+
+    /** Double (8-byte) view over the shared buffer. */
+    private class Source8 extends Floats.Source {
+      private final DoubleBuffer values = buffer.asDoubleBuffer();
+
+      @Override
+      public double get(int docID) {
+        return values.get(docID);
+      }
+
+      public long ramBytesUsed() {
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.limit() * RamUsageEstimator.NUM_BYTES_DOUBLE;
+      }
+    }
+
+    /** Closes the underlying data file. */
+    public void close() throws IOException {
+      datIn.close();
+    }
+  }
+}
\ No newline at end of file

Property changes on: src/java/org/apache/lucene/index/values/SimpleFloatsImpl.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java	(revision 0)
@@ -0,0 +1,184 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesHash;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.CodecUtil;
+import java.io.IOException;
+
+// Stores variable-length byte[] by deref, ie when two docs
+// have the same value, they store only 1 byte[] and both
+// docs reference that single source
+
+class VarDerefBytesImpl {
+
+  static final String CODEC_NAME = "VarDerefBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class Writer extends BytesBaseImpl.Writer {
+    // docID -> 1 + byte position of the value's length prefix in
+    // the data file; 0 means "no value for this doc"
+    private int[] docToAddress;
+    private int address = 1;
+
+    final class Entry extends BytesHash.Entry {
+      int address;
+    }
+
+    private final BytesHash<Entry> hash = new BytesHash<Entry>(Entry.class) {
+      @Override
+      protected VarDerefBytesImpl.Writer.Entry newEntry() {
+        return new VarDerefBytesImpl.Writer.Entry();
+      }
+      @Override
+      public long bytesPerEntry() {
+        return super.bytesPerEntry() + RamUsageEstimator.NUM_BYTES_INT;
+      }
+    };
+
+    public Writer(Directory dir, String id) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, true);
+      docToAddress = new int[1];
+    }
+
+    /** Records the value for docID, writing the bytes only the
+     *  first time this exact value is seen (deref sharing).
+     *  @throws IllegalArgumentException if the value is longer than
+     *          32767 bytes, the most the 1-2 byte length prefix can
+     *          encode (longer values would silently corrupt the file) */
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if (bytes.length > 0x7fff) {
+        throw new IllegalArgumentException("value too long: " + bytes.length + " bytes (max 32767)");
+      }
+      Entry e = hash.add(bytes);
+
+      if (docID >= docToAddress.length) {
+        docToAddress = ArrayUtil.grow(docToAddress, 1+docID);
+      }
+      if (e.address == 0) {
+        e.address = address;
+        // New
+        if (bytes.length < 128) {
+          // 1 byte to store length
+          datOut.writeByte((byte) bytes.length);
+          address++;
+        } else {
+          // 2 bytes: low 7 bits with continuation bit set, then high 8 bits
+          datOut.writeByte((byte) (0x80 | (bytes.length & 0x7f)));
+          datOut.writeByte((byte) ((bytes.length>>7) & 0xff));
+          address += 2;
+        }
+        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+        address += bytes.length;
+      }
+
+      docToAddress[docID] = e.address;
+    }
+
+    synchronized public long ramBytesUsed() {
+      return RamUsageEstimator.NUM_BYTES_INT * docToAddress.length + hash.ramBytesUsed();
+    }
+
+    // Important that we get docCount, in case there were
+    // some last docs that we didn't see
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+
+      // total data bytes written (addresses are 1-based)
+      idxOut.writeInt(address-1);
+
+      // write index
+      // nocommit -- allow forcing fixed array (not -1)
+      // TODO(simonw): check the address calculation / make it more intuitive
+      PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(address-1));
+      final int limit = Math.min(docCount, docToAddress.length);
+      for(int i=0;i<limit;i++) {
+        w.add(docToAddress[i]);
+      }
+      // docs we never saw store address 0 == "no value"
+      for(int i=limit;i<docCount;i++) {
+        w.add(0);
+      }
+      w.finish();
+
+      super.finish(docCount);
+    }
+  }
+
+  public static class Reader extends BytesBaseImpl.Reader {
+
+    Reader(Directory dir, String id, int maxDoc)
+      throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+    }
+
+    @Override
+    public Bytes.Source load() throws IOException {
+      return new Source();
+    }
+
+    private class Source extends Bytes.Source {
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final PackedInts.Reader index;
+
+      public Source() throws IOException {
+        datIn.seek(CodecUtil.headerLength(CODEC_NAME));
+        idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
+
+        final int totBytes = idxIn.readInt();
+        data = new byte[totBytes];
+        datIn.readBytes(data, 0, totBytes);
+
+        index = PackedInts.getReader(idxIn);
+        bytesRef.bytes = data;
+      }
+
+      /** Returns the value for docID, or null if the doc had none.
+       *  The returned BytesRef is reused across calls. */
+      @Override
+      public BytesRef get(int docID) {
+        int address = (int) index.get(docID);
+        if (address == 0) {
+          return null;
+        } else {
+          address--;
+          if ((data[address] & 0x80) == 0) {
+            // length is 1 byte
+            bytesRef.length = data[address];
+            bytesRef.offset = address+1;
+          } else {
+            // 2-byte length: low 7 bits then high 8 bits
+            bytesRef.length = (data[address]&0x7f) + ((data[address+1]&0xff)<<7);
+            bytesRef.offset = address+2;
+          }
+          return bytesRef;
+        }
+      }
+      
+      @Override
+      public int getValueCount() {
+        return index.size();
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move address ram usage to PackedInts?
+        // bitsPerValue*size is in bits; convert to bytes
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length + 
+          (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (index.getBitsPerValue() * (long) index.size()) / 8);
+      }
+    }
+  }
+}

Property changes on: src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java	(revision 0)
@@ -0,0 +1,255 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesHash;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.PackedInts;
+
+// Stores variable-length byte[] in sorted order: each unique
+// value is written once, each doc maps to the 1+ord of its
+// value, and ords support ordinal sorting and value lookup
+
+class VarSortedBytesImpl {
+
+  // was "VarDerefBytes" (copy/paste); this codec writes its own
+  // file format and must use a distinct codec name
+  static final String CODEC_NAME = "VarSortedBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class Writer extends BytesBaseImpl.Writer {
+    private Entry[] docToEntry;
+    private final Comparator<BytesRef> comp;
+
+    final class Entry extends BytesHash.Entry {
+      int index;   // 1-based ord, assigned after sorting in finish()
+      long offset; // byte offset of this value in the data file
+    }
+
+    private final BytesHash<Entry> hash = new BytesHash<Entry>(Entry.class) {
+      @Override
+      protected VarSortedBytesImpl.Writer.Entry newEntry() {
+        return new VarSortedBytesImpl.Writer.Entry();
+      }
+      @Override
+      public long bytesPerEntry() {
+        return super.bytesPerEntry() + RamUsageEstimator.NUM_BYTES_INT + RamUsageEstimator.NUM_BYTES_LONG;
+      }
+    };
+
+    public Writer(Directory dir, String id, Comparator<BytesRef> comp) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, true);
+      this.comp = comp;
+      docToEntry = new Entry[1];
+    }
+
+    /** Records the value for docID; bytes are deduped via the hash
+     *  and only ordered/written out in {@link #finish}. */
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if (docID >= docToEntry.length) {
+        Entry[] newArray = new Entry[ArrayUtil.oversize(1+docID, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+        System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
+        docToEntry = newArray;
+      }
+      docToEntry[docID] = hash.add(bytes);
+    }
+
+    synchronized public long ramBytesUsed() {
+      // was NUM_BYTES_OBJ_REF, which is not a RamUsageEstimator
+      // constant (see NUM_BYTES_OBJECT_REF use in add above)
+      return RamUsageEstimator.NUM_BYTES_OBJECT_REF * docToEntry.length + hash.ramBytesUsed();
+    }
+
+    // Important that we get docCount, in case there were
+    // some last docs that we didn't see
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+
+      Entry[] sortedEntries = hash.sort(comp);
+      final int count = hash.size();
+
+      // first dump bytes data, recording index & offset as
+      // we go
+      long offset = 0;
+      long lastOffset = 0;
+      for(int i=0;i<count;i++) {
+        final Entry e = sortedEntries[i];
+        e.offset = offset;
+        e.index = 1+i;
+
+        final BytesRef bytes = hash.getBytes(e);
+        // TODO: we could prefix code...
+        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+        lastOffset = offset;
+        offset += bytes.length;
+      }
+
+      // total bytes of data
+      idxOut.writeLong(offset);
+
+      // write index -- first doc -> 1+ord
+      // nocommit -- allow not -1:
+      final PackedInts.Writer indexWriter = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(count));
+      final int limit = Math.min(docCount, docToEntry.length);
+      for(int i=0;i<limit;i++) {
+        final Entry e = docToEntry[i];
+        indexWriter.add(e==null? 0: e.index);
+      }
+      // docs never seen store ord 0 == "no value"
+      for(int i=limit;i<docCount;i++) {
+        indexWriter.add(0);
+      }
+      indexWriter.finish();
+
+      // next ord (0-based) -> offset
+      // nocommit -- allow not -1:
+      PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count, PackedInts.bitsRequired(lastOffset));
+      for(int i=0;i<count;i++) {
+        offsetWriter.add(sortedEntries[i].offset);
+      }
+      offsetWriter.finish();
+
+      super.finish(docCount);
+    }
+  }
+
+  public static class Reader extends BytesBaseImpl.Reader {
+
+    Reader(Directory dir, String id, int maxDoc)
+      throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+    }
+
+    @Override
+    public Bytes.Source load() throws IOException {
+      return loadSorted(null);
+    }
+
+    @Override
+    public Bytes.SortedSource loadSorted(Comparator<BytesRef> comp) throws IOException {
+      return new Source(comp);
+    }
+
+    private class Source extends Bytes.SortedSource {
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final PackedInts.Reader docToOrdIndex;
+      private final PackedInts.Reader ordToOffsetIndex;   // 0-based
+      private final long totBytes;
+      private final int valueCount;
+      private final LookupResult lookupResult = new LookupResult();
+      private final Comparator<BytesRef> comp;
+
+      public Source(Comparator<BytesRef> comp) throws IOException {
+        datIn.seek(CodecUtil.headerLength(CODEC_NAME));
+        idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
+
+        totBytes = idxIn.readLong();
+        data = new byte[(int) totBytes];
+        datIn.readBytes(data, 0, (int) totBytes);
+        docToOrdIndex = PackedInts.getReader(idxIn);
+        ordToOffsetIndex = PackedInts.getReader(idxIn);
+        valueCount = ordToOffsetIndex.size();
+        bytesRef.bytes = data;
+        if (comp == null) {
+          // default byte sort order
+          this.comp = new Comparator<BytesRef>() {
+            // TODO(simonw): factor this out
+            public int compare(BytesRef left, BytesRef right) {
+              return left.compareTo(right);
+            }
+          };
+        } else {
+          this.comp = comp;
+        }
+      }
+
+      /** Returns the value for 1-based ord, or null for ord 0
+       *  ("no value").  The returned BytesRef is reused. */
+      @Override
+      public BytesRef getByOrd(int ord) {
+        if (ord == 0) {
+          return null;
+        }
+        ord -= 1;
+        return deref(ord);
+      }
+
+      @Override
+      public int ord(int docID) {
+        return (int) docToOrdIndex.get(docID);
+      }
+
+      @Override
+      public LookupResult getByValue(BytesRef bytes) {
+        return binarySearch(bytes, 0, valueCount-1);
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move ram usage to PackedInts?
+        // was bitsPerValue*bitsPerValue; must be bitsPerValue*size,
+        // and the product is bits so convert to bytes
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length + 
+          (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (docToOrdIndex.getBitsPerValue() * (long) docToOrdIndex.size()) / 8) + 
+          (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (ordToOffsetIndex.getBitsPerValue() * (long) ordToOffsetIndex.size()) / 8);
+      }
+
+      @Override
+      public int getValueCount() {
+        return valueCount;
+      }
+
+      // ord is 0-based
+      private BytesRef deref(int ord) {
+        bytesRef.offset = (int) ordToOffsetIndex.get(ord);
+        final long nextOffset;
+        if (ord == valueCount-1) {
+          // last value runs to the end of the data
+          nextOffset = totBytes;
+        } else {
+          nextOffset = ordToOffsetIndex.get(1+ord);
+        }
+        bytesRef.length = (int) (nextOffset - bytesRef.offset);
+        return bytesRef;
+      }
+
+      // TODO: share w/ FixedSortedBytesValues?
+      private LookupResult binarySearch(BytesRef b, int low, int high) {
+        if (high < low) {
+          // empty value set: nothing can match, and the trailing
+          // assert would otherwise compare an unset bytesRef
+          lookupResult.ord = low;
+          lookupResult.found = false;
+          return lookupResult;
+        }
+        while (low <= high) {
+          int mid = (low + high) >>> 1;
+          deref(mid);
+          int cmp = comp.compare(bytesRef, b);
+          if (cmp < 0) {
+            low = mid + 1;
+          } else if (cmp > 0) {
+            high = mid - 1;
+          } else {
+            lookupResult.ord = mid+1;
+            lookupResult.found = true;
+            return lookupResult;
+          }
+        }
+        assert comp.compare(bytesRef, b) != 0;
+        lookupResult.ord = low;
+        lookupResult.found = false;
+        return lookupResult;
+      }
+    }
+  }
+}

Property changes on: src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
===================================================================
--- src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java	(revision 0)
+++ src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java	(revision 0)
@@ -0,0 +1,144 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.RamUsageEstimator;
+
+// Variable length byte[] per document, no sharing
+
+class VarStraightBytesImpl {
+
+  static final String CODEC_NAME = "VarStraightBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class Writer extends BytesBaseImpl.Writer {
+    // running byte position in the data file; NOTE(review): int
+    // caps total data at 2GB -- confirm that is acceptable here
+    private int address;
+    private int lastDocID;
+    private int[] docToAddress;
+
+    public Writer(Directory dir, String id) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, true);
+      docToAddress = new int[1];
+    }
+
+    // Fills up to but not including this docID
+    private void fill(final int docID) {
+      if (docID >= docToAddress.length) {
+        docToAddress = ArrayUtil.grow(docToAddress, 1+docID);
+      }
+      // skipped docs point at the current address, i.e. zero length
+      for(int i=lastDocID+1;i<docID;i++) {
+        docToAddress[i] = address;
+      }
+      lastDocID = docID;
+    }
+
+    /** Appends the value for docID straight to the data file. */
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      fill(docID);
+      docToAddress[docID] = address;
+      datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+      address += bytes.length;
+    }
+
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      // write all lengths to index
+      // write index
+      fill(docCount);
+      idxOut.writeVInt(address);
+      // nocommit -- allow not -1
+      PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(address));
+      // fill(docCount) guarantees docToAddress covers [0, docCount)
+      for(int i=0;i<docCount;i++) {
+        w.add(docToAddress[i]);
+      }
+      w.finish();
+
+      super.finish(docCount);
+    }
+
+    synchronized public long ramBytesUsed() {
+      return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + RamUsageEstimator.NUM_BYTES_INT * docToAddress.length;
+    }
+  }
+
+  public static class Reader extends BytesBaseImpl.Reader {
+    private final int maxDoc;
+
+    Reader(Directory dir, String id, int maxDoc)
+      throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+      this.maxDoc = maxDoc;
+    }
+
+    @Override
+    public Bytes.Source load() throws IOException {
+      return new Source();
+    }
+
+    private class Source extends Bytes.Source {
+      private final int totBytes;
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final PackedInts.Reader addresses;
+
+      public Source() throws IOException {
+        datIn.seek(CodecUtil.headerLength(CODEC_NAME));
+        idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
+        totBytes = idxIn.readVInt();
+        data = new byte[totBytes];
+        datIn.readBytes(data, 0, totBytes);
+        addresses = PackedInts.getReader(idxIn);
+        bytesRef.bytes = data;
+      }
+
+      /** Returns the value for docID; length is the gap to the next
+       *  doc's address (or to totBytes for the last doc).  The
+       *  returned BytesRef is reused across calls. */
+      @Override
+      public BytesRef get(int docID) {
+        final int address = (int) addresses.get(docID);
+        bytesRef.offset = address;
+        if (docID == maxDoc-1) {
+          bytesRef.length = totBytes - bytesRef.offset;
+        } else {
+          bytesRef.length = (int) addresses.get(1+docID) - bytesRef.offset;
+        }
+        return bytesRef;
+      }
+
+      @Override
+      public int getValueCount() {
+        throw new UnsupportedOperationException();
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move address ram usage to PackedInts?
+        // bitsPerValue*size is in bits; convert to bytes
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length + 
+          (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (addresses.getBitsPerValue() * (long) addresses.size()) / 8);
+      }
+    }
+  }
+}

Property changes on: src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/search/FieldComparator.java
===================================================================
--- src/java/org/apache/lucene/search/FieldComparator.java	(revision 959077)
+++ src/java/org/apache/lucene/search/FieldComparator.java	(working copy)
@@ -22,6 +22,8 @@
 import java.util.Locale;
 
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.values.Floats;
+import org.apache.lucene.index.values.Ints;
 import org.apache.lucene.search.FieldCache.DoubleParser;
 import org.apache.lucene.search.FieldCache.LongParser;
 import org.apache.lucene.search.FieldCache.ByteParser;
@@ -142,7 +144,6 @@
    * @param reader current reader
    * @param docBase docBase of this reader 
    * @throws IOException
-   * @throws IOException
    */
   public abstract void setNextReader(IndexReader reader, int docBase) throws IOException;
 
@@ -317,6 +318,64 @@
     }
   }
 
+  /** Sorts by ascending per-document float index values. */
+  public static final class FloatIndexValuesComparator extends FieldComparator {
+    // value held by each competitive slot
+    private final double[] values;
+    private Floats.Source currentReaderValues;
+    private final String field;
+    private double bottom;
+
+    FloatIndexValuesComparator(int numHits, String field) {
+      values = new double[numHits];
+      this.field = field;
+    }
+
+    @Override
+    public int compare(int slot1, int slot2) {
+      // deliberate >/< chain (not Double.compare) to keep the
+      // original NaN / -0.0 comparison behavior
+      final double left = values[slot1];
+      final double right = values[slot2];
+      return left > right ? 1 : (left < right ? -1 : 0);
+    }
+
+    @Override
+    public int compareBottom(int doc) {
+      final double other = currentReaderValues.get(doc);
+      return bottom > other ? 1 : (bottom < other ? -1 : 0);
+    }
+
+    @Override
+    public void copy(int slot, int doc) {
+      values[slot] = currentReaderValues.get(doc);
+    }
+
+    @Override
+    public void setNextReader(IndexReader reader, int docBase) throws IOException {
+      currentReaderValues = reader.getIndexValuesCache().getFloats(field);
+    }
+
+    @Override
+    public void setBottom(final int bottom) {
+      this.bottom = values[bottom];
+    }
+
+    @Override
+    public Comparable value(int slot) {
+      return Double.valueOf(values[slot]);
+    }
+  }
+
   /** Parses field's values as float (using {@link
    *  FieldCache#getFloats} and sorts by ascending value */
   public static final class FloatComparator extends FieldComparator {
@@ -451,6 +510,68 @@
     }
   }
 
+  /** Loads int index values and sorts by ascending value. */
+  public static final class IntIndexValuesComparator extends FieldComparator {
+    private final long[] values;
+    private Ints.Source currentReaderValues;
+    private final String field;
+    private long bottom;
+
+    IntIndexValuesComparator(int numHits, String field) {
+      values = new long[numHits];
+      this.field = field;
+    }
+
+    @Override
+    public int compare(int slot1, int slot2) {
+      // TODO: there are sneaky non-branch ways to compute
+      // -1/+1/0 sign
+      final long v1 = values[slot1];
+      final long v2 = values[slot2];
+      if (v1 > v2) {
+        return 1;
+      } else if (v1 < v2) {
+        return -1;
+      } else {
+        return 0;
+      }
+    }
+
+    @Override
+    public int compareBottom(int doc) {
+      // TODO: there are sneaky non-branch ways to compute
+      // -1/+1/0 sign
+      final long v2 = currentReaderValues.get(doc);
+      if (bottom > v2) {
+        return 1;
+      } else if (bottom < v2) {
+        return -1;
+      } else {
+        return 0;
+      }
+    }
+
+    @Override
+    public void copy(int slot, int doc) {
+      values[slot] = currentReaderValues.get(doc);
+    }
+
+    @Override
+    public void setNextReader(IndexReader reader, int docBase) throws IOException {
+      currentReaderValues = reader.getIndexValuesCache().getInts(field);
+    }
+    
+    @Override
+    public void setBottom(final int bottom) {
+      this.bottom = values[bottom];
+    }
+
+    @Override
+    public Comparable value(int slot) {
+      return Long.valueOf(values[slot]);
+    }
+  }
+
   /** Parses field's values as long (using {@link
    *  FieldCache#getLongs} and sorts by ascending value */
   public static final class LongComparator extends FieldComparator {
Index: src/java/org/apache/lucene/search/SortField.java
===================================================================
--- src/java/org/apache/lucene/search/SortField.java	(revision 959077)
+++ src/java/org/apache/lucene/search/SortField.java	(working copy)
@@ -19,9 +19,15 @@
 
 import java.io.IOException;
 import java.io.Serializable;
+import java.util.Comparator;
 import java.util.Locale;
 
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.BytesRef;
+
+// nocommit -- for cleaner transition, maybe we should make
+// a new SortField that subclasses this one and always uses
+// index values?
 
 /**
  * Stores information about how to sort documents by terms in an individual
@@ -83,6 +89,9 @@
    * uses ordinals to do the sorting. */
   public static final int STRING_VAL = 11;
   
+  /** Sort use byte[] index values. */
+  public static final int BYTES = 12;
+  
   /** Represents sorting by document score (relevancy). */
   public static final SortField FIELD_SCORE = new SortField (null, SCORE);
 
@@ -358,6 +367,26 @@
       field = StringHelper.intern(field);
   }
 
+  private boolean useIndexValues;
+
+  public void setUseIndexValues(boolean b) {
+    useIndexValues = b;
+  }
+
+  public boolean getUseIndexValues() {
+    return useIndexValues;
+  }
+
+  private Comparator<BytesRef> bytesComparator = BytesRef.getByteOrderComparator();
+
+  public void setBytesComparator(Comparator<BytesRef> b) {
+    bytesComparator = b;
+  }
+
+  public Comparator<BytesRef> getBytesComparator() {
+    return bytesComparator;
+  }
+
   /** Returns the {@link FieldComparator} to use for
    * sorting.
    *
@@ -387,10 +416,18 @@
       return new FieldComparator.DocComparator(numHits);
 
     case SortField.INT:
-      return new FieldComparator.IntComparator(numHits, field, parser);
+      if (useIndexValues) {
+        return new FieldComparator.IntIndexValuesComparator(numHits, field);
+      } else {
+        return new FieldComparator.IntComparator(numHits, field, parser);
+      }
 
     case SortField.FLOAT:
-      return new FieldComparator.FloatComparator(numHits, field, parser);
+      if (useIndexValues) {
+        return new FieldComparator.FloatIndexValuesComparator(numHits, field);
+      } else {
+        return new FieldComparator.FloatComparator(numHits, field, parser);
+      }
 
     case SortField.LONG:
       return new FieldComparator.LongComparator(numHits, field, parser);
Index: src/java/org/apache/lucene/store/RAMDirectory.java
===================================================================
--- src/java/org/apache/lucene/store/RAMDirectory.java	(revision 959077)
+++ src/java/org/apache/lucene/store/RAMDirectory.java	(working copy)
@@ -150,8 +150,9 @@
     synchronized (this) {
       file = fileMap.get(name);
     }
-    if (file==null)
+    if (file==null){
       throw new FileNotFoundException(name);
+    }
     return file.getLength();
   }
   
Index: src/java/org/apache/lucene/util/BytesHash.java
===================================================================
--- src/java/org/apache/lucene/util/BytesHash.java	(revision 0)
+++ src/java/org/apache/lucene/util/BytesHash.java	(revision 0)
@@ -0,0 +1,377 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// nocommit -- move to util?
+import java.lang.reflect.Array;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.index.ByteBlockPool;
+
+/**
+ * Hashes BytesRefs.  Each BytesRef must be no longer than
+ * (BYTES_BLOCK_SIZE - 2) bytes in
+ *
+ * <p>NOTE: this class is meant only to be used internally
+ * by Lucene; it's only public so it can be shared across
+ * packages.  This means the API is freely subject to
+ * change, and, the class could be removed entirely, in any
+ * Lucene release.  Use directly at your own risk!
+ */
+
+// nocommit -- reuse Entry instances?
+public abstract class BytesHash<T extends BytesHash.Entry> {
+
+  // nocommit -- factor properly so the byte pool uses this
+  // NOT DW's
+  public final static int BYTES_BLOCK_SHIFT = 15;
+  public final static int BYTES_BLOCK_SIZE = 1 << BYTES_BLOCK_SHIFT;
+  public final static int BYTES_BLOCK_MASK = BYTES_BLOCK_SIZE - 1;
+
+  // nocommit -- reuse?
+  private static class ByteBlockAllocator extends ByteBlockPool.Allocator {
+    int blockUsedCount;
+
+    @Override
+    public byte[] getByteBlock() {
+      blockUsedCount++;
+      return new byte[BYTES_BLOCK_SIZE];
+    }
+
+    @Override
+    public void recycleByteBlocks(byte[][] blocks, int start, int end) {
+      blockUsedCount -= end-start;
+    }
+
+    public long ramBytesUsed() {
+      return blockUsedCount * BYTES_BLOCK_SIZE;
+    }
+
+    @Override
+    public void recycleByteBlocks(List<byte[]> blocks) {
+      blockUsedCount -= blocks.size();
+    }
+  }
+
+  public static class Entry {
+    public int bytesStart;
+  }
+
+  private final Class<T> cl;
+  public final ByteBlockPool pool;
+  private int hashSize = 4;
+  private int hashHalfSize = hashSize/2;
+  private int hashMask = hashSize-1;
+  private int count;
+  private int lastCount = -1;
+  private final ByteBlockAllocator allocator;
+  private T[] hash;
+
+  @SuppressWarnings("unchecked")
+  public BytesHash(Class<T> cl) {
+    this.cl = cl;
+    allocator = new ByteBlockAllocator();
+    pool = new ByteBlockPool(allocator);
+    hash = (T[]) Array.newInstance(cl, hashSize);
+  }
+
+  public int size() {
+    return count;
+  }
+
+  public BytesRef getBytes(T e) {
+    return deref(e.bytesStart, scratch1);
+  }
+
+  /** Destructive operation -- returns all Entry instances,
+   *  in arbitrary order */
+  public T[] compact() {
+    int upto = 0;
+    for(int i=0;i<hashSize;i++) {
+      if (hash[i] != null) {
+        if (upto < i) {
+          hash[upto] = hash[i];
+          hash[i] = null;
+        }
+        upto++;
+      }
+    }
+
+    assert upto == count;
+    lastCount = count;
+
+    return hash;
+  }
+
+  /** Destructive operation -- returns all Entry instances sorted */
+  public T[] sort(Comparator<BytesRef>comp) {
+    compact();
+    quickSort(comp, hash, 0, count-1);
+    return hash;
+  }
+
+  void quickSort(Comparator<BytesRef> comp, T[] entries, int lo, int hi) {
+    if (lo >= hi)
+      return;
+    else if (hi == 1+lo) {
+      if (compare(comp, entries[lo], entries[hi]) > 0) {
+        final T tmp = entries[lo];
+        entries[lo] = entries[hi];
+        entries[hi] = tmp;
+      }
+      return;
+    }
+    int mid = (lo + hi) >>> 1;
+    if (compare(comp, entries[lo], entries[mid]) > 0) {
+      T tmp = entries[lo];
+      entries[lo] = entries[mid];
+      entries[mid] = tmp;
+    }
+
+    if (compare(comp, entries[mid], entries[hi]) > 0) {
+      T tmp = entries[mid];
+      entries[mid] = entries[hi];
+      entries[hi] = tmp;
+
+      if (compare(comp, entries[lo], entries[mid]) > 0) {
+        T tmp2 = entries[lo];
+        entries[lo] = entries[mid];
+        entries[mid] = tmp2;
+      }
+    }
+
+    int left = lo + 1;
+    int right = hi - 1;
+
+    if (left >= right)
+      return;
+
+    T partition = entries[mid];
+
+    for (; ;) {
+      while (compare(comp, entries[right], partition) > 0)
+        --right;
+
+      while (left < right && compare(comp, entries[left], partition) <= 0)
+        ++left;
+
+      if (left < right) {
+        T tmp = entries[left];
+        entries[left] = entries[right];
+        entries[right] = tmp;
+        --right;
+      } else {
+        break;
+      }
+    }
+
+    quickSort(comp, entries, lo, left);
+    quickSort(comp, entries, left + 1, hi);
+  }
+
+  private final BytesRef scratch1 = new BytesRef();
+  private final BytesRef scratch2 = new BytesRef();
+
+  private final BytesRef deref(int bytesStart, BytesRef b) {
+    b.bytes = pool.buffers[bytesStart >> BYTES_BLOCK_SHIFT];
+    int pos = bytesStart & BYTES_BLOCK_MASK;
+
+    if ((b.bytes[pos] & 0x80) == 0) {
+      // length is 1 byte
+      b.length = b.bytes[pos];
+      pos += 1;
+    } else {
+      // length is 2 bytes
+      b.length = (b.bytes[pos]&0x7f) + ((b.bytes[pos+1]&0xff)<<7);
+      pos += 2;
+    }
+    b.offset = pos;
+    return b;
+  }
+
+  private boolean equals(T e, BytesRef b) {
+    return deref(e.bytesStart, scratch1).bytesEquals(b);
+  }
+
+  private int compare(Comparator<BytesRef> comp, T e1, T e2) {
+    return comp.compare(deref(e1.bytesStart, scratch1),
+                        deref(e2.bytesStart, scratch2));
+  }
+
+  @SuppressWarnings("unchecked")
+  private boolean shrink(int targetSize) {
+
+    // Cannot use ArrayUtil.shrink because we require power
+    // of 2:
+    int newSize = hashSize;
+    while(newSize >= 8 && newSize/4 > targetSize) {
+      newSize /= 2;
+    }
+
+    if (newSize != hashSize) {
+      hashSize = newSize;
+      hash = (T[]) Array.newInstance(cl, hashSize);
+      hashHalfSize = newSize/2;
+      hashMask = newSize-1;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  public void clear() {
+    lastCount = count;
+    count = 0;
+    if (lastCount != -1) {
+      if (shrink(lastCount)) {
+        // shrink clears the hash entries
+        return;
+      }
+    }
+    Arrays.fill(hash, null);
+  }
+
+  public T add(BytesRef bytes) {
+    int code = 0;
+    final int end = bytes.offset + bytes.length;
+    // build hash
+    for(int i=bytes.offset;i<end;i++) {
+      code = 31*code + bytes.bytes[i];
+    }
+
+    // final position
+    int hashPos = code & hashMask;
+    T e = hash[hashPos];
+
+    if (e != null && !equals(e, bytes)) {
+      // Conflict: keep searching different locations in
+      // the hash table.
+      final int inc = ((code>>8)+code)|1;
+      do {
+        code += inc;
+        hashPos = code & hashMask;
+        e = hash[hashPos];
+      } while (e != null && !equals(e, bytes));
+    }
+
+    if (e == null) {
+      // new entry
+      final int len2 = 2+bytes.length;
+      if (len2 + pool.byteUpto > BYTES_BLOCK_SIZE) {
+        if (len2 > BYTES_BLOCK_SIZE) {
+          throw new IllegalArgumentException("bytes can be at most " + (BYTES_BLOCK_SIZE-2) + " in length; got " + bytes.length);
+        }
+        pool.nextBuffer();
+      }
+
+      e = newEntry();
+
+      final byte[] buffer = pool.buffer;
+      final int bufferUpto = pool.byteUpto;
+      e.bytesStart = bufferUpto + pool.byteOffset;
+
+      // We first encode the length, followed by the
+      // bytes.  Length is encoded as vInt, but will consume
+      // 1 or 2 bytes at most (we reject too-long terms,
+      // above).
+      if (bytes.length < 128) {
+        // 1 byte to store length
+        buffer[bufferUpto] = (byte) bytes.length;
+        pool.byteUpto += bytes.length + 1;
+        System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto+1, bytes.length);
+      } else {
+        // 2 bytes to store length
+        buffer[bufferUpto] = (byte) (0x80 | (bytes.length & 0x7f));
+        buffer[bufferUpto+1] = (byte) ((bytes.length>>7) & 0xff);
+        pool.byteUpto += bytes.length + 2;
+        System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto+2, bytes.length);
+      }
+      assert hash[hashPos] == null;
+      hash[hashPos] = e;
+      count++;
+
+      if (count == hashHalfSize) {
+        rehash(2*hashSize);
+      }
+    }
+    return e;
+  }
+
+  /** Called to grow the hash table when it becomes too
+   *  densely occupied (> 50% full). */
+  void rehash(final int newSize) {
+
+    final int newMask = newSize-1;
+
+    @SuppressWarnings("unchecked")
+    T[] newHash = (T[]) Array.newInstance(cl, newSize);
+    for(int i=0;i<hashSize;i++) {
+      T e0 = hash[i];
+      if (e0 != null) {
+        int code;
+        final int start = e0.bytesStart & BYTES_BLOCK_MASK;
+        final byte[] bytes = pool.buffers[e0.bytesStart >> BYTES_BLOCK_SHIFT];
+        code = 0;
+
+        final int len;
+        int pos;
+        if ((bytes[start] & 0x80) == 0) {
+          // length is 1 byte
+          len = bytes[start];
+          pos = start+1;
+        } else {
+          len = (bytes[start]&0x7f) + ((bytes[start+1]&0xff)<<7);
+          pos = start+2;
+        }
+
+        final int endPos = pos+len;
+        while(pos < endPos) {
+          code = (code*31) + bytes[pos++];
+        }
+
+        int hashPos = code & newMask;
+        assert hashPos >= 0;
+        if (newHash[hashPos] != null) {
+          final int inc = ((code>>8)+code)|1;
+          do {
+            code += inc;
+            hashPos = code & newMask;
+          } while (newHash[hashPos] != null);
+        }
+        newHash[hashPos] = e0;
+      }
+    }
+
+    hashMask = newMask;
+    hash = newHash;
+    hashSize = newSize;
+    hashHalfSize = newSize >> 1;
+  }
+
+  protected abstract T newEntry();
+
+  public long ramBytesUsed() {
+    return allocator.ramBytesUsed() + RamUsageEstimator.NUM_BYTES_OBJ_REF * hashSize + count * bytesPerEntry();
+  }
+
+  protected long bytesPerEntry() {
+    return RamUsageEstimator.NUM_BYTES_OBJ_HEADER + RamUsageEstimator.NUM_BYTES_INT;
+  }
+}

Property changes on: src/java/org/apache/lucene/util/BytesHash.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/util/BytesRef.java
===================================================================
--- src/java/org/apache/lucene/util/BytesRef.java	(revision 959077)
+++ src/java/org/apache/lucene/util/BytesRef.java	(working copy)
@@ -18,6 +18,7 @@
  */
 
 import java.util.Comparator;
+import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
 import java.io.ObjectInput;
 import java.io.ObjectOutput;
@@ -240,12 +241,29 @@
   }
 
   private final static Comparator<BytesRef> utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator();
+  
+  private final static Comparator<BytesRef> byteOrderComparator = new ByteOrderComparator();
+    
+  @SuppressWarnings("serial")// serializable to work with contrib/remote
+  private static final class ByteOrderComparator implements Serializable, Comparator<BytesRef> {
+    
+    private ByteOrderComparator() {;}
+    
+    public int compare(BytesRef left, BytesRef right) {
+      return left.compareTo(right);
+    }
+  }
+  
+  public static Comparator<BytesRef> getByteOrderComparator() {
+    return byteOrderComparator;
+  }
 
   public static Comparator<BytesRef> getUTF8SortedAsUnicodeComparator() {
     return utf8SortedAsUnicodeSortOrder;
   }
 
-  private static class UTF8SortedAsUnicodeComparator implements Comparator<BytesRef> {
+  @SuppressWarnings("serial") // serializable to work with contrib/remote
+  private static final class UTF8SortedAsUnicodeComparator implements Serializable, Comparator<BytesRef> {
     // Only singleton
     private UTF8SortedAsUnicodeComparator() {};
 
Index: src/java/org/apache/lucene/util/ConsumesRAM.java
===================================================================
--- src/java/org/apache/lucene/util/ConsumesRAM.java	(revision 0)
+++ src/java/org/apache/lucene/util/ConsumesRAM.java	(revision 0)
@@ -0,0 +1,22 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface ConsumesRAM {
+  public long ramBytesUsed();
+}

Property changes on: src/java/org/apache/lucene/util/ConsumesRAM.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/util/packed/Packed64.java
===================================================================
--- src/java/org/apache/lucene/util/packed/Packed64.java	(revision 959077)
+++ src/java/org/apache/lucene/util/packed/Packed64.java	(working copy)
@@ -182,7 +182,7 @@
     final int bitPos =     (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
 
     final int base = bitPos * FAC_BITPOS;
-
+    assert elementPos < blocks.length : "elementPos: " + elementPos + "; blocks.len: " + blocks.length;
     return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) |
             ((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]);
   }
Index: src/test/org/apache/lucene/index/TestByteSlices.java
===================================================================
--- src/test/org/apache/lucene/index/TestByteSlices.java	(revision 959077)
+++ src/test/org/apache/lucene/index/TestByteSlices.java	(working copy)
@@ -27,7 +27,7 @@
     
     /* Allocate another byte[] from the shared pool */
     @Override
-    synchronized byte[] getByteBlock() {
+    public synchronized byte[] getByteBlock() {
       final int size = freeByteBlocks.size();
       final byte[] b;
       if (0 == size)
@@ -39,13 +39,13 @@
 
     /* Return a byte[] to the pool */
     @Override
-    synchronized void recycleByteBlocks(byte[][] blocks, int start, int end) {
+    public synchronized void recycleByteBlocks(byte[][] blocks, int start, int end) {
       for(int i=start;i<end;i++)
         freeByteBlocks.add(blocks[i]);
     }
 
     @Override
-    synchronized void recycleByteBlocks(List<byte[]> blocks) {
+    public synchronized void recycleByteBlocks(List<byte[]> blocks) {
       final int size = blocks.size();
       for(int i=0;i<size;i++)
         freeByteBlocks.add(blocks.get(i));
Index: src/test/org/apache/lucene/index/values/TestIndexValues.java
===================================================================
--- src/test/org/apache/lucene/index/values/TestIndexValues.java	(revision 0)
+++ src/test/org/apache/lucene/index/values/TestIndexValues.java	(revision 0)
@@ -0,0 +1,343 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.lucene.util.*;
+import org.apache.lucene.store.*;
+import org.apache.lucene.index.*;
+import org.apache.lucene.index.values.Ints.Reader;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.*;
+
+public class TestIndexValues extends LuceneTestCase {
+
+  public void testBytesStraight() throws IOException {
+    final Random rand = newRandom();
+    runTestBytes(BytesBaseImpl.Mode.STRAIGHT, true, rand);
+    runTestBytes(BytesBaseImpl.Mode.STRAIGHT, false, rand);
+
+  }
+
+  public void testBytesDeref() throws IOException {
+    final Random rand = newRandom();
+    runTestBytes(BytesBaseImpl.Mode.DEREF, true, rand);
+    runTestBytes(BytesBaseImpl.Mode.DEREF, false, rand);
+  }
+
+  public void testBytesSorted() throws IOException {
+    final Random rand = newRandom();
+    runTestBytes(BytesBaseImpl.Mode.SORTED, true, rand);
+    runTestBytes(BytesBaseImpl.Mode.SORTED, false, rand);
+  }
+
+  // nocommit -- for sorted test, do our own Sort of the
+  // values and verify it's identical
+  public void runTestBytes(final BytesBaseImpl.Mode mode,
+      final boolean fixedSize, Random rand) throws IOException {
+
+    final BytesRef bytesRef = new BytesRef();
+
+    final Comparator<BytesRef> comp = mode == BytesBaseImpl.Mode.SORTED ? BytesRef
+        .getByteOrderComparator()
+        : null;
+
+    Directory dir = new MockRAMDirectory();
+    Bytes.Writer w = BytesBaseImpl
+        .getWriter(dir, "test", mode, comp, fixedSize);
+
+    final String[] values = new String[220];
+    final int lenMin, lenMax;
+    if (fixedSize) {
+      lenMin = lenMax = 6;
+    } else {
+      lenMin = 1;
+      lenMax = 20;
+    }
+    for (int i = 0; i < 100; i++) {
+      final String s;
+      if (i > 0 && rand.nextInt(5) <= 2) {
+        // use prior value
+        s = values[2 * rand.nextInt(i)];
+      } else {
+        s = new String(_TestUtil.getRandomText(rand, lenMin, lenMax, true, true));
+      }
+      values[2 * i] = s;
+
+      UnicodeUtil.UTF16toUTF8(s, 0, s.length(), bytesRef);
+
+      w.add(2 * i, bytesRef);
+    }
+    w.finish(220);
+
+    Bytes.Reader r = BytesBaseImpl.getReader(dir, "test", mode, fixedSize, 220);
+
+    // Verify we can load source twice:
+    for (int iter = 0; iter < 2; iter++) {
+      Bytes.Source s;
+      Bytes.SortedSource ss;
+      if (mode == BytesBaseImpl.Mode.SORTED) {
+        s = ss = r.loadSorted(comp);
+      } else {
+        s = r.load();
+        ss = null;
+      }
+
+      for (int i = 0; i < 100; i++) {
+        final int idx = 2 * i;
+        assertNotNull("doc " + idx + "; value=" + values[idx], s.get(idx));
+        assertEquals("doc " + idx, values[idx], s.get(idx).utf8ToString());
+        if (ss != null) {
+          assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx))
+              .utf8ToString());
+          Bytes.SortedSource.LookupResult result = ss.getByValue(new BytesRef(
+              values[idx]));
+          assertTrue(result.found);
+          assertEquals(ss.ord(idx), result.ord);
+        }
+      }
+
+      // Lookup random strings:
+      if (mode == BytesBaseImpl.Mode.SORTED) {
+        BytesRef b2 = ss.getByOrd(1);
+        final String first = new String(b2.bytes, b2.offset, b2.length, "UTF-8");
+        final int numValues = ss.getValueCount();
+        b2 = ss.getByOrd(numValues);
+        final String last = new String(b2.bytes, b2.offset, b2.length, "UTF-8");
+        for (int i = 0; i < 100; i++) {
+          final String s2 = new String(_TestUtil.getRandomText(rand, lenMin,
+              lenMax, true, true));
+          BytesRef b = new BytesRef(s2);
+          Bytes.SortedSource.LookupResult result = ss.getByValue(b);
+          if (result.found) {
+            assert result.ord > 0;
+            assertTrue(b.bytesEquals(ss.getByOrd(result.ord)));
+            int count = 0;
+            for (int k = 0; k < 100; k++) {
+              if (s2.equals(values[2 * k])) {
+                assertEquals(ss.ord(2 * k), result.ord);
+                count++;
+              }
+            }
+            assertTrue(count > 0);
+          } else {
+            assert result.ord >= 0;
+            if (result.ord == 0) {
+              // random string was before our first
+              assertTrue(first.compareTo(s2) > 0);
+            } else if (result.ord == numValues) {
+              // random string was after our last
+              assertTrue(last.compareTo(s2) < 0);
+            } else {
+              // random string fell between two of our
+              // values
+              b2 = ss.getByOrd(result.ord);
+              final String s3 = new String(b2.bytes, b2.offset, b2.length,
+                  "UTF-8");
+              b2 = ss.getByOrd(result.ord + 1);
+              final String s4 = new String(b2.bytes, b2.offset, b2.length,
+                  "UTF-8");
+              assertTrue(s3.compareTo(s2) < 0);
+              assertTrue(s2.compareTo(s4) < 0);
+            }
+          }
+        }
+      }
+    }
+
+    r.close();
+    dir.close();
+  }
+
+  public void testInts() throws IOException {
+    long maxV = 1;
+    final Random rand = newRandom();
+    final int NUM_VALUES = 1000;
+    final long[] values = new long[NUM_VALUES];
+    for (int rx = 1; rx < 63; rx++) {
+      for (int b = 0; b < 2; b++) {
+        // System.out.println("TEST: maxV=" + maxV + " b=" + b + " r=" +
+        // rx);
+        Directory dir = new MockRAMDirectory();
+        boolean useFixedArrays = b == 0;
+        Ints.Writer w = PackedIntsImpl.getWriter(dir, "test", useFixedArrays);
+        for (int i = 0; i < NUM_VALUES; i++) {
+          final long v = rand.nextLong() % (1 + maxV);
+          values[i] = v;
+          w.add(i, v);
+          // System.out.println("  write i=" + i + " v=" + v);
+        }
+        w.finish(NUM_VALUES + 10);
+
+        Ints.Reader r = PackedIntsImpl.getReader(dir, "test");
+        for (int iter = 0; iter < 2; iter++) {
+          Ints.Source s = r.load();
+          for (int i = 0; i < NUM_VALUES; i++) {
+            final long v = s.get(i);
+            // System.out.println("  read i=" + i + " v=" + v);
+            assertEquals(values[i], v);
+          }
+        }
+        r.close();
+        dir.close();
+      }
+
+      maxV *= 2;
+    }
+  }
+
+  public void testFloats4() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    Floats.Writer w = SimpleFloatsImpl.getWriter(dir, "test", 4);
+    final int NUM_VALUES = 1000;
+    final float[] values = new float[NUM_VALUES];
+    final Random rand = newRandom();
+    for (int i = 0; i < NUM_VALUES; i++) {
+      final float v = rand.nextFloat();
+      values[i] = v;
+      w.add(i, v);
+    }
+    w.finish(NUM_VALUES + 10);
+
+    Floats.Reader r = SimpleFloatsImpl.getReader(dir, "test", NUM_VALUES + 10);
+    for (int iter = 0; iter < 2; iter++) {
+      Floats.Source s = r.load();
+      for (int i = 0; i < NUM_VALUES; i++) {
+        assertEquals(values[i], s.get(i), 0.00001);
+      }
+    }
+
+    r.close();
+    dir.close();
+  }
+  
+  public void testFloats8() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    Floats.Writer w = SimpleFloatsImpl.getWriter(dir, "test", 8);
+    final int NUM_VALUES = 1000;
+    final double[] values = new double[NUM_VALUES];
+    final Random rand = newRandom();
+    for (int i = 0; i < NUM_VALUES; i++) {
+      final double v = rand.nextDouble();
+      values[i] = v;
+      w.add(i, v);
+    }
+    w.finish(NUM_VALUES + 10);
+
+    Floats.Reader r = SimpleFloatsImpl.getReader(dir, "test", NUM_VALUES + 10);
+    for (int iter = 0; iter < 2; iter++) {
+      Floats.Source s = r.load();
+      for (int i = 0; i < NUM_VALUES; i++) {
+        assertEquals(values[i], s.get(i));
+      }
+    }
+
+    r.close();
+    dir.close();
+  }
+
+  public void testCFSIndex() throws IOException {
+    IndexWriterConfig cfg = new IndexWriterConfig(TEST_VERSION_CURRENT,
+        new MockAnalyzer());
+    LogMergePolicy policy = new LogDocMergePolicy();
+    policy.setUseCompoundDocStore(false);
+    policy.setUseCompoundFile(true);
+    cfg.setMergePolicy(policy);
+    runTestIndex(cfg);
+  }
+
+  public void testIndex() throws IOException {
+    IndexWriterConfig cfg = new IndexWriterConfig(TEST_VERSION_CURRENT,
+        new MockAnalyzer());
+    LogMergePolicy policy = new LogDocMergePolicy();
+    policy.setUseCompoundDocStore(false);
+    policy.setUseCompoundFile(false);
+    cfg.setMergePolicy(policy);
+    runTestIndex(cfg);
+  }
+
+  public void runTestIndex(IndexWriterConfig cfg) throws IOException {
+
+    Directory d = new MockRAMDirectory();
+    IndexWriter w = new IndexWriter(d, cfg);
+    Document doc = new Document();
+    NumericField intField = new NumericField("int");
+    doc.add(intField);
+    intField.setIndexValues(Field.Values.PACKED_INTS);
+
+    NumericField floatField = new NumericField("float");
+    doc.add(floatField);
+    floatField.setIndexValues(Field.Values.SIMPLE_FLOAT_4BYTE);
+
+    byte[] b = new byte[7];
+
+    // nocommit - test other options for bytes too:
+    Field bytesField = new Field("bytes", b, 0, b.length);
+    doc.add(bytesField);
+    bytesField.setIndexValues(Field.Values.BYTES_FIXED_SORTED);
+    final int num = 777;
+    byte upto = 0;
+    for (int i = 0; i < num; i++) {
+      intField.setLongValue(i);
+      floatField.setFloatValue((float) (2.0 * i));
+      for (int j = 0; j < b.length; j++) {
+        b[j] = upto++;
+      }
+      w.addDocument(doc);
+
+      if (i % 7 == 0) {
+        w.commit();
+      }
+    }
+    w.optimize();
+    List<Closeable> closeables = new ArrayList<Closeable>();
+    IndexReader r = w.getReader();
+    assertEquals(num, r.numDocs());
+    Reader intsReader = r.getIndexValuesInts("int");
+    Ints.Source ints = intsReader.load();
+    closeables.add(intsReader);
+    Floats.Reader floatReader = r.getIndexValuesFloats("float");
+    Floats.Source floats = floatReader.load();
+    closeables.add(floatReader);
+    Bytes.Reader bytesReader = r.getIndexValuesBytes("bytes");
+    Bytes.Source bytes = bytesReader.load();
+    closeables.add(bytesReader);
+    upto = 0;
+    for (int i = 0; i < num; i++) {
+      assertEquals(i, ints.get(i));
+      assertEquals(2.0 * i, floats.get(i), 0.00001);
+      BytesRef br = bytes.get(i);
+      for (int j = 0; j < 7; j++) {
+        assertEquals(upto++, br.bytes[br.offset + j]);
+      }
+    }
+    closeables.add(r);
+    closeables.add(w);
+    closeables.add(d);
+    for (Closeable toClose : closeables) {
+      toClose.close();
+    }
+    // nocommit -- test with deletions
+  }
+}

Property changes on: src/test/org/apache/lucene/index/values/TestIndexValues.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/test/org/apache/lucene/store/MockRAMDirectory.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMDirectory.java	(revision 959077)
+++ src/test/org/apache/lucene/store/MockRAMDirectory.java	(working copy)
@@ -19,9 +19,13 @@
 
 import java.io.IOException;
 import java.io.FileNotFoundException;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Random;
 import java.util.Map;
 import java.util.HashMap;
@@ -53,6 +57,8 @@
   // member initialization vs when it calls super.  It seems
   // like super is called, then our members are initialized:
   Map<String,Integer> openFiles;
+  Set<MockRAMInputStream> openInputStreams = new HashSet<MockRAMInputStream>();
+  
 
   private synchronized void init() {
     if (openFiles == null)
@@ -229,7 +235,8 @@
       fileMap.put(name, file);
     }
 
-    return new MockRAMOutputStream(this, file, name);
+    MockRAMOutputStream stream = new MockRAMOutputStream(this, file, name);
+    return stream;
   }
 
   @Override
@@ -246,7 +253,9 @@
          openFiles.put(name, Integer.valueOf(1));
       }
     }
-    return new MockRAMInputStream(this, name, file);
+    MockRAMInputStream stream = new MockRAMInputStream(this, name, file);
+    openInputStreams.add(stream);
+    return stream;
   }
 
   /** Provided for testing purposes.  Use sizeInBytes() instead. */
@@ -279,7 +288,14 @@
     if (noDeleteOpenFile && openFiles.size() > 0) {
       // RuntimeException instead of IOException because
       // super() does not throw IOException currently:
-      throw new RuntimeException("MockRAMDirectory: cannot close: there are still open files: " + openFiles);
+	Set<MockRAMInputStream> streams = this.openInputStreams;
+	StringWriter sw = new StringWriter();
+	PrintWriter pw = new PrintWriter(sw);
+	for (MockRAMInputStream stream : streams) {
+	    stream.ex.printStackTrace(pw);
+	    pw.println();
+	}
+      throw new RuntimeException("MockRAMDirectory: cannot close: there are still open files: " + openFiles +" opened at: " + sw.toString() );
     }
   }
 
Index: src/test/org/apache/lucene/store/MockRAMInputStream.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMInputStream.java	(revision 959077)
+++ src/test/org/apache/lucene/store/MockRAMInputStream.java	(working copy)
@@ -28,6 +28,7 @@
   private MockRAMDirectory dir;
   private String name;
   private boolean isClone;
+  Exception ex;
 
   /** Construct an empty output buffer. 
    * @throws IOException */
@@ -35,6 +36,8 @@
     super(f);
     this.name = name;
     this.dir = dir;
+    // store the stacktrace
+    ex = new Exception();
   }
 
   @Override
@@ -45,6 +48,7 @@
     // all clones get closed:
     if (!isClone) {
       synchronized(dir) {
+	dir.openInputStreams.remove(this);
         Integer v = dir.openFiles.get(name);
         // Could be null when MockRAMDirectory.crash() was called
         if (v != null) {
Index: src/test/org/apache/lucene/store/MockRAMOutputStream.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMOutputStream.java	(revision 959077)
+++ src/test/org/apache/lucene/store/MockRAMOutputStream.java	(working copy)
@@ -30,6 +30,8 @@
   private MockRAMDirectory dir;
   private boolean first=true;
   private final String name;
+  //TODO(simonw): finish this
+  final Exception ex;
   
   byte[] singleByte = new byte[1];
 
@@ -38,6 +40,8 @@
     super(f);
     this.dir = dir;
     this.name = name;
+    // save the stacktrace of where this stream was opened, for leak diagnostics
+    ex = new Exception();
   }
 
   @Override
Index: src/test/org/apache/lucene/util/_TestUtil.java
===================================================================
--- src/test/org/apache/lucene/util/_TestUtil.java	(revision 959077)
+++ src/test/org/apache/lucene/util/_TestUtil.java	(working copy)
@@ -151,4 +151,39 @@
   public static int getRandomMultiplier() {
     return Integer.parseInt(System.getProperty("random.multiplier", "1"));
   }
+  
+  
+  public static char[] getRandomText(Random rand, int lenMin, int lenMax, boolean doSurrogates, boolean simpleASCII) {
+    final int len;
+    if (lenMin == lenMax) {
+      len = lenMin;
+    } else {
+      len = lenMin+rand.nextInt(lenMax-lenMin);
+    }
+    char[] buffer = new char[len];
+    for(int i=0;i<len;i++) {
+      if (simpleASCII) {
+        buffer[i] = (char) nextInt(rand, 97, 122);
+      } else {
+        final int t = rand.nextInt(5);
+        if (0 == t && i < len-1 && doSurrogates) {
+          // Make a surrogate pair
+          // High surrogate
+          buffer[i++] = (char) nextInt(rand, 0xd800, 0xdc00);
+          // Low surrogate
+          buffer[i] = (char) nextInt(rand, 0xdc00, 0xe000);
+        } else if (t <= 1) {
+          buffer[i] = (char) rand.nextInt(0x80);
+        } else if (2 == t) {
+          buffer[i] = (char) nextInt(rand, 0x80, 0x800);
+        } else if (3 == t) {
+          buffer[i] = (char) nextInt(rand, 0x800, 0xd800);
+        } else {
+          buffer[i] = (char) nextInt(rand, 0xe000, 0xffff);
+        }
+      }
+    }
+    return buffer;
+  }
+
 }
