Index: lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java	(révision 1418336)
+++ lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java	(copie de travail)
@@ -21,6 +21,7 @@
 
 import org.apache.lucene.codecs.FilterCodec;
 import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.lucene41.Lucene41Codec;
 
 import com.carrotsearch.randomizedtesting.generators.RandomInts;
@@ -54,10 +55,12 @@
   }
 
   private final CompressingStoredFieldsFormat storedFieldsFormat;
+  private final CompressingTermVectorsFormat termVectorsFormat;
 
   public CompressingCodec(String name, CompressionMode compressionMode, int chunkSize) {
     super(name, new Lucene41Codec());
     this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, compressionMode, chunkSize);
+    this.termVectorsFormat = new CompressingTermVectorsFormat(name, compressionMode, chunkSize);
   }
 
   @Override
@@ -66,7 +69,12 @@
   }
 
   @Override
+  public TermVectorsFormat termVectorsFormat() {
+    return termVectorsFormat;
+  }
+
+  @Override
   public String toString() {
-    return getName() + "(storedFieldsFormat=" + storedFieldsFormat + ")";
+    return getName() + "(storedFieldsFormat=" + storedFieldsFormat + ", termVectorsFormat=" + termVectorsFormat + ")";
   }
 }
Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java	(révision 0)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java	(révision 0)
@@ -0,0 +1,563 @@
+package org.apache.lucene.codecs.compressing;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.TermVectorsWriter;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.packed.PackedInts;
+
+final class CompressingTermVectorsWriter extends TermVectorsWriter {
+
+  static final String VECTORS_EXTENSION = "tvd";
+  static final String VECTORS_INDEX_EXTENSION = "tvx";
+
+  static final String CODEC_SFX_IDX = "Index";
+  static final String CODEC_SFX_DAT = "Data";
+
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static final int POSITIONS = 0x01;
+  static final int   OFFSETS = 0x02;
+  static final int  PAYLOADS = 0x04;
+  static final int FLAGS_MASK = POSITIONS | OFFSETS | PAYLOADS;
+  static final int FLAGS_BITS = PackedInts.bitsRequired(FLAGS_MASK);
+
+  private final Directory directory;
+  private final String segment;
+  private CompressingStoredFieldsIndexWriter indexWriter;
+  private IndexOutput vectorsStream;
+
+  private final CompressionMode compressionMode;
+  private final Compressor compressor;
+  private final int chunkSize;
+
+  private static class DocData {
+    final int numFields;
+    final List<FieldData> fields;
+    DocData(int numFields) {
+      this.numFields = numFields;
+      this.fields = new ArrayList<FieldData>(numFields);
+    }
+  }
+
+  private static class FieldData {
+    int fieldNum, flags, numTerms, endOffset;
+  }
+
+  private static class PositionData {
+    int position, startOffset, endOffset;
+    BytesRef payload;
+  }
+
+  private int numDocs; // total number of docs seen
+  private final List<DocData> pendingDocs; // pending docs
+  private DocData curDoc; // current document
+  private FieldData curField; // current field
+  private int curFreq; // frequency of the current term
+  private final List<PositionData> pendingPositions; // pending positions
+  private final GrowableByteArrayDataOutput bufferedTerms; // buffered terms and positions to compress
+
+  CompressingTermVectorsWriter(Directory directory, SegmentInfo si, IOContext context,
+      String formatName, CompressionMode compressionMode, int chunkSize) throws IOException {
+    assert directory != null;
+    this.directory = directory;
+    this.segment = si.name;
+    this.compressionMode = compressionMode;
+    this.compressor = compressionMode.newCompressor();
+    this.chunkSize = chunkSize;
+
+    numDocs = 0;
+    pendingDocs = new ArrayList<DocData>();
+    pendingPositions = new ArrayList<PositionData>();
+    bufferedTerms = new GrowableByteArrayDataOutput(ArrayUtil.oversize(chunkSize, 1));
+
+    boolean success = false;
+    IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", VECTORS_INDEX_EXTENSION), context);
+    try {
+      vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", VECTORS_EXTENSION), context);
+
+      final String codecNameIdx = formatName + CODEC_SFX_IDX;
+      final String codecNameDat = formatName + CODEC_SFX_DAT;
+      CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT);
+      CodecUtil.writeHeader(vectorsStream, codecNameDat, VERSION_CURRENT);
+      assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
+      assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
+
+      indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
+      indexStream = null;
+
+      vectorsStream.writeVInt(PackedInts.VERSION_CURRENT);
+
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(indexStream);
+        abort();
+      }
+    }
+  }
+
+  @Override
+  public void close() throws IOException {
+    try {
+      IOUtils.close(vectorsStream, indexWriter);
+    } finally {
+      vectorsStream = null;
+      indexWriter = null;
+    }
+  }
+
+  @Override
+  public void abort() {
+    IOUtils.closeWhileHandlingException(this);
+    IOUtils.deleteFilesIgnoringExceptions(directory,
+        IndexFileNames.segmentFileName(segment, "", VECTORS_EXTENSION),
+        IndexFileNames.segmentFileName(segment, "", VECTORS_INDEX_EXTENSION));
+  }
+
+  @Override
+  public void startDocument(int numVectorFields) throws IOException {
+    if (curField != null) {
+      assert curDoc.numFields == curDoc.fields.size();
+      curField.endOffset = bufferedTerms.length;
+    }
+
+    if (shouldFlush()) {
+      flush();
+    }
+    ++numDocs;
+    curDoc = new DocData(numVectorFields);
+    pendingDocs.add(curDoc);
+    curField = null;
+  }
+
+  @Override
+  public void startField(FieldInfo info, int numTerms, boolean positions,
+      boolean offsets, boolean payloads) throws IOException {
+    if (curField != null) {
+      curField.endOffset = bufferedTerms.length;
+    }
+    curField = new FieldData();
+    curField.fieldNum = info.number;
+    curField.numTerms = numTerms;
+    curField.flags = (positions ? POSITIONS : 0) | (offsets ? OFFSETS : 0) | (payloads ? PAYLOADS : 0);
+    curDoc.fields.add(curField);
+  }
+
+  @Override
+  public void startTerm(BytesRef term, int freq) throws IOException {
+    assert pendingPositions.isEmpty();
+    curFreq = freq;
+    // try storing freq + term on a single byte
+    // works if term length <= 30 and freq <= 6
+    final int token = (Math.min(term.length, 0x1F) << 3) | Math.min(freq, 0x07);
+    bufferedTerms.writeByte((byte) token);
+    if (term.length >= 0x1F) {
+      bufferedTerms.writeVInt(term.length - 0x1F);
+    }
+    if (freq >= 0x07) {
+      bufferedTerms.writeVInt(freq - 0x07);
+    }
+    bufferedTerms.writeBytes(term.bytes, term.offset, term.length);
+  }
+
+  private static boolean equals(BytesRef a, BytesRef b) {
+    if (a == null) {
+      return b == null;
+    } else {
+      return a.equals(b);
+    }
+  }
+
+  private void flushTermPositions(List<PositionData> termPositions) throws IOException {
+    assert curField.flags != 0;
+    final int termFreq = termPositions.size();
+
+    if ((curField.flags & POSITIONS) != 0) {
+      if (termFreq == 1) {
+        bufferedTerms.writeVInt(termPositions.get(0).position);
+      } else {
+        int maxPositionDelta = termPositions.get(0).position;
+        for (int i = 1; i < termFreq; ++i) {
+          maxPositionDelta |= termPositions.get(i).position - termPositions.get(i - 1).position;
+        }
+        if (maxPositionDelta < 0) {
+          assert false;
+          // can this really happen?
+          bufferedTerms.writeVInt(0);
+          for (int i = 0; i < termFreq; ++i) {
+            bufferedTerms.writeVInt(termPositions.get(i).position);
+          }
+        } else {
+          final int bitsRequired = PackedInts.bitsRequired(maxPositionDelta);
+          assert bitsRequired > 0 && bitsRequired <= 31;
+          bufferedTerms.writeVInt(bitsRequired);
+          final PackedInts.Writer writer = PackedInts.getWriterNoHeader(bufferedTerms, PackedInts.Format.PACKED, termFreq, bitsRequired, 1);
+          writer.add(termPositions.get(0).position);
+          for (int i = 1; i < termFreq; ++i) {
+            writer.add(termPositions.get(i).position - termPositions.get(i - 1).position);
+          }
+          writer.finish();
+        }
+      }
+    }
+
+    if ((curField.flags & OFFSETS) != 0) {
+      if (termFreq == 1) {
+        final PositionData tp = termPositions.get(0);
+        bufferedTerms.writeVInt(tp.startOffset);
+        bufferedTerms.writeVInt(tp.endOffset - tp.startOffset);
+      } else {
+        boolean allLengthEqual = true;
+        int maxStartOffsetDelta = termPositions.get(0).startOffset;
+        int maxLength = termPositions.get(0).endOffset - termPositions.get(0).startOffset;
+        for (int i = 1; i < termFreq; ++i) {
+          maxStartOffsetDelta |= termPositions.get(i).startOffset - termPositions.get(i - 1).startOffset;
+          final int length = termPositions.get(i).endOffset - termPositions.get(i).startOffset;
+          maxLength |= length;
+          if (length != termPositions.get(0).endOffset - termPositions.get(0).startOffset) {
+            allLengthEqual = false;
+          }
+        }
+
+        if (maxStartOffsetDelta < 0 || maxLength < 0) {
+          bufferedTerms.writeVInt(0);
+          for (PositionData pd : termPositions) {
+            bufferedTerms.writeVInt(pd.startOffset);
+            bufferedTerms.writeVInt(pd.endOffset - pd.startOffset);
+          }
+        } else {
+          final int offBitsRequired = PackedInts.bitsRequired(maxStartOffsetDelta);
+          assert offBitsRequired > 0 && offBitsRequired <= 31;
+          final int lenBitsRequired = allLengthEqual ? 0 : PackedInts.bitsRequired(maxLength);
+          // try to pack both values on a single byte
+          // works if lenBitsRequired <= 6 (very likely)
+          final int token = (offBitsRequired << 3) | Math.min(lenBitsRequired, 0x07);
+          bufferedTerms.writeByte((byte) token);
+          if (lenBitsRequired >= 0x07) {
+            bufferedTerms.writeVInt(lenBitsRequired - 0x07);
+          }
+
+          PackedInts.Writer writer = PackedInts.getWriterNoHeader(bufferedTerms, PackedInts.Format.PACKED, termFreq, offBitsRequired, 1);
+          writer.add(termPositions.get(0).startOffset);
+          for (int i = 1; i < termFreq; ++i) {
+            writer.add(termPositions.get(i).startOffset - termPositions.get(i - 1).startOffset);
+          }
+          writer.finish();
+
+          if (lenBitsRequired == 0) {
+            bufferedTerms.writeVInt(maxLength);
+          } else {
+            writer = PackedInts.getWriterNoHeader(bufferedTerms, PackedInts.Format.PACKED, termFreq, lenBitsRequired, 1);
+            for (PositionData td : termPositions) {
+              writer.add(td.endOffset - td.startOffset);
+            }
+            writer.finish();
+          }
+        }
+      }
+    }
+
+    if ((curField.flags & PAYLOADS) != 0) {
+      if (termFreq == 1) {
+        final PositionData pd = termPositions.get(0);
+        if (pd.payload == null) {
+          bufferedTerms.writeVInt(0);
+        } else {
+          bufferedTerms.writeVInt(pd.payload.length);
+          bufferedTerms.writeBytes(pd.payload.bytes, pd.payload.offset, pd.payload.length);
+        }
+      } else {
+        int maxLength = 0;
+        boolean allEqual = true;
+        for (PositionData td : termPositions) {
+          if (td.payload != null) {
+            maxLength |= td.payload.length;
+          }
+          if (!equals(td.payload, termPositions.get(0).payload)) {
+            allEqual = false;
+          }
+        }
+
+        if (allEqual) {
+          bufferedTerms.writeVInt(0);
+          final BytesRef payload = termPositions.get(0).payload;
+          if (payload == null) {
+            bufferedTerms.writeVInt(0);
+          } else {
+            bufferedTerms.writeVInt(payload.length);
+            bufferedTerms.writeBytes(payload.bytes, payload.offset, payload.length);
+          }
+        } else {
+          final int bitsRequired = PackedInts.bitsRequired(maxLength);
+          bufferedTerms.writeVInt(bitsRequired);
+          final PackedInts.Writer writer = PackedInts.getWriterNoHeader(bufferedTerms, PackedInts.Format.PACKED, termFreq, bitsRequired, 1);
+          for (PositionData td : termPositions) {
+            writer.add(td.payload == null ? 0 : td.payload.length);
+          }
+          writer.finish();
+          for (PositionData td : termPositions) {
+            if (td.payload != null) {
+              bufferedTerms.writeBytes(td.payload.bytes, td.payload.offset, td.payload.length);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public void addPosition(int position, int startOffset, int endOffset,
+      BytesRef payload) throws IOException {
+    final PositionData pd = new PositionData();
+    pd.position = position;
+    pd.startOffset = startOffset;
+    pd.endOffset = endOffset;
+    if ((curField.flags & PAYLOADS) != 0 && payload != null && payload.length > 0) {
+      pd.payload = new BytesRef(Arrays.copyOfRange(payload.bytes, payload.offset, payload.offset + payload.length));
+    }
+    pendingPositions.add(pd);
+
+    assert pendingPositions.size() <= curFreq;
+    if (pendingPositions.size() == curFreq) {
+      flushTermPositions(pendingPositions);
+      pendingPositions.clear();
+    }
+  }
+
+  private boolean shouldFlush() {
+    return bufferedTerms.length >= chunkSize || pendingDocs.size() >= chunkSize;
+  }
+
+  private void flush() throws IOException {
+    final int chunkDocs = pendingDocs.size();
+    assert chunkDocs > 0 : chunkDocs;
+
+    // write the index file
+    indexWriter.writeIndex(chunkDocs, vectorsStream.getFilePointer());
+
+    // flush metadata
+    final int docBase = numDocs - chunkDocs;
+    vectorsStream.writeVInt(docBase);
+    vectorsStream.writeVInt(chunkDocs);
+
+    int totalFields = flushNumFields(chunkDocs);
+
+    if (totalFields > 0) {
+      final int[] fieldNums = flushFieldNums();
+      flushFields(totalFields, fieldNums);
+      flushFlags(totalFields);
+      flushNumTerms(totalFields);
+      flushLengths(totalFields);
+
+      // compress data and write it to the output
+      compressor.compress(bufferedTerms.bytes, 0, bufferedTerms.length, vectorsStream);
+    }
+
+    // reset
+    pendingDocs.clear();
+    curDoc = null;
+    curField = null;
+    bufferedTerms.length = 0;
+  }
+
+  private int flushNumFields(int chunkDocs) throws IOException {
+    if (chunkDocs == 1) {
+      final int numFields = pendingDocs.get(0).numFields;
+      vectorsStream.writeVInt(numFields);
+      return numFields;
+    } else {
+      boolean allEqual = true;
+      int maxNumField = 0;
+      int totalFields = 0;
+      for (DocData dd : pendingDocs) {
+        if (dd.numFields != pendingDocs.get(0).numFields) {
+          allEqual = false;
+        }
+        maxNumField |= dd.numFields;
+        totalFields += dd.numFields;
+      }
+      if (allEqual) {
+        vectorsStream.writeVInt(0);
+        vectorsStream.writeVInt(maxNumField);
+      } else {
+        final int bitsRequired = PackedInts.bitsRequired(maxNumField);
+        vectorsStream.writeVInt(bitsRequired);
+        final PackedInts.Writer writer = PackedInts.getWriterNoHeader(
+            vectorsStream, PackedInts.Format.PACKED, chunkDocs, bitsRequired, 1);
+        for (DocData dd : pendingDocs) {
+          writer.add(dd.numFields);
+        }
+        assert writer.ord() == chunkDocs - 1;
+        writer.finish();
+      }
+      return totalFields;
+    }
+  }
+
+  private int[] flushFieldNums() throws IOException {
+    SortedSet<Integer> fieldNums = new TreeSet<Integer>();
+    for (DocData dd : pendingDocs) {
+      for (FieldData fd : dd.fields) {
+        fieldNums.add(fd.fieldNum);
+      }
+    }
+
+    final int numDistinctFields = fieldNums.size();
+    if (numDistinctFields == 0) {
+      vectorsStream.writeVInt(0);
+    } else {
+      final int bitsRequired = PackedInts.bitsRequired(fieldNums.last());
+      final int token = (Math.min(numDistinctFields, 0x07) << 5) | bitsRequired;
+      vectorsStream.writeByte((byte) token);
+      if (numDistinctFields >= 0x07) {
+        vectorsStream.writeVInt(numDistinctFields - 0x07);
+      }
+      final PackedInts.Writer writer = PackedInts.getWriterNoHeader(vectorsStream, PackedInts.Format.PACKED, fieldNums.size(), bitsRequired, 1);
+      for (Integer fieldNum : fieldNums) {
+        writer.add(fieldNum);
+      }
+      writer.finish();
+    }
+
+    int[] fns = new int[fieldNums.size()];
+    int i = 0;
+    for (Integer key : fieldNums) {
+      fns[i++] = key;
+    }
+    return fns;
+  }
+
+  private void flushFields(int totalFields, int[] fieldNums) throws IOException {
+    final PackedInts.Writer writer = PackedInts.getWriterNoHeader(vectorsStream, PackedInts.Format.PACKED, totalFields, PackedInts.bitsRequired(fieldNums.length - 1), 1);
+    for (DocData dd : pendingDocs) {
+      for (FieldData fd : dd.fields) {
+        final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum);
+        assert fieldNumIndex >= 0;
+        writer.add(fieldNumIndex);
+      }
+    }
+    writer.finish();
+  }
+
+  private void flushFlags(int totalFields) throws IOException {
+    final PackedInts.Writer writer = PackedInts.getWriterNoHeader(vectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1);
+    for (DocData dd : pendingDocs) {
+      for (FieldData fd : dd.fields) {
+        writer.add(fd.flags);
+      }
+    }
+    writer.finish();
+  }
+
+  private void flushNumTerms(int totalFields) throws IOException {
+    int maxNumTerms = 0;
+    for (DocData dd : pendingDocs) {
+      for (FieldData fd : dd.fields) {
+        maxNumTerms |= fd.numTerms;
+      }
+    }
+    final int bitsRequired = PackedInts.bitsRequired(maxNumTerms);
+    vectorsStream.writeVInt(bitsRequired);
+    final PackedInts.Writer writer = PackedInts.getWriterNoHeader(
+        vectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
+    for (DocData dd : pendingDocs) {
+      for (FieldData fd : dd.fields) {
+        writer.add(fd.numTerms);
+      }
+    }
+    assert writer.ord() == totalFields - 1;
+    writer.finish();
+  }
+
+  private void flushLengths(int totalFields) throws IOException {
+    int[] lengths = new int[totalFields];
+    int i = 0;
+    for (DocData dd : pendingDocs) {
+      for (FieldData fd : dd.fields) {
+        lengths[i++] = fd.endOffset;
+      }
+    }
+    for (i = totalFields - 1; i >= 1; --i) {
+      lengths[i] -= lengths[i-1];
+      assert lengths[i] >= 0 : Arrays.toString(lengths);
+    }
+    assert bufferedTerms.length == sum(lengths);
+
+    int maxLength = 0;
+    for (int length : lengths) {
+      maxLength |= length;
+    }
+    final int bitsRequired = PackedInts.bitsRequired(maxLength);
+    vectorsStream.writeVInt(bitsRequired);
+    final PackedInts.Writer writer = PackedInts.getWriterNoHeader(
+        vectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
+    for (int length : lengths) {
+      writer.add(length);
+    }
+    writer.finish();
+  }
+
+  private static int sum(int[] arr) {
+    int sum = 0;
+    for (int el : arr) {
+      sum += el;
+    }
+    return sum;
+  }
+
+  @Override
+  public void finish(FieldInfos fis, int numDocs) throws IOException {
+    if (pendingDocs.size() > 0) {
+      if (curField != null) {
+        assert curDoc.numFields == curDoc.fields.size();
+        curField.endOffset = bufferedTerms.length;
+      }
+      flush();
+    }
+    if (numDocs != this.numDocs) {
+      throw new RuntimeException("Wrote " + this.numDocs + " docs, finish called with numDocs=" + numDocs);
+    }
+    indexWriter.finish(numDocs);
+  }
+
+  @Override
+  public Comparator<BytesRef> getComparator() throws IOException {
+    return BytesRef.getUTF8SortedAsUnicodeComparator();
+  }
+
+}

Modification de propriétés sur lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
___________________________________________________________________
Ajouté : svn:eol-style
   + native

Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java	(révision 0)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java	(révision 0)
@@ -0,0 +1,844 @@
+package org.apache.lucene.codecs.compressing;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_DAT;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_IDX;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.FLAGS_BITS;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.OFFSETS;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.PAYLOADS;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.POSITIONS;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_EXTENSION;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.PackedInts;
+
+
+final class CompressingTermVectorsReader extends TermVectorsReader {
+
+  private final FieldInfos fieldInfos;
+  private final CompressingStoredFieldsIndexReader indexReader;
+  private final IndexInput vectorsStream;
+  private final int packedIntsVersion;
+  private final CompressionMode compressionMode;
+  private final Decompressor decompressor;
+  private final BytesRef bytes;
+  private final int numDocs;
+  private boolean closed;
+
+  // used by clone
+  private CompressingTermVectorsReader(CompressingTermVectorsReader reader) {
+    this.fieldInfos = reader.fieldInfos;
+    this.vectorsStream = reader.vectorsStream.clone();
+    this.indexReader = reader.indexReader.clone();
+    this.packedIntsVersion = reader.packedIntsVersion;
+    this.compressionMode = reader.compressionMode;
+    this.decompressor = reader.decompressor.clone();
+    this.numDocs = reader.numDocs;
+    this.bytes = new BytesRef(reader.bytes.bytes.length);
+    this.closed = false;
+  }
+
+  public CompressingTermVectorsReader( Directory d, SegmentInfo si, FieldInfos fn,
+      IOContext context, String formatName, CompressionMode compressionMode) throws IOException {
+    this.compressionMode = compressionMode;
+    final String segment = si.name;
+    boolean success = false;
+    fieldInfos = fn;
+    numDocs = si.getDocCount();
+    IndexInput indexStream = null;
+    try {
+      vectorsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", VECTORS_EXTENSION), context);
+      final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
+      indexStream = d.openInput(indexStreamFN, context);
+
+      final String codecNameIdx = formatName + CODEC_SFX_IDX;
+      final String codecNameDat = formatName + CODEC_SFX_DAT;
+      CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
+      CodecUtil.checkHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
+      assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
+      assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
+
+      indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
+      indexStream = null;
+
+      packedIntsVersion = vectorsStream.readVInt();
+      decompressor = compressionMode.newDecompressor();
+      this.bytes = new BytesRef();
+
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(this, indexStream);
+      }
+    }
+  }
+
+  /**
+   * @throws AlreadyClosedException if this FieldsReader is closed
+   */
+  private void ensureOpen() throws AlreadyClosedException {
+    if (closed) {
+      throw new AlreadyClosedException("this FieldsReader is closed");
+    }
+  }
+
+  @Override
+  public void close() throws IOException {
+    if (!closed) {
+      IOUtils.close(vectorsStream, indexReader);
+      closed = true;
+    }
+  }
+
+  @Override
+  public TermVectorsReader clone() {
+    return new CompressingTermVectorsReader(this);
+  }
+
+  @Override
+  public Fields get(int doc) throws IOException {
+    ensureOpen();
+
+    // seek to the right place
+    {
+      final long startPointer = indexReader.getStartPointer(doc);
+      vectorsStream.seek(startPointer);
+    }
+
+    // decode metadata
+    final int docBase = vectorsStream.readVInt();
+    final int chunkDocs = vectorsStream.readVInt();
+    if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs) {
+      throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc);
+    }
+
+    // how many fields should we skip and how many fields does the doc have?
+    final int skip, numFields, totalFields;
+    if (chunkDocs == 1) {
+      skip = 0;
+      numFields = totalFields = vectorsStream.readVInt();
+    } else {
+      final int bitsPerValue = vectorsStream.readVInt();
+      if (bitsPerValue > 32) {
+        throw new CorruptIndexException(bitsPerValue + " > 32");
+      }
+      if (bitsPerValue == 0) {
+        numFields = vectorsStream.readVInt();
+        skip = (doc - docBase) * numFields;
+        totalFields = chunkDocs * numFields;
+      } else {
+        int sum = 0;
+        final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerValue, 1);
+        for (int i = docBase; i < doc; ++i) {
+          sum += it.next();
+        }
+        skip = sum;
+        numFields = (int) it.next();
+        sum += numFields;
+        for (int i = doc + 1; i < docBase + chunkDocs; ++i) {
+          sum += it.next();
+        }
+        totalFields = sum;
+      }
+    }
+
+    if (numFields == 0) {
+      return null;
+    }
+
+    // read field numbers that have term vectors
+    final int[] fieldNums;
+    {
+      final int token = vectorsStream.readByte() & 0xFF;
+      assert token != 0; // means no term vectors, cannot happen since we checked for numFields == 0
+      final int bitsPerFieldNum = token & 0x1F;
+      int totalDistinctFields = token >>> 5;
+      if (totalDistinctFields == 0x07) {
+        totalDistinctFields += vectorsStream.readVInt();
+      }
+      final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
+      fieldNums = new int[totalDistinctFields];
+      for (int i = 0; i < totalDistinctFields; ++i) {
+        fieldNums[i] = (int) it.next();
+      }
+    }
+
+    // read the field numbers of the doc
+    final int[] fieldNumOffs = new int[numFields];
+    {
+      final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1);
+      final long startPointer = vectorsStream.getFilePointer();
+      final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff, 1);
+      for (int i = 0; i < skip; ++i) {
+        it.next(); // skip
+      }
+      for (int i = 0; i < numFields; ++i) {
+        fieldNumOffs[i] = (int) it.next();
+      }
+      vectorsStream.seek(startPointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, totalFields, bitsPerOff));
+    }
+
+    // read flags
+    final int[] fieldFlags = new int[numFields];
+    {
+      final long startPointer = vectorsStream.getFilePointer();
+      final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, FLAGS_BITS, 1);
+      for (int i = 0; i < skip; ++i) {
+        it.next(); // skip
+      }
+      for (int i = 0; i < numFields; ++i) {
+        fieldFlags[i] = (int) it.next();
+      }
+      vectorsStream.seek(startPointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, totalFields, FLAGS_BITS));
+    }
+
+    // read the number of terms
+    final int[] numTerms = new int[numFields];
+    {
+      final int bitsPerNumTerms = vectorsStream.readVInt();
+      final long startPointer = vectorsStream.getFilePointer();
+      if (bitsPerNumTerms >= 32) {
+        throw new CorruptIndexException(bitsPerNumTerms + " >= 32");
+      }
+      final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerNumTerms, 1);
+      for (int i = 0; i < skip; ++i) {
+        it.next();
+      }
+      for (int i = 0; i < numFields; ++i) {
+        numTerms[i] = (int) it.next();
+      }
+      vectorsStream.seek(startPointer + PackedInts.Format.PACKED.byteCount(packedIntsVersion, totalFields, bitsPerNumTerms));
+    }
+
+    // read the start offset and the lengths
+    final int docOff, docLen, totalLen;
+    final int[] lengths = new int[numFields];
+    {
+      final int bitsPerLength = vectorsStream.readVInt();
+      assert bitsPerLength > 0 && bitsPerLength <= 31;
+      final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerLength, 1);
+      int sum = 0;
+      for (int i = 0; i < skip; ++i) {
+        sum += it.next();
+      }
+      docOff = sum;
+      for (int i = 0; i < numFields; ++i) {
+        lengths[i] = (int) it.next();
+        sum += lengths[i];
+      }
+      docLen = sum - docOff;
+      for (int i = skip + numFields; i < totalFields; ++i) {
+        sum += it.next();
+      }
+      totalLen = sum;
+      assert it.ord() == totalFields - 1;
+    }
+
+    decompressor.decompress(vectorsStream, totalLen, docOff, docLen, bytes);
+
+    // copy so that two TVFields instances can be used at the same time
+    final BytesRef copy = new BytesRef(bytes.length);
+    copy.copyBytes(bytes);
+    return new TVFields(fieldNums, fieldFlags, fieldNumOffs, numTerms, lengths, copy);
+  }
+
+  /**
+   * {@link Fields} implementation over the term vectors of a single document,
+   * backed by a private copy of the decompressed chunk bytes. Non-static so
+   * that field numbers can be resolved to names via the reader's fieldInfos.
+   */
+  private class TVFields extends Fields {
+
+    // fieldNums lists the distinct field numbers of the whole chunk; the
+    // remaining arrays are parallel, one entry per field of this document:
+    // fieldNumOffs[i] indexes into fieldNums, fieldFlags[i] is the
+    // POSITIONS/OFFSETS/PAYLOADS mask, numTerms[i] and lengths[i] describe
+    // the i-th field's serialized data.
+    private final int[] fieldNums, fieldFlags, fieldNumOffs, numTerms, lengths;
+    // private copy of the decompressed term/position bytes for this document,
+    // so that two TVFields instances can be used at the same time
+    private final BytesRef termsAndPositions;
+
+    public TVFields(int[] fieldNums, int[] fieldFlags,
+        int[] fieldNumOffs, int[] numTerms, int[] lengths, BytesRef termsAndPositions) {
+      this.fieldNums = fieldNums;
+      this.fieldFlags = fieldFlags;
+      this.fieldNumOffs = fieldNumOffs;
+      this.numTerms = numTerms;
+      this.lengths = lengths;
+      this.termsAndPositions = termsAndPositions;
+    }
+
+    /** Iterates over this document's field names, in the order they were stored. */
+    @Override
+    public Iterator<String> iterator() {
+      return new Iterator<String>() {
+        int i = 0;
+        @Override
+        public boolean hasNext() {
+          return i < fieldNumOffs.length;
+        }
+        @Override
+        public String next() {
+          if (!hasNext()) {
+            throw new NoSuchElementException();
+          }
+          final int fieldNum = fieldNums[fieldNumOffs[i++]];
+          return fieldInfos.fieldInfo(fieldNum).name;
+        }
+        @Override
+        public void remove() {
+          throw new UnsupportedOperationException();
+        }
+      };
+    }
+
+    @Override
+    public Terms terms(String field) throws IOException {
+      final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+      if (fieldInfo == null) {
+        return null;
+      }
+      // locate the index of the requested field within this document
+      int idx = -1;
+      for (int i = 0; i < fieldNumOffs.length; ++i) {
+        if (fieldNums[fieldNumOffs[i]] == fieldInfo.number) {
+          idx = i;
+          break;
+        }
+      }
+
+      if (idx == -1 || numTerms[idx] == 0) {
+        // no term
+        return null;
+      }
+      // compute the byte offset and length of this field's serialized data by
+      // summing the lengths of the preceding fields of the same document
+      int fieldOff = 0, fieldLen = -1;
+      for (int i = 0; i < fieldNumOffs.length; ++i) {
+        if (i < idx) {
+          fieldOff += lengths[i];
+        } else {
+          fieldLen = lengths[i];
+          break;
+        }
+      }
+      assert fieldLen >= 0;
+      // NOTE(review): assumes termsAndPositions.offset == 0 (true for the
+      // copy made in get()) — confirm if the copy is ever made differently
+      return new TVTerms(numTerms[idx], fieldFlags[idx], new BytesRef(termsAndPositions.bytes, fieldOff, fieldLen));
+    }
+
+    /** Number of fields of this document that have term vectors. */
+    @Override
+    public int size() {
+      return fieldNumOffs.length;
+    }
+
+  }
+
+  /**
+   * {@link Terms} view over the serialized terms of one field of one
+   * document. The actual decoding work happens in {@link TVTermsEnum}.
+   */
+  private class TVTerms extends Terms {
+
+    private final int numTerms;
+    private final int flags;
+    private final BytesRef termsAndPositions;
+
+    TVTerms(int numTerms, int flags, BytesRef termsAndPositions) {
+      this.numTerms = numTerms;
+      this.flags = flags;
+      this.termsAndPositions = termsAndPositions;
+    }
+
+    @Override
+    public TermsEnum iterator(TermsEnum reuse) throws IOException {
+      // reuse the incoming enum when possible, otherwise allocate a new one
+      final TVTermsEnum termsEnum = reuse instanceof TVTermsEnum
+          ? (TVTermsEnum) reuse
+          : new TVTermsEnum();
+      final ByteArrayDataInput in = new ByteArrayDataInput(
+          termsAndPositions.bytes, termsAndPositions.offset, termsAndPositions.length);
+      termsEnum.reset(packedIntsVersion, numTerms, flags, in);
+      return termsEnum;
+    }
+
+    @Override
+    public Comparator<BytesRef> getComparator() throws IOException {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+
+    @Override
+    public long size() throws IOException {
+      return numTerms;
+    }
+
+    @Override
+    public long getSumTotalTermFreq() throws IOException {
+      return -1L; // not stored
+    }
+
+    @Override
+    public long getSumDocFreq() throws IOException {
+      // single document: every term appears in exactly one doc
+      return numTerms;
+    }
+
+    @Override
+    public int getDocCount() throws IOException {
+      return 1;
+    }
+
+    @Override
+    public boolean hasOffsets() {
+      return (flags & OFFSETS) != 0;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return (flags & POSITIONS) != 0;
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return (flags & PAYLOADS) != 0;
+    }
+
+  }
+
+  /**
+   * {@link TermsEnum} that decodes the serialized terms of a single field of
+   * a single document: term bytes, frequency and, depending on
+   * <code>flags</code>, positions, offsets and payloads.
+   */
+  private static class TVTermsEnum extends TermsEnum {
+
+    private int packedIntsVersion; // version used to decode packed ints blocks
+    private int numTerms; // number of terms of the enumerated field
+    private int flags; // POSITIONS | OFFSETS | PAYLOADS mask of the field
+    private ByteArrayDataInput in; // serialized terms/positions data
+    private int startPos; // position of `in` at reset time, to allow re-iteration
+    private final BytesRef term; // current term, bytes reused across next() calls
+    private int termFreq; // frequency of the current term
+    private int ord; // ordinal of the current term, -1 before the first next()
+    private int[] positions; // positions of the current term, grown on demand
+    private int[] startOffsets; // start offsets of the current term
+    private int[] endOffsets; // end offsets of the current term
+    private BytesRef[] payloads; // payloads of the current term
+
+    private TVTermsEnum() {
+      positions = new int[2];
+      startOffsets = new int[2];
+      endOffsets = new int[2];
+      payloads = new BytesRef[2];
+      term = new BytesRef();
+    }
+
+    /** Point this (possibly reused) enum at a new field's serialized data. */
+    void reset(int packedIntsVersion, int numTerms, int flags, ByteArrayDataInput in) {
+      this.packedIntsVersion = packedIntsVersion;
+      this.numTerms = numTerms;
+      this.flags = flags;
+      this.in = in;
+      startPos = in.getPosition();
+      reset();
+    }
+
+    /** Rewind to just before the first term. */
+    void reset() {
+      term.length = 0;
+      in.setPosition(startPos);
+      ord = -1;
+    }
+
+    @Override
+    public BytesRef next() throws IOException {
+      if (ord == numTerms - 1) {
+        return null;
+      } else {
+        assert ord < numTerms;
+        ++ord;
+      }
+
+      // read term and freq: small lengths and freqs are packed into a single
+      // byte, larger values overflow into vInts
+      final int token = in.readByte() & 0xFF;
+      int termLength = token >>> 3;
+      termFreq = token & 0x07;
+      if (termLength == 0x1F) {
+        termLength += in.readVInt();
+      }
+      if (termFreq == 0x07) {
+        termFreq += in.readVInt();
+      }
+      if (termLength > term.bytes.length) {
+        term.bytes = new byte[ArrayUtil.oversize(termLength, 1)];
+      }
+      in.readBytes(term.bytes, 0, termLength);
+      term.offset = 0;
+      term.length = termLength;
+
+      // read positions (delta-encoded when packed)
+      if ((flags & POSITIONS) != 0) {
+        if (positions.length < termFreq) {
+          positions = new int[ArrayUtil.oversize(termFreq, 4)];
+        }
+        if (termFreq == 1) {
+          positions[0] = in.readVInt();
+        } else {
+          final int bitsPerValue = in.readVInt();
+          if (bitsPerValue == 0) {
+            // not expected from the writer; defensive fallback to plain vInts
+            assert false;
+            for (int i = 0; i < termFreq; ++i) {
+              positions[i] = in.readVInt();
+            }
+          } else {
+            final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, termFreq, bitsPerValue, 1);
+            positions[0] = (int) it.next();
+            for (int i = 1; i < termFreq; ++i) {
+              positions[i] = positions[i-1] + (int) it.next();
+            }
+          }
+        }
+      }
+
+      // read offsets: start offsets are delta-encoded, end offsets are stored
+      // as lengths relative to the start offsets
+      if ((flags & OFFSETS) != 0) {
+        if (startOffsets.length < termFreq) {
+          final int newLength = ArrayUtil.oversize(termFreq, 4);
+          startOffsets = new int[newLength];
+          endOffsets = new int[newLength];
+        }
+        if (termFreq == 1) {
+          startOffsets[0] = in.readVInt();
+          endOffsets[0] = startOffsets[0] + in.readVInt();
+        } else {
+          final int lenToken = in.readByte() & 0xFF;
+          // lenToken == 0 means offsets were written as raw vInts
+          if (lenToken == 0) {
+            for (int i = 0; i < termFreq; ++i) {
+              startOffsets[i] = in.readVInt();
+              endOffsets[i] = startOffsets[i] + in.readVInt();
+            }
+          } else {
+            final int offBitsPerValue = lenToken >>> 3;
+            int lenBitsperValue = lenToken & 0x07;
+            if (lenBitsperValue == 0x07) {
+              lenBitsperValue += in.readVInt();
+            }
+            if (lenBitsperValue >= 32) {
+              throw new CorruptIndexException(lenBitsperValue + " >= 32");
+            }
+
+            final PackedInts.ReaderIterator startOffsetDeltas = PackedInts.getReaderIteratorNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, termFreq, offBitsPerValue, 1);
+            startOffsets[0] = (int) startOffsetDeltas.next();
+            for (int i = 1; i < termFreq; ++i) {
+              startOffsets[i] = startOffsets[i - 1] + (int) startOffsetDeltas.next();
+            }
+
+            // lenBitsperValue == 0 means all lengths are equal
+            if (lenBitsperValue == 0) {
+              final int length = in.readVInt();
+              for (int i = 0; i < termFreq; ++i) {
+                endOffsets[i] = startOffsets[i] + length;
+              }
+            } else {
+              final PackedInts.ReaderIterator lengths = PackedInts.getReaderIteratorNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, termFreq, lenBitsperValue, 1);
+              for (int i = 0; i < termFreq; ++i) {
+                endOffsets[i] = startOffsets[i] + (int) lengths.next();
+              }
+            }
+          }
+        }
+      }
+
+      // read payloads: lengths first (packed, or a single shared value), then
+      // the payload bytes themselves
+      if ((flags & PAYLOADS) != 0) {
+        if (payloads.length < termFreq) {
+          payloads = Arrays.copyOf(payloads, ArrayUtil.oversize(termFreq, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
+        }
+        if (termFreq == 1) {
+          final int length = in.readVInt();
+          BytesRef payload = payloads[0];
+          if (payload == null) {
+            payload = new BytesRef(ArrayUtil.oversize(length, 1));
+            payloads[0] = payload;
+          } else if (payload.bytes.length < length) {
+            payload.bytes = new byte[ArrayUtil.oversize(length, 1)];
+          }
+          payload.offset = 0;
+          payload.length = length;
+          in.readBytes(payload.bytes, 0, length);
+        } else {
+          final int lenBitsRequired = in.readVInt();
+          if (lenBitsRequired >= 32) {
+            throw new CorruptIndexException(lenBitsRequired + " >= 32");
+          }
+          if (lenBitsRequired == 0) {
+            // all equal
+            final int length = in.readVInt();
+            BytesRef payload = payloads[0];
+            if (payload == null) {
+              payload = new BytesRef(length);
+              payloads[0] = payload;
+            } else if (payload.bytes.length < length) {
+              payload.bytes = new byte[ArrayUtil.oversize(length, 1)];
+            }
+            payload.offset = 0;
+            payload.length = length;
+            in.readBytes(payload.bytes, 0, length);
+            for (int i = 1; i < termFreq; ++i) {
+              if (payloads[i] == null) {
+                payloads[i] = new BytesRef(payload.length);
+              } else {
+                payloads[i].offset = 0;
+              }
+              payloads[i].copyBytes(payload);
+            }
+          } else {
+            final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(in, PackedInts.Format.PACKED, packedIntsVersion, termFreq, lenBitsRequired, 1);
+            for (int i = 0; i < termFreq; ++i) {
+              final int length = (int) it.next();
+              if (payloads[i] == null) {
+                payloads[i] = new BytesRef(ArrayUtil.oversize(length, 1));
+              } else if (payloads[i].bytes.length < length) {
+                payloads[i].bytes = new byte[ArrayUtil.oversize(length, 1)];
+              }
+              payloads[i].offset = 0;
+              payloads[i].length = length;
+            }
+            for (int i = 0; i < termFreq; ++i) {
+              in.readBytes(payloads[i].bytes, 0, payloads[i].length);
+            }
+          }
+        }
+      }
+
+      return term;
+    }
+
+    @Override
+    public Comparator<BytesRef> getComparator() {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+
+    @Override
+    public SeekStatus seekCeil(BytesRef text, boolean useCache)
+        throws IOException {
+      if (ord < numTerms && ord >= 0) {
+        final int cmp = term().compareTo(text);
+        if (cmp == 0) {
+          return SeekStatus.FOUND;
+        } else if (cmp > 0) {
+          // the target is behind the current term: restart from the beginning
+          reset();
+        }
+      }
+      // linear scan: terms are stored sequentially with no index
+      do {
+        next();
+      } while (ord < numTerms - 1 && term().compareTo(text) < 0);
+      return term().equals(text) ? SeekStatus.FOUND : SeekStatus.END;
+    }
+
+    @Override
+    public void seekExact(long ord) throws IOException {
+      if (ord < -1 || ord >= numTerms) {
+        throw new IOException("ord is out of range: ord=" + ord + ", numTerms=" + numTerms);
+      }
+      if (ord < this.ord) {
+        // cannot seek backwards: restart and scan forward
+        reset();
+      }
+      for (int i = this.ord; i < ord; ++i) {
+        next();
+      }
+      assert ord == this.ord();
+    }
+
+    @Override
+    public BytesRef term() throws IOException {
+      return term;
+    }
+
+    @Override
+    public long ord() throws IOException {
+      return ord;
+    }
+
+    /** Always 1: a term vector describes a single document. */
+    @Override
+    public int docFreq() throws IOException {
+      return 1;
+    }
+
+    @Override
+    public long totalTermFreq() throws IOException {
+      return termFreq;
+    }
+
+    @Override
+    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
+        throws IOException {
+      return docsAndPositions(liveDocs, reuse, flags);
+    }
+
+    @Override
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
+        DocsAndPositionsEnum reuse, int flags) throws IOException {
+      // NOTE(review): this masks the caller's DocsAndPositionsEnum request
+      // flags with the term-vector POSITIONS/OFFSETS constants — confirm
+      // those constant values are the intended ones for request flags
+      if ((flags & POSITIONS) == 0 && (flags & OFFSETS) == 0) {
+        return null;
+      }
+      return docsAndPositions(liveDocs, (DocsEnum) reuse, flags);
+    }
+
+    private DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
+        DocsEnum reuse, int ignoredFlags) throws IOException {
+      final TVDocsEnum docsEnum;
+      if (reuse != null && reuse instanceof TVDocsEnum) {
+        docsEnum = (TVDocsEnum) reuse;
+      } else {
+        docsEnum = new TVDocsEnum();
+      }
+      // expose only the data that was indexed for this field (this.flags),
+      // regardless of the caller-provided flags (ignoredFlags)
+      final int[]     positions = ((flags & POSITIONS) == 0) ? null : this.positions;
+      final int[]  startOffsets =   ((flags & OFFSETS) == 0) ? null : this.startOffsets;
+      final int[]    endOffsets =   ((flags & OFFSETS) == 0) ? null : this.endOffsets;
+      final BytesRef[] payloads =  ((flags & PAYLOADS) == 0) ? null : this.payloads;
+      docsEnum.reset(liveDocs, termFreq, positions, startOffsets, endOffsets, payloads);
+      return docsEnum;
+    }
+
+  }
+
+  private static class TVDocsEnum extends DocsAndPositionsEnum {
+
+    private Bits liveDocs;
+    private int doc = -1;
+    private int termFreq;
+    private int[] positions;
+    private int[] startOffsets;
+    private int[] endOffsets;
+    private BytesRef[] payloads;
+    private int i;
+
+    private void reset(Bits liveDocs, int freq, int[] positions, int[] startOffsets, int[] endOffsets, BytesRef[] payloads) {
+      this.liveDocs = liveDocs;
+      this.termFreq = freq;
+      this.positions = positions;
+      this.startOffsets = startOffsets;
+      this.endOffsets = endOffsets;
+      this.payloads = payloads;
+      doc = -1;
+      i = -1;
+
+      assert positions == null || positions.length >= freq;
+      assert startOffsets == null || startOffsets.length >= freq;
+    }
+
+    private void checkDoc() {
+      if (doc == NO_MORE_DOCS) {
+        throw new IllegalStateException("DocsEnum exhausted");
+      } else if (doc == -1) {
+        throw new IllegalStateException("DocsEnum not started");
+      }
+    }
+
+    private void checkPosition() {
+      checkDoc();
+      if (i < 0) {
+        throw new IllegalStateException("Position enum not started");
+      } else if (i >= termFreq) {
+        throw new IllegalStateException("Read past last position");
+      }
+    }
+
+    @Override
+    public int nextPosition() throws IOException {
+      if (doc != 0) {
+        throw new IllegalStateException();
+      } else if (i >= termFreq - 1) {
+        throw new IllegalStateException("Read past last position");
+      }
+
+      ++i;
+
+      if (positions == null) {
+        return -1;
+      } else {
+        return positions[i];
+      }
+    }
+
+    @Override
+    public int startOffset() throws IOException {
+      checkPosition();
+      if (startOffsets == null) {
+        return -1;
+      } else {
+        return startOffsets[i];
+      }
+    }
+
+    @Override
+    public int endOffset() throws IOException {
+      checkPosition();
+      if (endOffsets == null) {
+        return -1;
+      } else {
+        return endOffsets[i];
+      }
+    }
+
+    @Override
+    public BytesRef getPayload() throws IOException {
+      checkPosition();
+      if (payloads == null || payloads[i].length == 0) {
+        return null;
+      } else {
+        return payloads[i];
+      }
+    }
+
+    @Override
+    public int freq() throws IOException {
+      checkDoc();
+      return termFreq;
+    }
+
+    @Override
+    public int docID() {
+      return doc;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      if (doc == -1 && (liveDocs == null || liveDocs.get(0))) {
+        return (doc = 0);
+      } else {
+        return (doc = NO_MORE_DOCS);
+      }
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      if (doc == -1 && target == 0 && (liveDocs == null || liveDocs.get(0))) {
+        return (doc = 0);
+      } else {
+        return (doc = NO_MORE_DOCS);
+      }
+    }
+
+  }
+
+}

Modification de propriétés sur lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
___________________________________________________________________
Ajouté : svn:eol-style
   + native

Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsFormat.java	(révision 0)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsFormat.java	(révision 0)
@@ -0,0 +1,100 @@
+package org.apache.lucene.codecs.compressing;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.codecs.TermVectorsWriter;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+
+/**
+ * A {@link TermVectorsFormat} that compresses chunks of documents together in
+ * order to improve the compression ratio.
+ */
+public class CompressingTermVectorsFormat extends TermVectorsFormat {
+
+  private final String formatName;
+  private final CompressionMode compressionMode;
+  private final int chunkSize;
+
+  /**
+   * Create a new {@link CompressingTermVectorsFormat}.
+   * <p>
+   * <code>formatName</code> is the name of the format. This name will be used
+   * in the file formats to perform
+   * {@link CodecUtil#checkHeader(org.apache.lucene.store.DataInput, String, int, int) codec header checks}.
+   * <p>
+   * The <code>compressionMode</code> parameter allows you to choose between
+   * compression algorithms that have various compression and decompression
+   * speeds so that you can pick the one that best fits your indexing and
+   * searching throughput. You should never instantiate two
+   * {@link CompressingTermVectorsFormat}s that have the same name but
+   * different {@link CompressionMode}s.
+   * <p>
+   * <code>chunkSize</code> is the minimum byte size of a chunk of documents.
+   * Higher values of <code>chunkSize</code> should improve the compression
+   * ratio but will require more memory at indexing time and might make document
+   * loading a little slower (depending on the size of your OS cache compared
+   * to the size of your index).
+   *
+   * @param formatName the name of the {@link TermVectorsFormat}
+   * @param compressionMode the {@link CompressionMode} to use
+   * @param chunkSize the minimum number of bytes of a single chunk of stored documents
+   * @throws IllegalArgumentException if <code>chunkSize</code> is less than 1
+   * @see CompressionMode
+   */
+  public CompressingTermVectorsFormat(String formatName, CompressionMode compressionMode, int chunkSize) {
+    if (chunkSize < 1) {
+      throw new IllegalArgumentException("chunkSize must be >= 1");
+    }
+    this.formatName = formatName;
+    this.compressionMode = compressionMode;
+    this.chunkSize = chunkSize;
+  }
+
+  /** Opens a reader for the term vectors of <code>segmentInfo</code>. */
+  @Override
+  public TermVectorsReader vectorsReader(Directory directory,
+      SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context)
+      throws IOException {
+    return new CompressingTermVectorsReader(directory, segmentInfo,
+        fieldInfos, context, formatName, compressionMode);
+  }
+
+  /** Opens a writer for the term vectors of <code>segmentInfo</code>. */
+  @Override
+  public TermVectorsWriter vectorsWriter(Directory directory,
+      SegmentInfo segmentInfo, IOContext context) throws IOException {
+    return new CompressingTermVectorsWriter(directory, segmentInfo, context,
+        formatName, compressionMode, chunkSize);
+  }
+
+  @Override
+  public String toString() {
+    return getClass().getSimpleName() + "(compressionMode=" + compressionMode
+        + ", chunkSize=" + chunkSize + ")";
+  }
+
+}

Modification de propriétés sur lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsFormat.java
___________________________________________________________________
Ajouté : svn:eol-style
   + native

