Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java	(revision 1535645)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java	(working copy)
@@ -17,6 +17,16 @@
  * limitations under the License.
  */
 
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE;
+
 import java.io.IOException;
 import java.math.BigDecimal;
 import java.math.BigInteger;
@@ -31,11 +41,11 @@
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
@@ -42,16 +52,6 @@
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.StringHelper;
 
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN;
-import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE;
-
 class SimpleTextDocValuesReader extends DocValuesProducer {
 
   static class OneField {
@@ -62,8 +62,7 @@
     boolean fixedLength;
     long minValue;
     long numValues;
-
-  };
+  }
 
   final int maxDoc;
   final IndexInput data;
@@ -71,7 +70,7 @@
   final Map<String,OneField> fields = new HashMap<String,OneField>();
 
   public SimpleTextDocValuesReader(SegmentReadState state, String ext) throws IOException {
-    //System.out.println("dir=" + state.directory + " seg=" + state.segmentInfo.name + " ext=" + ext);
+    // System.out.println("dir=" + state.directory + " seg=" + state.segmentInfo.name + " file=" + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext));
     data = state.directory.openInput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext), state.context);
     maxDoc = state.segmentInfo.getDocCount();
     while(true) {
@@ -83,8 +82,6 @@
       assert startsWith(FIELD) : scratch.utf8ToString();
       String fieldName = stripPrefix(FIELD);
       //System.out.println("  field=" + fieldName);
-      FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldName);
-      assert fieldInfo != null;
 
       OneField field = new OneField();
       fields.put(fieldName, field);
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java	(revision 1535645)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java	(working copy)
@@ -55,7 +55,7 @@
   private final Set<String> fieldsSeen = new HashSet<String>(); // for asserting
 
   public SimpleTextDocValuesWriter(SegmentWriteState state, String ext) throws IOException {
-    //System.out.println("WRITE: " + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext) + " " + state.segmentInfo.getDocCount() + " docs");
+    // System.out.println("WRITE: " + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext) + " " + state.segmentInfo.getDocCount() + " docs");
     data = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext), state.context);
     numDocs = state.segmentInfo.getDocCount();
   }
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java	(revision 1535645)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java	(working copy)
@@ -47,8 +47,8 @@
 public class SimpleTextFieldInfosReader extends FieldInfosReader {
 
   @Override
-  public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
-    final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
+  public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
     IndexInput input = directory.openInput(fileName, iocontext);
     BytesRef scratch = new BytesRef();
@@ -105,6 +105,10 @@
       final DocValuesType docValuesType = docValuesType(dvType);
 
       SimpleTextUtil.readLine(input, scratch);
+      assert StringHelper.startsWith(scratch, DOCVALUES_GEN);
+      final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
+
+      SimpleTextUtil.readLine(input, scratch);
       assert StringHelper.startsWith(scratch, NUM_ATTS);
       int numAtts = Integer.parseInt(readString(NUM_ATTS.length, scratch));
       Map<String,String> atts = new HashMap<String,String>();
@@ -122,6 +126,7 @@
 
       infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
         omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(atts));
+      infos[i].setDocValuesGen(dvGen);
     }
 
     if (input.getFilePointer() != input.length()) {
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java	(revision 1535645)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java	(working copy)
@@ -53,6 +53,7 @@
   static final BytesRef NORMS = new BytesRef(" norms ");
   static final BytesRef NORMS_TYPE = new BytesRef(" norms type ");
   static final BytesRef DOCVALUES = new BytesRef(" doc values ");
+  static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen ");
   static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
   static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
   final static BytesRef ATT_KEY = new BytesRef(" key ");
@@ -59,8 +60,8 @@
   final static BytesRef ATT_VALUE = new BytesRef(" value ");
 
   @Override
-  public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException {
-    final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
+  public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
     IndexOutput out = directory.createOutput(fileName, context);
     BytesRef scratch = new BytesRef();
     boolean success = false;
@@ -108,6 +109,10 @@
         SimpleTextUtil.write(out, DOCVALUES);
         SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
         SimpleTextUtil.writeNewline(out);
+
+        SimpleTextUtil.write(out, DOCVALUES_GEN);
+        SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
+        SimpleTextUtil.writeNewline(out);
 
         Map<String,String> atts = fi.attributes();
         int numAtts = atts == null ? 0 : atts.size();
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java	(revision 1535645)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java	(working copy)
@@ -17,8 +17,16 @@
  * limitations under the License.
  */
 
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
+
 import java.io.IOException;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
@@ -34,8 +42,6 @@
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
 
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.*;
-
 /**
  * reads plaintext segments files
  *
@@ -80,22 +86,6 @@
       }
 
       SimpleTextUtil.readLine(input, scratch);
-      assert StringHelper.startsWith(scratch, SI_NUM_ATTS);
-      int numAtts = Integer.parseInt(readString(SI_NUM_ATTS.length, scratch));
-      Map<String,String> attributes = new HashMap<String,String>();
-
-      for (int i = 0; i < numAtts; i++) {
-        SimpleTextUtil.readLine(input, scratch);
-        assert StringHelper.startsWith(scratch, SI_ATT_KEY);
-        String key = readString(SI_ATT_KEY.length, scratch);
-
-        SimpleTextUtil.readLine(input, scratch);
-        assert StringHelper.startsWith(scratch, SI_ATT_VALUE);
-        String value = readString(SI_ATT_VALUE.length, scratch);
-        attributes.put(key, value);
-      }
-
-      SimpleTextUtil.readLine(input, scratch);
       assert StringHelper.startsWith(scratch, SI_NUM_FILES);
       int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch));
       Set<String> files = new HashSet<String>();
@@ -108,7 +98,7 @@
       }
 
       SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
-                                         isCompoundFile, null, diagnostics, Collections.unmodifiableMap(attributes));
+                                         isCompoundFile, null, diagnostics);
       info.setFiles(files);
       success = true;
       return info;
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java	(revision 1535645)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java	(working copy)
@@ -45,9 +45,6 @@
   final static BytesRef SI_NUM_DIAG = new BytesRef(" diagnostics ");
   final static BytesRef SI_DIAG_KEY = new BytesRef(" key ");
   final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
-  final static BytesRef SI_NUM_ATTS = new BytesRef(" attributes ");
-  final static BytesRef SI_ATT_KEY = new BytesRef(" key ");
-  final static BytesRef SI_ATT_VALUE = new BytesRef(" value ");
   final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
   final static BytesRef SI_FILE = new BytesRef(" file ");
 
@@ -93,24 +90,6 @@
       }
     }
 
-    Map<String,String> atts = si.attributes();
-    int numAtts = atts == null ? 0 : atts.size();
-    SimpleTextUtil.write(output, SI_NUM_ATTS);
-    SimpleTextUtil.write(output, Integer.toString(numAtts), scratch);
-    SimpleTextUtil.writeNewline(output);
-
-    if (numAtts > 0) {
-      for (Map.Entry<String,String> entry : atts.entrySet()) {
-        SimpleTextUtil.write(output, SI_ATT_KEY);
-        SimpleTextUtil.write(output, entry.getKey(), scratch);
-        SimpleTextUtil.writeNewline(output);
-
-        SimpleTextUtil.write(output, SI_ATT_VALUE);
-        SimpleTextUtil.write(output, entry.getValue(), scratch);
-        SimpleTextUtil.writeNewline(output);
-      }
-    }
-
     Set<String> files = si.files();
     int numFiles = files == null ? 0 : files.size();
     SimpleTextUtil.write(output, SI_NUM_FILES);
Index: lucene/codecs
===================================================================
--- lucene/codecs	(revision 1535645)
+++ lucene/codecs	(working copy)

Property changes on: lucene/codecs
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /lucene/dev/trunk/lucene/codecs:r1523461,1527154,1527391
Index: lucene/core/src/java/org/apache/lucene/codecs/Codec.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/Codec.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/Codec.java	(working copy)
@@ -119,7 +119,7 @@
     loader.reload(classloader);
   }
 
-  private static Codec defaultCodec = Codec.forName("Lucene45");
+  private static Codec defaultCodec = Codec.forName("Lucene46");
 
   /** expert: returns the default codec used for newly created
    *  {@link IndexWriterConfig}s.
Index: lucene/core/src/java/org/apache/lucene/codecs/FieldInfosReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/FieldInfosReader.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/FieldInfosReader.java	(working copy)
@@ -35,5 +35,5 @@
   /** Read the {@link FieldInfos} previously written with {@link
    *  FieldInfosWriter}. */
-  public abstract FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException;
+  public abstract FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException;
 }
Index: lucene/core/src/java/org/apache/lucene/codecs/FieldInfosWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/FieldInfosWriter.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/FieldInfosWriter.java	(working copy)
@@ -35,5 +35,5 @@
   /** Writes the provided {@link FieldInfos} to the
    *  directory. */
-  public abstract void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException;
+  public abstract void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException;
 }
Index: lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java	(working copy)
@@ -21,13 +21,13 @@
  * A codec that forwards all its method calls to another codec.
  * <p>
  * Extend this class when you need to reuse the functionality of an existing
- * codec. For example, if you want to build a codec that redefines Lucene45's
+ * codec. For example, if you want to build a codec that redefines Lucene46's
  * {@link LiveDocsFormat}:

  *   public final class CustomCodec extends FilterCodec {
  *
  *     public CustomCodec() {
- *       super("CustomCodec", new Lucene45Codec());
+ *       super("CustomCodec", new Lucene46Codec());
  *     }
  *
  *     public LiveDocsFormat liveDocsFormat() {
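Reviewer's aside: the javadoc example above is cut off by the hunk. For orientation, here is a complete, compilable version of the wrapper that javadoc describes. This is a hedged sketch, not part of the patch; CustomLiveDocsFormat is a hypothetical class standing in for whatever LiveDocsFormat the custom codec wants to supply.

  import org.apache.lucene.codecs.FilterCodec;
  import org.apache.lucene.codecs.LiveDocsFormat;
  import org.apache.lucene.codecs.lucene46.Lucene46Codec;

  // Forwards everything to Lucene46Codec and overrides only the LiveDocsFormat.
  public final class CustomCodec extends FilterCodec {

    public CustomCodec() {
      super("CustomCodec", new Lucene46Codec());
    }

    @Override
    public LiveDocsFormat liveDocsFormat() {
      return new CustomLiveDocsFormat(); // hypothetical replacement format
    }
  }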
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java	(working copy)
@@ -57,7 +57,7 @@
   static final byte OMIT_POSITIONS = -128;
 
   @Override
-  public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
+  public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException {
     final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
     IndexInput input = directory.openInput(fileName, iocontext);
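Reviewer's aside: the only signature change here is the new segmentSuffix parameter; the 3.x reader keeps passing "" because pre-4.0 field-infos files never carry a suffix. A hedged sketch of what the suffix does to the resolved file name (segment name and suffix values are made up; IndexFileNames.segmentFileName is the existing helper used throughout this patch):

  import org.apache.lucene.index.IndexFileNames;

  // Illustration only:
  String withoutSuffix = IndexFileNames.segmentFileName("_0", "", "fnm");  // "_0.fnm"
  String withSuffix    = IndexFileNames.segmentFileName("_0", "1", "fnm"); // expected "_0_1.fnm"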
     
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfoReader.java	(working copy)
@@ -244,7 +244,7 @@
                                        null, diagnostics, Collections.unmodifiableMap(attributes));
     info.setFiles(files);
 
-    SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, delCount, delGen);
+    SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, delCount, delGen, -1);
     return infoPerCommit;
   }
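Reviewer's aside: the extra constructor argument appears to be a per-commit generation for field-infos updates, mirroring delGen; -1 marks a segment whose field infos have never been rewritten, which is always the case for 3.x segments. A minimal sketch under that assumption (nothing here is new API beyond the added parameter; the local variable name is illustrative):

  // Sketch, assuming -1 means "no field-infos generation yet", analogous to delGen for deletes.
  long fieldInfosGen = -1;
  SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, delCount, delGen, fieldInfosGen);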
 
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java	(working copy)
@@ -82,7 +82,7 @@
   }
   
   @Override
-  public final SegmentInfoFormat segmentInfoFormat() {
+  public SegmentInfoFormat segmentInfoFormat() {
     return infosFormat;
   }
   
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java	(working copy)
@@ -49,7 +49,7 @@
   }
 
   @Override
-  public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
+  public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException {
     final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
     IndexInput input = directory.openInput(fileName, iocontext);
     
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java	(working copy)
@@ -67,10 +67,11 @@
  * 
  * @see SegmentInfos
  * @lucene.experimental
+ * @deprecated Only for reading old 4.0-4.5 segments
  */
+@Deprecated
 public class Lucene40SegmentInfoFormat extends SegmentInfoFormat {
   private final SegmentInfoReader reader = new Lucene40SegmentInfoReader();
-  private final SegmentInfoWriter writer = new Lucene40SegmentInfoWriter();
 
   /** Sole constructor. */
   public Lucene40SegmentInfoFormat() {
@@ -83,7 +84,7 @@
 
   @Override
   public SegmentInfoWriter getSegmentInfoWriter() {
-    return writer;
+    throw new UnsupportedOperationException("this codec can only be used for reading");
   }
 
   /** File extension used to store {@link SegmentInfo}. */
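Reviewer's aside: with its writer removed, the 4.0 segment-info format becomes read-only. A short sketch of the resulting behavior (the try/catch is illustrative, not from the patch):

  import org.apache.lucene.codecs.SegmentInfoFormat;
  import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat;

  SegmentInfoFormat format = new Lucene40SegmentInfoFormat();
  format.getSegmentInfoReader();   // still supported, for reading old 4.0-4.5 segments
  try {
    format.getSegmentInfoWriter(); // now throws: this codec can only be used for reading
  } catch (UnsupportedOperationException expected) {
    // new segments are written by the current default codec instead
  }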
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java	(working copy)
@@ -18,7 +18,6 @@
  */
 
 import java.io.IOException;
-import java.util.Collections;
 import java.util.Map;
 import java.util.Set;
 
@@ -37,7 +36,9 @@
  * 
  * @see Lucene40SegmentInfoFormat
  * @lucene.experimental
+ * @deprecated Only for reading old 4.0-4.5 segments
  */
+@Deprecated
 public class Lucene40SegmentInfoReader extends SegmentInfoReader {
 
   /** Sole constructor. */
@@ -60,7 +61,7 @@
       }
       final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
       final Map diagnostics = input.readStringStringMap();
-      final Map attributes = input.readStringStringMap();
+      input.readStringStringMap(); // read deprecated attributes
       final Set files = input.readStringSet();
       
       if (input.getFilePointer() != input.length()) {
@@ -67,8 +68,7 @@
         throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
       }
 
-      final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile,
-                                             null, diagnostics, Collections.unmodifiableMap(attributes));
+      final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);
       si.setFiles(files);
 
       success = true;
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java	(working copy)
@@ -1,74 +0,0 @@
-package org.apache.lucene.codecs.lucene40;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.SegmentInfoWriter;
-import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.IOUtils;
-
-/**
- * Lucene 4.0 implementation of {@link SegmentInfoWriter}.
- * 
- * @see Lucene40SegmentInfoFormat
- * @lucene.experimental
- */
-public class Lucene40SegmentInfoWriter extends SegmentInfoWriter {
-
-  /** Sole constructor. */
-  public Lucene40SegmentInfoWriter() {
-  }
-
-  /** Save a single segment's info. */
-  @Override
-  public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
-    final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
-    si.addFile(fileName);
-
-    final IndexOutput output = dir.createOutput(fileName, ioContext);
-
-    boolean success = false;
-    try {
-      CodecUtil.writeHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);
-      // Write the Lucene version that created this segment, since 3.1
-      output.writeString(si.getVersion());
-      output.writeInt(si.getDocCount());
-
-      output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
-      output.writeStringStringMap(si.getDiagnostics());
-      output.writeStringStringMap(si.attributes());
-      output.writeStringSet(si.files());
-
-      success = true;
-    } finally {
-      if (!success) {
-        IOUtils.closeWhileHandlingException(output);
-        si.dir.deleteFile(fileName);
-      } else {
-        output.close();
-      }
-    }
-  }
-}
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java	(working copy)
@@ -101,7 +101,7 @@
   }
   
   @Override
-  public final SegmentInfoFormat segmentInfoFormat() {
+  public SegmentInfoFormat segmentInfoFormat() {
     return infosFormat;
   }
   
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java	(working copy)
@@ -95,12 +95,12 @@
   }
   
   @Override
-  public final FieldInfosFormat fieldInfosFormat() {
+  public FieldInfosFormat fieldInfosFormat() {
     return fieldInfosFormat;
   }
   
   @Override
-  public final SegmentInfoFormat segmentInfoFormat() {
+  public SegmentInfoFormat segmentInfoFormat() {
     return infosFormat;
   }
   
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java	(working copy)
@@ -83,10 +83,11 @@
  * 
  *
  * @lucene.experimental
+ * @deprecated Only for reading old 4.2-4.5 segments
  */
-public final class Lucene42FieldInfosFormat extends FieldInfosFormat {
+@Deprecated
+public class Lucene42FieldInfosFormat extends FieldInfosFormat {
   private final FieldInfosReader reader = new Lucene42FieldInfosReader();
-  private final FieldInfosWriter writer = new Lucene42FieldInfosWriter();
   
   /** Sole constructor. */
   public Lucene42FieldInfosFormat() {
@@ -99,7 +100,7 @@
 
   @Override
   public FieldInfosWriter getFieldInfosWriter() throws IOException {
-    return writer;
+    throw new UnsupportedOperationException("this codec can only be used for reading");
   }
   
   /** Extension of field infos */
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java	(working copy)
@@ -38,8 +38,10 @@
  * Lucene 4.2 FieldInfos reader.
  * 
  * @lucene.experimental
+ * @deprecated Only for reading old 4.2-4.5 segments
  * @see Lucene42FieldInfosFormat
  */
+@Deprecated
 final class Lucene42FieldInfosReader extends FieldInfosReader {
 
   /** Sole constructor. */
@@ -47,7 +49,7 @@
   }
 
   @Override
-  public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
+  public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException {
     final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
     IndexInput input = directory.openInput(fileName, iocontext);
     
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java	(working copy)
@@ -1,108 +0,0 @@
-package org.apache.lucene.codecs.lucene42;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.FieldInfosWriter;
-import org.apache.lucene.index.FieldInfo.DocValuesType;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.IOUtils;
-
-/**
- * Lucene 4.2 FieldInfos writer.
- * 
- * @see Lucene42FieldInfosFormat
- * @lucene.experimental
- */
-final class Lucene42FieldInfosWriter extends FieldInfosWriter {
-  
-  /** Sole constructor. */
-  public Lucene42FieldInfosWriter() {
-  }
-  
-  @Override
-  public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException {
-    final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
-    IndexOutput output = directory.createOutput(fileName, context);
-    boolean success = false;
-    try {
-      CodecUtil.writeHeader(output, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_CURRENT);
-      output.writeVInt(infos.size());
-      for (FieldInfo fi : infos) {
-        IndexOptions indexOptions = fi.getIndexOptions();
-        byte bits = 0x0;
-        if (fi.hasVectors()) bits |= Lucene42FieldInfosFormat.STORE_TERMVECTOR;
-        if (fi.omitsNorms()) bits |= Lucene42FieldInfosFormat.OMIT_NORMS;
-        if (fi.hasPayloads()) bits |= Lucene42FieldInfosFormat.STORE_PAYLOADS;
-        if (fi.isIndexed()) {
-          bits |= Lucene42FieldInfosFormat.IS_INDEXED;
-          assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
-          if (indexOptions == IndexOptions.DOCS_ONLY) {
-            bits |= Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
-          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
-            bits |= Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
-          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
-            bits |= Lucene42FieldInfosFormat.OMIT_POSITIONS;
-          }
-        }
-        output.writeString(fi.name);
-        output.writeVInt(fi.number);
-        output.writeByte(bits);
-
-        // pack the DV types in one byte
-        final byte dv = docValuesByte(fi.getDocValuesType());
-        final byte nrm = docValuesByte(fi.getNormType());
-        assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
-        byte val = (byte) (0xff & ((nrm << 4) | dv));
-        output.writeByte(val);
-        output.writeStringStringMap(fi.attributes());
-      }
-      success = true;
-    } finally {
-      if (success) {
-        output.close();
-      } else {
-        IOUtils.closeWhileHandlingException(output);
-      }
-    }
-  }
-  
-  private static byte docValuesByte(DocValuesType type) {
-    if (type == null) {
-      return 0;
-    } else if (type == DocValuesType.NUMERIC) {
-      return 1;
-    } else if (type == DocValuesType.BINARY) {
-      return 2;
-    } else if (type == DocValuesType.SORTED) {
-      return 3;
-    } else if (type == DocValuesType.SORTED_SET) {
-      return 4;
-    } else {
-      throw new AssertionError();
-    }
-  }  
-}
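Reviewer's aside: the deleted 4.2 writer above packed the per-field docvalues type and norms type into the two nibbles of a single byte; the Lucene46 writer added later in this patch keeps exactly that encoding and appends a DocValuesGen long after it. A self-contained sketch of the pack/unpack round trip, mirroring docValuesByte() and the reader's unpacking (the helper names are illustrative, not API):

  // Low nibble = docvalues type, high nibble = norms type;
  // 0 means "none", 1=NUMERIC, 2=BINARY, 3=SORTED, 4=SORTED_SET.
  static byte packTypes(byte dv, byte nrm) {
    assert (dv & ~0xF) == 0 && (nrm & ~0xF) == 0;
    return (byte) (0xff & ((nrm << 4) | dv));
  }

  static byte docValuesNibble(byte packed) {
    return (byte) (packed & 0x0F);
  }

  static byte normsNibble(byte packed) {
    return (byte) ((packed >>> 4) & 0x0F);
  }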
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45Codec.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45Codec.java	(revision 1535645)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45Codec.java	(working copy)
@@ -45,10 +45,12 @@
  *
  * @see org.apache.lucene.codecs.lucene45 package documentation for file format details.
  * @lucene.experimental
+ * @deprecated Only for reading old 4.3-4.5 segments
  */
 // NOTE: if we make largish changes in a minor release, easier to just make Lucene46Codec or whatever
 // if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
 // (it writes a minor version, etc).
+@Deprecated
 public class Lucene45Codec extends Codec {
   private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();
   private final TermVectorsFormat vectorsFormat = new Lucene42TermVectorsFormat();
@@ -92,12 +94,12 @@
   }
   
   @Override
-  public final FieldInfosFormat fieldInfosFormat() {
+  public FieldInfosFormat fieldInfosFormat() {
     return fieldInfosFormat;
   }
   
   @Override
-  public final SegmentInfoFormat segmentInfoFormat() {
+  public SegmentInfoFormat segmentInfoFormat() {
     return infosFormat;
   }
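Reviewer's aside: Lucene45Codec stays available for reading, but the registry default (see the Codec.java hunk near the top of this patch) moves to "Lucene46", and the new Lucene46Codec below routes postings and docvalues through per-field formats. A hedged sketch of how an application ends up on the new codec, or pins it explicitly; the analyzer and version constants are placeholders, not mandated by the patch:

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.codecs.Codec;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.util.Version;

  // New IndexWriterConfigs pick up the Lucene46 default automatically...
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_46, new StandardAnalyzer(Version.LUCENE_46));
  // ...and it can also be selected explicitly through the codec registry:
  conf.setCodec(Codec.forName("Lucene46"));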
   
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46Codec.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46Codec.java	(revision 0)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46Codec.java	(working copy)
@@ -0,0 +1,138 @@
+package org.apache.lucene.codecs.lucene46;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
+import org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
+import org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
+import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+
+/**
+ * Implements the Lucene 4.6 index format, with configurable per-field postings
+ * and docvalues formats.
+ * 

+ * If you want to reuse functionality of this codec in another codec, extend + * {@link FilterCodec}. + * + * @see org.apache.lucene.codecs.lucene46 package documentation for file format details. + * @lucene.experimental + */ +// NOTE: if we make largish changes in a minor release, easier to just make Lucene46Codec or whatever +// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader +// (it writes a minor version, etc). +public class Lucene46Codec extends Codec { + private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat(); + private final TermVectorsFormat vectorsFormat = new Lucene42TermVectorsFormat(); + private final FieldInfosFormat fieldInfosFormat = new Lucene46FieldInfosFormat(); + private final SegmentInfoFormat segmentInfosFormat = new Lucene46SegmentInfoFormat(); + private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat(); + + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return Lucene46Codec.this.getPostingsFormatForField(field); + } + }; + + private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return Lucene46Codec.this.getDocValuesFormatForField(field); + } + }; + + /** Sole constructor. */ + public Lucene46Codec() { + super("Lucene46"); + } + + @Override + public final StoredFieldsFormat storedFieldsFormat() { + return fieldsFormat; + } + + @Override + public final TermVectorsFormat termVectorsFormat() { + return vectorsFormat; + } + + @Override + public final PostingsFormat postingsFormat() { + return postingsFormat; + } + + @Override + public final FieldInfosFormat fieldInfosFormat() { + return fieldInfosFormat; + } + + @Override + public final SegmentInfoFormat segmentInfoFormat() { + return segmentInfosFormat; + } + + @Override + public final LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } + + /** Returns the postings format that should be used for writing + * new segments of field. + * + * The default implementation always returns "Lucene41" + */ + public PostingsFormat getPostingsFormatForField(String field) { + return defaultFormat; + } + + /** Returns the docvalues format that should be used for writing + * new segments of field. 
+ * + * The default implementation always returns "Lucene45" + */ + public DocValuesFormat getDocValuesFormatForField(String field) { + return defaultDVFormat; + } + + @Override + public final DocValuesFormat docValuesFormat() { + return docValuesFormat; + } + + private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); + private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene45"); + + private final NormsFormat normsFormat = new Lucene42NormsFormat(); + + @Override + public final NormsFormat normsFormat() { + return normsFormat; + } +} Property changes on: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46Codec.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java (working copy) @@ -0,0 +1,126 @@ +package org.apache.lucene.codecs.lucene46; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.FieldInfosReader; +import org.apache.lucene.codecs.FieldInfosWriter; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.store.DataOutput; + +/** + * Lucene 4.6 Field Infos format. + *

+ * Field names are stored in the field info file, with suffix .fnm.
+ * <p>
+ * FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber,
+ * FieldBits,DocValuesBits,DocValuesGen,Attributes> FieldsCount
+ * <p>
+ * Data types:
+ * <ul>
+ *   <li>Header --> {@link CodecUtil#checkHeader CodecHeader}</li>
+ *   <li>FieldsCount --> {@link DataOutput#writeVInt VInt}</li>
+ *   <li>FieldName --> {@link DataOutput#writeString String}</li>
+ *   <li>FieldBits, DocValuesBits --> {@link DataOutput#writeByte Byte}</li>
+ *   <li>FieldNumber --> {@link DataOutput#writeInt VInt}</li>
+ *   <li>Attributes --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
+ *   <li>DocValuesGen --> {@link DataOutput#writeLong(long) Int64}</li>
+ * </ul>
+ * <p>
+ * Field Descriptions:
+ * <ul>
+ *   <li>FieldsCount: the number of fields in this file.</li>
+ *   <li>FieldName: name of the field as a UTF-8 String.</li>
+ *   <li>FieldNumber: the field's number. Note that unlike previous versions of
+ *       Lucene, the fields are not numbered implicitly by their order in the
+ *       file, instead explicitly.</li>
+ *   <li>FieldBits: a byte containing field options.
+ *     <ul>
+ *       <li>The low-order bit is one for indexed fields, and zero for non-indexed
+ *           fields.</li>
+ *       <li>The second lowest-order bit is one for fields that have term vectors
+ *           stored, and zero for fields without term vectors.</li>
+ *       <li>If the third lowest order-bit is set (0x4), offsets are stored into
+ *           the postings list in addition to positions.</li>
+ *       <li>Fourth bit is unused.</li>
+ *       <li>If the fifth lowest-order bit is set (0x10), norms are omitted for the
+ *           indexed field.</li>
+ *       <li>If the sixth lowest-order bit is set (0x20), payloads are stored for the
+ *           indexed field.</li>
+ *       <li>If the seventh lowest-order bit is set (0x40), term frequencies and
+ *           positions omitted for the indexed field.</li>
+ *       <li>If the eighth lowest-order bit is set (0x80), positions are omitted for the
+ *           indexed field.</li>
+ *     </ul>
+ *   </li>
+ *   <li>DocValuesBits: a byte containing per-document value types. The type
+ *       recorded as two four-bit integers, with the high-order bits representing
+ *       norms options, and the low-order bits representing
+ *       {@code DocValues} options. Each four-bit integer can be decoded as such:
+ *     <ul>
+ *       <li>0: no DocValues for this field.</li>
+ *       <li>1: NumericDocValues. ({@link DocValuesType#NUMERIC})</li>
+ *       <li>2: BinaryDocValues. ({@code DocValuesType#BINARY})</li>
+ *       <li>3: SortedDocValues. ({@code DocValuesType#SORTED})</li>
+ *     </ul>
+ *   </li>
+ *   <li>DocValuesGen is the generation count of the field's DocValues. If this is -1,
+ *       there are no DocValues updates to that field. Anything above zero means there
+ *       are updates stored by {@link DocValuesFormat}.</li>
+ *   <li>Attributes: a key-value map of codec-private attributes.</li>
+ * + * @lucene.experimental + */ +public final class Lucene46FieldInfosFormat extends FieldInfosFormat { + private final FieldInfosReader reader = new Lucene46FieldInfosReader(); + private final FieldInfosWriter writer = new Lucene46FieldInfosWriter(); + + /** Sole constructor. */ + public Lucene46FieldInfosFormat() { + } + + @Override + public FieldInfosReader getFieldInfosReader() throws IOException { + return reader; + } + + @Override + public FieldInfosWriter getFieldInfosWriter() throws IOException { + return writer; + } + + /** Extension of field infos */ + static final String EXTENSION = "fnm"; + + // Codec header + static final String CODEC_NAME = "Lucene46FieldInfos"; + static final int FORMAT_START = 0; + static final int FORMAT_CURRENT = FORMAT_START; + + // Field flags + static final byte IS_INDEXED = 0x1; + static final byte STORE_TERMVECTOR = 0x2; + static final byte STORE_OFFSETS_IN_POSTINGS = 0x4; + static final byte OMIT_NORMS = 0x10; + static final byte STORE_PAYLOADS = 0x20; + static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40; + static final byte OMIT_POSITIONS = -128; +} Property changes on: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java (working copy) @@ -0,0 +1,125 @@ +package org.apache.lucene.codecs.lucene46; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collections; +import java.util.Map; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldInfosReader; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.IOUtils; + +/** + * Lucene 4.6 FieldInfos reader. + * + * @lucene.experimental + * @see Lucene46FieldInfosFormat + */ +final class Lucene46FieldInfosReader extends FieldInfosReader { + + /** Sole constructor. 
*/ + public Lucene46FieldInfosReader() { + } + + @Override + public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext context) throws IOException { + final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION); + IndexInput input = directory.openInput(fileName, context); + + boolean success = false; + try { + CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME, + Lucene46FieldInfosFormat.FORMAT_START, + Lucene46FieldInfosFormat.FORMAT_CURRENT); + + final int size = input.readVInt(); //read in the size + FieldInfo infos[] = new FieldInfo[size]; + + for (int i = 0; i < size; i++) { + String name = input.readString(); + final int fieldNumber = input.readVInt(); + byte bits = input.readByte(); + boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0; + boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0; + boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0; + boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0; + final IndexOptions indexOptions; + if (!isIndexed) { + indexOptions = null; + } else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { + indexOptions = IndexOptions.DOCS_ONLY; + } else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) { + indexOptions = IndexOptions.DOCS_AND_FREQS; + } else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) { + indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + } else { + indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; + } + + // DV Types are packed in one byte + byte val = input.readByte(); + final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F)); + final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F)); + final long dvGen = input.readLong(); + final Map attributes = input.readStringStringMap(); + infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, + omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes)); + infos[i].setDocValuesGen(dvGen); + } + + if (input.getFilePointer() != input.length()) { + throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); + } + FieldInfos fieldInfos = new FieldInfos(infos); + success = true; + return fieldInfos; + } finally { + if (success) { + input.close(); + } else { + IOUtils.closeWhileHandlingException(input); + } + } + } + + private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException { + if (b == 0) { + return null; + } else if (b == 1) { + return DocValuesType.NUMERIC; + } else if (b == 2) { + return DocValuesType.BINARY; + } else if (b == 3) { + return DocValuesType.SORTED; + } else if (b == 4) { + return DocValuesType.SORTED_SET; + } else { + throw new CorruptIndexException("invalid docvalues byte: " + b + " (resource=" + input + ")"); + } + } +} Property changes on: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java 
=================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java (working copy) @@ -0,0 +1,109 @@ +package org.apache.lucene.codecs.lucene46; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldInfosWriter; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.IOUtils; + +/** + * Lucene 4.6 FieldInfos writer. + * + * @see Lucene46FieldInfosFormat + * @lucene.experimental + */ +final class Lucene46FieldInfosWriter extends FieldInfosWriter { + + /** Sole constructor. 
*/ + public Lucene46FieldInfosWriter() { + } + + @Override + public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { + final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION); + IndexOutput output = directory.createOutput(fileName, context); + boolean success = false; + try { + CodecUtil.writeHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT); + output.writeVInt(infos.size()); + for (FieldInfo fi : infos) { + IndexOptions indexOptions = fi.getIndexOptions(); + byte bits = 0x0; + if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR; + if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS; + if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS; + if (fi.isIndexed()) { + bits |= Lucene46FieldInfosFormat.IS_INDEXED; + assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads(); + if (indexOptions == IndexOptions.DOCS_ONLY) { + bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS; + } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { + bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS; + } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) { + bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS; + } + } + output.writeString(fi.name); + output.writeVInt(fi.number); + output.writeByte(bits); + + // pack the DV types in one byte + final byte dv = docValuesByte(fi.getDocValuesType()); + final byte nrm = docValuesByte(fi.getNormType()); + assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0; + byte val = (byte) (0xff & ((nrm << 4) | dv)); + output.writeByte(val); + output.writeLong(fi.getDocValuesGen()); + output.writeStringStringMap(fi.attributes()); + } + success = true; + } finally { + if (success) { + output.close(); + } else { + IOUtils.closeWhileHandlingException(output); + } + } + } + + private static byte docValuesByte(DocValuesType type) { + if (type == null) { + return 0; + } else if (type == DocValuesType.NUMERIC) { + return 1; + } else if (type == DocValuesType.BINARY) { + return 2; + } else if (type == DocValuesType.SORTED) { + return 3; + } else if (type == DocValuesType.SORTED_SET) { + return 4; + } else { + throw new AssertionError(); + } + } +} Property changes on: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java (working copy) @@ -0,0 +1,93 @@ +package org.apache.lucene.codecs.lucene46; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.SegmentInfoReader; +import org.apache.lucene.codecs.SegmentInfoWriter; +import org.apache.lucene.index.IndexWriter; // javadocs +import org.apache.lucene.index.SegmentInfo; // javadocs +import org.apache.lucene.index.SegmentInfos; // javadocs +import org.apache.lucene.store.DataOutput; // javadocs + +/** + * Lucene 4.6 Segment info format. + *

+ * Files: + *

    + *
+ *   • .si: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files
+ *
+ * Data types:
+ *   • Header --> {@link CodecUtil#writeHeader CodecHeader}
+ *   • SegSize --> {@link DataOutput#writeInt Int32}
+ *   • SegVersion --> {@link DataOutput#writeString String}
+ *   • Files --> {@link DataOutput#writeStringSet Set<String>}
+ *   • Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}
+ *   • IsCompoundFile --> {@link DataOutput#writeByte Int8}
+ *
+ * Field Descriptions:
+ *   • SegVersion is the code version that created the segment.
+ *   • SegSize is the number of documents contained in the segment index.
+ *   • IsCompoundFile records whether the segment is written as a compound file or not. If this is -1, the segment is not a compound file. If it is 1, the segment is a compound file.
+ *   • Checksum contains the CRC32 checksum of all bytes in the segments_N file up until the checksum. This is used to verify integrity of the file on opening the index.
+ *   • The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid, for each segment it creates. It includes metadata like the current Lucene version, OS, Java version, why the segment was created (merge, flush, addIndexes), etc.
+ *   • Files is a list of files referred to by this segment.
+ *

+ * + * @see SegmentInfos + * @lucene.experimental + */ +public class Lucene46SegmentInfoFormat extends SegmentInfoFormat { + private final SegmentInfoReader reader = new Lucene46SegmentInfoReader(); + private final SegmentInfoWriter writer = new Lucene46SegmentInfoWriter(); + + /** Sole constructor. */ + public Lucene46SegmentInfoFormat() { + } + + @Override + public SegmentInfoReader getSegmentInfoReader() { + return reader; + } + + @Override + public SegmentInfoWriter getSegmentInfoWriter() { + return writer; + } + + /** File extension used to store {@link SegmentInfo}. */ + public final static String SI_EXTENSION = "si"; + static final String CODEC_NAME = "Lucene46SegmentInfo"; + static final int VERSION_START = 0; + static final int VERSION_CURRENT = VERSION_START; +} Property changes on: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java (working copy) @@ -0,0 +1,83 @@ +package org.apache.lucene.codecs.lucene46; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.SegmentInfoReader; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.IOUtils; + +/** + * Lucene 4.6 implementation of {@link SegmentInfoReader}. + * + * @see Lucene46SegmentInfoFormat + * @lucene.experimental + */ +public class Lucene46SegmentInfoReader extends SegmentInfoReader { + + /** Sole constructor. 
*/ + public Lucene46SegmentInfoReader() { + } + + @Override + public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException { + final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene46SegmentInfoFormat.SI_EXTENSION); + final IndexInput input = dir.openInput(fileName, context); + boolean success = false; + try { + CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME, + Lucene46SegmentInfoFormat.VERSION_START, + Lucene46SegmentInfoFormat.VERSION_CURRENT); + final String version = input.readString(); + final int docCount = input.readInt(); + if (docCount < 0) { + throw new CorruptIndexException("invalid docCount: " + docCount + " (resource=" + input + ")"); + } + final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; + final Map diagnostics = input.readStringStringMap(); + final Set files = input.readStringSet(); + + if (input.getFilePointer() != input.length()) { + throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); + } + + final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics); + si.setFiles(files); + + success = true; + + return si; + + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(input); + } else { + input.close(); + } + } + } +} Property changes on: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java (working copy) @@ -0,0 +1,73 @@ +package org.apache.lucene.codecs.lucene46; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.SegmentInfoWriter; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.IOUtils; + +/** + * Lucene 4.6 implementation of {@link SegmentInfoWriter}. 
+ * + * @see Lucene46SegmentInfoFormat + * @lucene.experimental + */ +public class Lucene46SegmentInfoWriter extends SegmentInfoWriter { + + /** Sole constructor. */ + public Lucene46SegmentInfoWriter() { + } + + /** Save a single segment's info. */ + @Override + public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException { + final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene46SegmentInfoFormat.SI_EXTENSION); + si.addFile(fileName); + + final IndexOutput output = dir.createOutput(fileName, ioContext); + + boolean success = false; + try { + CodecUtil.writeHeader(output, Lucene46SegmentInfoFormat.CODEC_NAME, Lucene46SegmentInfoFormat.VERSION_CURRENT); + // Write the Lucene version that created this segment, since 3.1 + output.writeString(si.getVersion()); + output.writeInt(si.getDocCount()); + + output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO)); + output.writeStringStringMap(si.getDiagnostics()); + output.writeStringSet(si.files()); + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(output); + si.dir.deleteFile(fileName); + } else { + output.close(); + } + } + } +} Property changes on: lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html (revision 0) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html (working copy) @@ -0,0 +1,398 @@ + + + + + + + +Lucene 4.6 file format. + +

Apache Lucene - Index File Formats

+ + +

Introduction

+
+

This document defines the index file formats used in this version of Lucene. +If you are using a different version of Lucene, please consult the copy of +docs/ that was distributed with +the version you are using.

+

Apache Lucene is written in Java, but several efforts are underway to write +versions of +Lucene in other programming languages. If these versions are to remain +compatible with Apache Lucene, then a language-independent definition of the +Lucene index format is required. This document thus attempts to provide a +complete and independent definition of the Apache Lucene file formats.

+

As Lucene evolves, this document should evolve. Versions of Lucene in +different programming languages should endeavor to agree on file formats, and +generate new versions of this document.

+
+ +

Definitions

+
+

The fundamental concepts in Lucene are index, document, field and term.

+

An index contains a sequence of documents.

+
    +
+  • A document is a sequence of fields.
+  • A field is a named sequence of terms.
+  • A term is a sequence of bytes.
+

The same sequence of bytes in two different fields is considered a different +term. Thus terms are represented as a pair: the string naming the field, and the +bytes within the field.

+ +

Inverted Indexing

+

The index stores statistics about terms in order to make term-based search +more efficient. Lucene's index falls into the family of indexes known as an +inverted index. This is because it can list, for a term, the documents +that contain it. This is the inverse of the natural relationship, in which +documents list terms.
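To make the inverted relationship concrete, here is a tiny stand-alone illustration (an editor's sketch, not part of this patch and not the Lucene API): a plain Java map from each term to the numbers of the documents that contain it.

  import java.util.ArrayList;
  import java.util.List;
  import java.util.Map;
  import java.util.TreeMap;

  class TinyInvertedIndex {
    public static void main(String[] args) {
      String[] docs = { "lucene builds an inverted index", "an index lists documents per term" };
      // term -> doc numbers containing that term (the "inverted" direction)
      Map<String, List<Integer>> postings = new TreeMap<String, List<Integer>>();
      for (int docID = 0; docID < docs.length; docID++) {
        for (String term : docs[docID].split(" ")) {
          List<Integer> docList = postings.get(term);
          if (docList == null) {
            docList = new ArrayList<Integer>();
            postings.put(term, docList);
          }
          if (docList.isEmpty() || docList.get(docList.size() - 1) != docID) {
            docList.add(docID); // record each doc at most once per term
          }
        }
      }
      System.out.println(postings.get("index")); // [0, 1]
    }
  }

Lucene's on-disk postings are of course far more compact than this, but the lookup direction (term to documents) is the same.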

+ +

Types of Fields

+

In Lucene, fields may be stored, in which case their text is stored +in the index literally, in a non-inverted manner. Fields that are inverted are +called indexed. A field may be both stored and indexed.

+

The text of a field may be tokenized into terms to be indexed, or the +text of a field may be used literally as a term to be indexed. Most fields are +tokenized, but sometimes it is useful for certain identifier fields to be +indexed literally.

+

See the {@link org.apache.lucene.document.Field Field} +java docs for more information on Fields.
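A small illustration of these distinctions using the 4.x document API may help (an editor's sketch, not part of this patch; the field names and values are made up):

  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.document.StoredField;
  import org.apache.lucene.document.StringField;
  import org.apache.lucene.document.TextField;

  class FieldKindsExample {
    Document buildExampleDoc() {
      Document doc = new Document();
      // indexed literally as a single untokenized term, and also stored
      doc.add(new StringField("id", "doc-42", Field.Store.YES));
      // tokenized into terms and indexed, but not stored
      doc.add(new TextField("body", "some free text to analyze", Field.Store.NO));
      // stored only, never indexed
      doc.add(new StoredField("title", "A stored-only title"));
      return doc;
    }
  }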

+ +

Segments

+

Lucene indexes may be composed of multiple sub-indexes, or segments. +Each segment is a fully independent index, which could be searched separately. +Indexes evolve by:

+
    +
+  1. Creating new segments for newly added documents.
+  2. Merging existing segments.
+

Searches may involve multiple segments and/or multiple indexes, each index +potentially composed of a set of segments.

+ +

Document Numbers

+

Internally, Lucene refers to documents by an integer document number. +The first document added to an index is numbered zero, and each subsequent +document added gets a number one greater than the previous.

+

Note that a document's number may change, so caution should be taken when +storing these numbers outside of Lucene. In particular, numbers may change in +the following situations:

+
    +
+  • The numbers stored in each segment are unique only within the segment, and must be converted before they can be used in a larger context. The standard technique is to allocate each segment a range of values, based on the range of numbers used in that segment. To convert a document number from a segment to an external value, the segment's base document number is added. To convert an external value back to a segment-specific value, the segment is identified by the range that the external value is in, and the segment's base value is subtracted. For example, two five-document segments might be combined, so that the first segment has a base value of zero, and the second of five. Document three from the second segment would have an external value of eight (see the sketch below).
+  • When documents are deleted, gaps are created in the numbering. These are eventually removed as the index evolves through merging. Deleted documents are dropped when segments are merged. A freshly-merged segment thus has no gaps in its numbering.
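The base-offset arithmetic described in the first item above can be sketched as follows (editor's illustration; the class and method names are hypothetical, not Lucene API):

  class DocNumberExample {
    // bases[i] is the sum of the sizes of segments 0..i-1, e.g. {0, 5} for two 5-doc segments
    static int toExternal(int[] bases, int segment, int docInSegment) {
      return bases[segment] + docInSegment;
    }

    static int toSegmentLocal(int[] bases, int externalDoc) {
      int segment = 0;
      while (segment + 1 < bases.length && bases[segment + 1] <= externalDoc) {
        segment++; // find the segment whose range contains externalDoc
      }
      return externalDoc - bases[segment];
    }

    public static void main(String[] args) {
      int[] bases = { 0, 5 };
      System.out.println(toExternal(bases, 1, 3));   // 8, matching the example above
      System.out.println(toSegmentLocal(bases, 8));  // 3
    }
  }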
+
+ +

Index Structure Overview

+
+

Each segment index maintains the following:

+
    +
+  • {@link org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat Segment info}. This contains metadata about a segment, such as the number of documents, what files it uses.
+  • {@link org.apache.lucene.codecs.lucene46.Lucene46FieldInfosFormat Field names}. This contains the set of field names used in the index.
+  • {@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Stored Field values}. This contains, for each document, a list of attribute-value pairs, where the attributes are field names. These are used to store auxiliary information about the document, such as its title, url, or an identifier to access a database. The set of stored fields is what is returned for each hit when searching. This is keyed by document number.
+  • {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}. A dictionary containing all of the terms used in all of the indexed fields of all of the documents. The dictionary also contains the number of documents which contain the term, and pointers to the term's frequency and proximity data.
+  • {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Frequency data}. For each term in the dictionary, the numbers of all the documents that contain that term, and the frequency of the term in that document, unless frequencies are omitted (IndexOptions.DOCS_ONLY).
+  • {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Proximity data}. For each term in the dictionary, the positions that the term occurs in each document. Note that this will not exist if all fields in all documents omit position data.
+  • {@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Normalization factors}. For each field in each document, a value is stored that is multiplied into the score for hits on that field.
+  • {@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vectors}. For each field in each document, the term vector (sometimes called document vector) may be stored. A term vector consists of term text and term frequency. To add Term Vectors to your index see the {@link org.apache.lucene.document.Field Field} constructors.
+  • {@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-document values}. Like stored values, these are also keyed by document number, but are generally intended to be loaded into main memory for fast access. Whereas stored values are generally intended for summary results from searches, per-document values are useful for things like scoring factors (see the sketch below).
+  • {@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}. An optional file indicating which documents are deleted.
+

Details on each of these are provided in their linked pages.
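As a small illustration of per-document values at indexing time (an editor's sketch, not part of this patch; the field names are made up):

  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.document.NumericDocValuesField;
  import org.apache.lucene.document.StringField;

  class PerDocumentValueExample {
    Document withScoringFactor(String id, long popularity) {
      Document doc = new Document();
      doc.add(new StringField("id", id, Field.Store.NO));
      // column-stride per-document value, e.g. a scoring factor consulted at search time
      doc.add(new NumericDocValuesField("popularity", popularity));
      return doc;
    }
  }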

+
+ +

File Naming

+
+

All files belonging to a segment have the same name with varying extensions. +The extensions correspond to the different file formats described below. When +using the Compound File format (default in 1.4 and greater) these files (except +for the Segment info file, the Lock file, and Deleted documents file) are collapsed +into a single .cfs file (see below for details)

+

Typically, all segments in an index are stored in a single directory, +although this is not required.

+

As of version 2.1 (lock-less commits), file names are never re-used (there +is one exception, "segments.gen", see below). That is, when any file is saved +to the Directory it is given a never before used filename. This is achieved +using a simple generations approach. For example, the first segments file is +segments_1, then segments_2, etc. The generation is a sequential long integer +represented in alpha-numeric (base 36) form.
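For example, the mapping from a generation to a segments file name can be sketched as (editor's illustration, not the actual SegmentInfos code):

  class SegmentsFileNameExample {
    // generation rendered in base 36: 1 -> segments_1, 10 -> segments_a, 12345 -> segments_9ix
    static String segmentsFileName(long generation) {
      return "segments_" + Long.toString(generation, Character.MAX_RADIX);
    }
  }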

+
+ +

Summary of File Extensions

+
+

The following table summarizes the names and extensions of the files in +Lucene:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Name | Extension | Brief Description
+ {@link org.apache.lucene.index.SegmentInfos Segments File} | segments.gen, segments_N | Stores information about a commit point
+ Lock File | write.lock | The Write lock prevents multiple IndexWriters from writing to the same file.
+ {@link org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat Segment Info} | .si | Stores metadata about a segment
+ {@link org.apache.lucene.store.CompoundFileDirectory Compound File} | .cfs, .cfe | An optional "virtual" file consisting of all the other index files for systems that frequently run out of file handles.
+ {@link org.apache.lucene.codecs.lucene46.Lucene46FieldInfosFormat Fields} | .fnm | Stores information about the fields
+ {@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Index} | .fdx | Contains pointers to field data
+ {@link org.apache.lucene.codecs.lucene41.Lucene41StoredFieldsFormat Field Data} | .fdt | The stored fields for documents
+ {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Dictionary} | .tim | The term dictionary, stores term info
+ {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Index} | .tip | The index into the Term Dictionary
+ {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Frequencies} | .doc | Contains the list of docs which contain each term along with frequency
+ {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Positions} | .pos | Stores position information about where a term occurs in the index
+ {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Payloads} | .pay | Stores additional per-position metadata information such as character offsets and user payloads
+ {@link org.apache.lucene.codecs.lucene42.Lucene42NormsFormat Norms} | .nvd, .nvm | Encodes length and boost factors for docs and fields
+ {@link org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat Per-Document Values} | .dvd, .dvm | Encodes additional scoring factors or other per-document information.
+ {@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Index} | .tvx | Stores offset into the document data file
+ {@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Documents} | .tvd | Contains information about each document that has term vectors
+ {@link org.apache.lucene.codecs.lucene42.Lucene42TermVectorsFormat Term Vector Fields} | .tvf | The field level info about term vectors
+ {@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents} | .del | Info about which documents are deleted
+
+ +

Lock File

+The write lock, which is stored in the index directory by default, is named +"write.lock". If the lock directory is different from the index directory then +the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix +derived from the full path to the index directory. When this file is present, a +writer is currently modifying the index (adding or removing documents). This +lock file ensures that only one writer is modifying the index at a time.

+ +

History

+

Compatibility notes are provided in this document, describing how file +formats have changed from prior versions:

+
    +
+  • In version 2.1, the file format was changed to allow lock-less commits (ie, no more commit lock). The change is fully backwards compatible: you can open a pre-2.1 index for searching or adding/deleting of docs. When the new segments file is saved (committed), it will be written in the new file format (meaning no specific "upgrade" process is needed). But note that once a commit has occurred, pre-2.1 Lucene will not be able to read the index.
+  • In version 2.3, the file format was changed to allow segments to share a single set of doc store (vectors & stored fields) files. This allows for faster indexing in certain cases. The change is fully backwards compatible (in the same way as the lock-less commits change in 2.1).
+  • In version 2.4, Strings are now written as a true UTF-8 byte sequence, not Java's modified UTF-8. See LUCENE-510 for details.
+  • In version 2.9, an optional opaque Map<String,String> CommitUserData may be passed to IndexWriter's commit methods (and later retrieved), which is recorded in the segments_N file. See LUCENE-1382 for details. Also, diagnostics were added to each segment written recording details about why it was written (due to flush, merge; which OS/JRE was used; etc.). See issue LUCENE-1654 for details.
+  • In version 3.0, compressed fields are no longer written to the index (they can still be read, but on merge the new segment will write them, uncompressed). See issue LUCENE-1960 for details.
+  • In version 3.1, segments record the code version that created them. See LUCENE-2720 for details. Additionally segments track explicitly whether or not they have term vectors. See LUCENE-2811 for details.
+  • In version 3.2, numeric fields are written natively to the stored fields file; previously they were stored in text format only.
+  • In version 3.4, fields can omit position data while still indexing term frequencies.
+  • In version 4.0, the format of the inverted index became extensible via the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage ({@code DocValues}) was introduced. Normalization factors need no longer be a single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. Terms need not be unicode strings, they can be any byte sequence. Term offsets can optionally be indexed into the postings lists. Payloads can be stored in the term vectors.
+  • In version 4.1, the format of the postings list changed to use either FOR compression or variable-byte encoding, depending upon the frequency of the term. Terms appearing only once were changed to inline directly into the term dictionary. Stored fields are compressed by default.
+  • In version 4.2, term vectors are compressed by default. DocValues has a new multi-valued type (SortedSet) that can be used for faceting/grouping/joining on multi-valued fields.
+  • In version 4.5, DocValues were extended to explicitly represent missing values.
+  • In version 4.6, FieldInfos were extended to support per-field DocValues generation, to allow updating NumericDocValues fields (a usage sketch follows this list).
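Because the 4.6 entry describes the capability this very patch introduces, a hypothetical usage sketch of the new IndexWriter update method follows (the method name and signature are inferred from this patch and may differ in the final version; this is an illustration, not javadoc):

  import java.io.IOException;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.util.Version;

  class NumericDVUpdateExample {
    // assumes documents were indexed with a NumericDocValuesField("price", ...) and a unique "id" term
    void updatePrice(Directory directory, Analyzer analyzer) throws IOException {
      IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_46, analyzer);
      IndexWriter writer = new IndexWriter(directory, conf);
      // resolves the term to matching docIDs at flush/merge time and rewrites only the DocValues field
      writer.updateNumericDocValue(new Term("id", "doc-7"), "price", 150L);
      writer.commit();
      writer.close();
    }
  }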
+ +

Limitations

+
+

Lucene uses a Java int to refer to +document numbers, and the index file format uses an Int32 +on-disk to store document numbers. This is a limitation +of both the index file format and the current implementation. Eventually these +should be replaced with either UInt64 values, or +better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.

+
+ + Property changes on: lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/codecs/package.html =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/package.html (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/codecs/package.html (working copy) @@ -61,13 +61,13 @@ If you just want to customise the {@link org.apache.lucene.codecs.PostingsFormat}, or use different postings formats for different fields, then you can register your custom postings format in the same way (in META-INF/services/org.apache.lucene.codecs.PostingsFormat), and then extend the default - {@link org.apache.lucene.codecs.lucene45.Lucene45Codec} and override - {@link org.apache.lucene.codecs.lucene45.Lucene45Codec#getPostingsFormatForField(String)} to return your custom + {@link org.apache.lucene.codecs.lucene46.Lucene46Codec} and override + {@link org.apache.lucene.codecs.lucene46.Lucene46Codec#getPostingsFormatForField(String)} to return your custom postings format.

Similarly, if you just want to customise the {@link org.apache.lucene.codecs.DocValuesFormat} per-field, have - a look at {@link org.apache.lucene.codecs.lucene45.Lucene45Codec#getDocValuesFormatForField(String)}. + a look at {@link org.apache.lucene.codecs.lucene46.Lucene46Codec#getDocValuesFormatForField(String)}.

Index: lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java (working copy) @@ -22,13 +22,13 @@ import java.util.HashMap; import java.util.IdentityHashMap; import java.util.Map; -import java.util.ServiceLoader; // javadocs +import java.util.ServiceLoader; import java.util.TreeMap; -import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.NumericDocValues; @@ -76,11 +76,10 @@ } @Override - public final DocValuesConsumer fieldsConsumer(SegmentWriteState state) - throws IOException { + public final DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { return new FieldsWriter(state); } - + static class ConsumerAndSuffix implements Closeable { DocValuesConsumer consumer; int suffix; @@ -97,7 +96,7 @@ private final Map suffixes = new HashMap(); private final SegmentWriteState segmentWriteState; - + public FieldsWriter(SegmentWriteState state) { segmentWriteState = state; } @@ -123,32 +122,53 @@ } private DocValuesConsumer getInstance(FieldInfo field) throws IOException { - final DocValuesFormat format = getDocValuesFormatForField(field.name); + DocValuesFormat format = null; + if (field.getDocValuesGen() != -1) { + final String formatName = field.getAttribute(PER_FIELD_FORMAT_KEY); + // this means the field never existed in that segment, yet is applied updates + if (formatName != null) { + format = DocValuesFormat.forName(formatName); + } + } if (format == null) { + format = getDocValuesFormatForField(field.name); + } + if (format == null) { throw new IllegalStateException("invalid null DocValuesFormat for field=\"" + field.name + "\""); } final String formatName = format.getName(); String previousValue = field.putAttribute(PER_FIELD_FORMAT_KEY, formatName); - assert previousValue == null: "formatName=" + formatName + " prevValue=" + previousValue; + assert field.getDocValuesGen() != -1 || previousValue == null: "formatName=" + formatName + " prevValue=" + previousValue; - Integer suffix; + Integer suffix = null; ConsumerAndSuffix consumer = formats.get(format); if (consumer == null) { // First time we are seeing this format; create a new instance + + if (field.getDocValuesGen() != -1) { + final String suffixAtt = field.getAttribute(PER_FIELD_SUFFIX_KEY); + // even when dvGen is != -1, it can still be a new field, that never + // existed in the segment, and therefore doesn't have the recorded + // attributes yet. 
+ if (suffixAtt != null) { + suffix = Integer.valueOf(suffixAtt); + } + } - // bump the suffix - suffix = suffixes.get(formatName); if (suffix == null) { - suffix = 0; - } else { - suffix = suffix + 1; + // bump the suffix + suffix = suffixes.get(formatName); + if (suffix == null) { + suffix = 0; + } else { + suffix = suffix + 1; + } } suffixes.put(formatName, suffix); - final String segmentSuffix = getFullSegmentSuffix(field.name, - segmentWriteState.segmentSuffix, + final String segmentSuffix = getFullSegmentSuffix(segmentWriteState.segmentSuffix, getSuffix(formatName, Integer.toString(suffix))); consumer = new ConsumerAndSuffix(); consumer.consumer = format.fieldsConsumer(new SegmentWriteState(segmentWriteState, segmentSuffix)); @@ -161,10 +181,10 @@ } previousValue = field.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(suffix)); - assert previousValue == null; + assert field.getDocValuesGen() != -1 || previousValue == null : "suffix=" + Integer.toString(suffix) + " prevValue=" + previousValue; // TODO: we should only provide the "slice" of FIS - // that this PF actually sees ... + // that this DVF actually sees ... return consumer.consumer; } @@ -179,14 +199,11 @@ return formatName + "_" + suffix; } - static String getFullSegmentSuffix(String fieldName, String outerSegmentSuffix, String segmentSuffix) { + static String getFullSegmentSuffix(String outerSegmentSuffix, String segmentSuffix) { if (outerSegmentSuffix.length() == 0) { return segmentSuffix; } else { - // TODO: support embedding; I think it should work but - // we need a test confirm to confirm - // return outerSegmentSuffix + "_" + segmentSuffix; - throw new IllegalStateException("cannot embed PerFieldPostingsFormat inside itself (field \"" + fieldName + "\" returned PerFieldPostingsFormat)"); + return outerSegmentSuffix + "_" + segmentSuffix; } } @@ -210,7 +227,7 @@ final String suffix = fi.getAttribute(PER_FIELD_SUFFIX_KEY); assert suffix != null; DocValuesFormat format = DocValuesFormat.forName(formatName); - String segmentSuffix = getSuffix(formatName, suffix); + String segmentSuffix = getFullSegmentSuffix(readState.segmentSuffix, getSuffix(formatName, suffix)); if (!formats.containsKey(segmentSuffix)) { formats.put(segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix))); } Index: lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java (working copy) @@ -19,25 +19,26 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.search.Query; import org.apache.lucene.util.RamUsageEstimator; -/* Holds buffered deletes, by docID, term or query for a +/* Holds buffered deletes and updates, by docID, term or query for a * single segment. This is used to hold buffered pending - * deletes against the to-be-flushed segment. Once the - * deletes are pushed (on flush in DocumentsWriter), these - * deletes are converted to a FrozenDeletes instance. */ + * deletes and updates against the to-be-flushed segment. 
Once the + * deletes and updates are pushed (on flush in DocumentsWriter), they + * are converted to a FrozenDeletes instance. */ // NOTE: instances of this class are accessed either via a private // instance on DocumentWriterPerThread, or via sync'd code by // DocumentsWriterDeleteQueue -class BufferedDeletes { +class BufferedDeletes { // TODO (DVU_RENAME) BufferedUpdates? /* Rough logic: HashMap has an array[Entry] w/ varying load factor (say 2 * POINTER). Entry is object w/ Term @@ -63,11 +64,50 @@ undercount (say 24 bytes). Integer is OBJ_HEADER + INT. */ final static int BYTES_PER_DEL_QUERY = 5*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 2*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 2*RamUsageEstimator.NUM_BYTES_INT + 24; + /* Rough logic: NumericUpdate calculates its actual size, + * including the update Term and DV field (String). The + * per-field map holds a reference to the updated field, and + * therefore we only account for the object reference and + * map space itself. This is incremented when we first see + * an updated field. + * + * HashMap has an array[Entry] w/ varying load + * factor (say 2*POINTER). Entry is an object w/ String key, + * LinkedHashMap val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). + * + * LinkedHashMap (val) is counted as OBJ_HEADER, array[Entry] ref + header, 4*INT, 1*FLOAT, + * Set (entrySet) (2*OBJ_HEADER + ARRAY_HEADER + 2*POINTER + 4*INT + FLOAT) + */ + final static int BYTES_PER_NUMERIC_FIELD_ENTRY = + 7*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 3*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + 5*RamUsageEstimator.NUM_BYTES_INT + RamUsageEstimator.NUM_BYTES_FLOAT; + + /* Rough logic: Incremented when we see another Term for an already updated + * field. + * LinkedHashMap has an array[Entry] w/ varying load factor + * (say 2*POINTER). Entry is an object w/ Term key, NumericUpdate val, + * int hash, Entry next, Entry before, Entry after (OBJ_HEADER + 5*POINTER + INT). + * + * Term (key) is counted only as POINTER. + * NumericUpdate (val) counts its own size and isn't accounted for here. + */ + final static int BYTES_PER_NUMERIC_UPDATE_ENTRY = 7*RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT; + final AtomicInteger numTermDeletes = new AtomicInteger(); + final AtomicInteger numNumericUpdates = new AtomicInteger(); final Map terms = new HashMap(); final Map queries = new HashMap(); final List docIDs = new ArrayList(); + // Map> + // For each field we keep an ordered list of NumericUpdates, key'd by the + // update Term. LinkedHashMap guarantees we will later traverse the map in + // insertion order (so that if two terms affect the same document, the last + // one that came in wins), and helps us detect faster if the same Term is + // used to update the same field multiple times (so we later traverse it + // only once). 
+ final Map> numericUpdates = new HashMap>(); + public static final Integer MAX_INT = Integer.valueOf(Integer.MAX_VALUE); final AtomicLong bytesUsed; @@ -75,21 +115,17 @@ private final static boolean VERBOSE_DELETES = false; long gen; + public BufferedDeletes() { - this(new AtomicLong()); + this.bytesUsed = new AtomicLong(); } - BufferedDeletes(AtomicLong bytesUsed) { - assert bytesUsed != null; - this.bytesUsed = bytesUsed; - } - @Override public String toString() { if (VERBOSE_DELETES) { return "gen=" + gen + " numTerms=" + numTermDeletes + ", terms=" + terms - + ", queries=" + queries + ", docIDs=" + docIDs + ", bytesUsed=" - + bytesUsed; + + ", queries=" + queries + ", docIDs=" + docIDs + ", numericUpdates=" + numericUpdates + + ", bytesUsed=" + bytesUsed; } else { String s = "gen=" + gen; if (numTermDeletes.get() != 0) { @@ -101,6 +137,9 @@ if (docIDs.size() != 0) { s += " " + docIDs.size() + " deleted docIDs"; } + if (numNumericUpdates.get() != 0) { + s += " " + numNumericUpdates.get() + " numeric updates (unique count=" + numericUpdates.size() + ")"; + } if (bytesUsed.get() != 0) { s += " bytesUsed=" + bytesUsed.get(); } @@ -145,20 +184,46 @@ } } + public void addNumericUpdate(NumericUpdate update, int docIDUpto) { + LinkedHashMap fieldUpdates = numericUpdates.get(update.field); + if (fieldUpdates == null) { + fieldUpdates = new LinkedHashMap(); + numericUpdates.put(update.field, fieldUpdates); + bytesUsed.addAndGet(BYTES_PER_NUMERIC_FIELD_ENTRY); + } + final NumericUpdate current = fieldUpdates.get(update.term); + if (current != null && docIDUpto < current.docIDUpto) { + // Only record the new number if it's greater than or equal to the current + // one. This is important because if multiple threads are replacing the + // same doc at nearly the same time, it's possible that one thread that + // got a higher docID is scheduled before the other threads. + return; + } + + update.docIDUpto = docIDUpto; + // since it's a LinkedHashMap, we must first remove the Term entry so that + // it's added last (we're interested in insertion-order). 
+ if (current != null) { + fieldUpdates.remove(update.term); + } + fieldUpdates.put(update.term, update); + numNumericUpdates.incrementAndGet(); + if (current == null) { + bytesUsed.addAndGet(BYTES_PER_NUMERIC_UPDATE_ENTRY + update.sizeInBytes()); + } + } + void clear() { terms.clear(); queries.clear(); docIDs.clear(); + numericUpdates.clear(); numTermDeletes.set(0); + numNumericUpdates.set(0); bytesUsed.set(0); } - void clearDocIDs() { - bytesUsed.addAndGet(-docIDs.size()*BYTES_PER_DEL_DOCID); - docIDs.clear(); - } - boolean any() { - return terms.size() > 0 || docIDs.size() > 0 || queries.size() > 0; + return terms.size() > 0 || docIDs.size() > 0 || queries.size() > 0 || numericUpdates.size() > 0; } } Index: lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (working copy) @@ -18,10 +18,13 @@ */ import java.io.IOException; -import java.util.List; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; -import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; @@ -35,12 +38,12 @@ /* Tracks the stream of {@link BufferedDeletes}. * When DocumentsWriterPerThread flushes, its buffered - * deletes are appended to this stream. We later - * apply these deletes (resolve them to the actual + * deletes and updates are appended to this stream. We later + * apply them (resolve them to the actual * docIDs, per segment) when a merge is started * (only to the to-be-merged segments). We * also apply to all segments when NRT reader is pulled, - * commit/close is called, or when too many deletes are + * commit/close is called, or when too many deletes or updates are * buffered and must be flushed (by RAM usage or by count). * * Each packet is assigned a generation, and each flushed or @@ -48,7 +51,7 @@ * track which BufferedDeletes packets to apply to any given * segment. */ -class BufferedDeletesStream { +class BufferedDeletesStream { // TODO (DVU_RENAME) BufferedUpdatesStream // TODO: maybe linked list? 
private final List deletes = new ArrayList(); @@ -114,6 +117,7 @@ } public static class ApplyDeletesResult { + // True if any actual deletes took place: public final boolean anyDeletes; @@ -190,7 +194,7 @@ final long segGen = info.getBufferedDeletesGen(); if (packet != null && segGen < packet.delGen()) { - //System.out.println(" coalesce"); +// System.out.println(" coalesce"); if (coalescedDeletes == null) { coalescedDeletes = new CoalescedDeletes(); } @@ -217,15 +221,21 @@ int delCount = 0; final boolean segAllDeletes; try { + Map fieldUpdates = null; if (coalescedDeletes != null) { //System.out.println(" del coalesced"); delCount += applyTermDeletes(coalescedDeletes.termsIterable(), rld, reader); delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), rld, reader); + fieldUpdates = applyNumericDocValuesUpdates(coalescedDeletes.numericDVUpdates, rld, reader, fieldUpdates); } //System.out.println(" del exact"); // Don't delete by Term here; DocumentsWriterPerThread // already did that on flush: delCount += applyQueryDeletes(packet.queriesIterable(), rld, reader); + fieldUpdates = applyNumericDocValuesUpdates(Arrays.asList(packet.updates), rld, reader, fieldUpdates); + if (!fieldUpdates.isEmpty()) { + rld.writeFieldUpdates(info.info.dir, fieldUpdates); + } final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount(); assert fullDelCount <= rld.info.info.getDocCount(); segAllDeletes = fullDelCount == rld.info.info.getDocCount(); @@ -272,10 +282,14 @@ try { delCount += applyTermDeletes(coalescedDeletes.termsIterable(), rld, reader); delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), rld, reader); + Map fieldUpdates = applyNumericDocValuesUpdates(coalescedDeletes.numericDVUpdates, rld, reader, null); + if (!fieldUpdates.isEmpty()) { + rld.writeFieldUpdates(info.info.dir, fieldUpdates); + } final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount(); assert fullDelCount <= rld.info.info.getDocCount(); segAllDeletes = fullDelCount == rld.info.info.getDocCount(); - } finally { + } finally { rld.release(reader); readerPool.release(rld); } @@ -289,7 +303,7 @@ } if (infoStream.isEnabled("BD")) { - infoStream.message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : "")); + infoStream.message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + coalescedDeletes + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : "")); } } info.setBufferedDeletesGen(gen); @@ -409,15 +423,15 @@ if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } + if (!any) { + rld.initWritableLiveDocs(); + any = true; + } // NOTE: there is no limit check on the docID // when deleting by Term (unlike by Query) // because on flush we apply all Term deletes to // each segment. 
So all Term deleting here is // against prior segments: - if (!any) { - rld.initWritableLiveDocs(); - any = true; - } if (rld.delete(docID)) { delCount++; } @@ -429,6 +443,87 @@ return delCount; } + // NumericDocValues Updates + // If otherFieldUpdates != null, we need to merge the updates into them + private synchronized Map applyNumericDocValuesUpdates(Iterable updates, + ReadersAndLiveDocs rld, SegmentReader reader, Map otherFieldUpdates) throws IOException { + Fields fields = reader.fields(); + if (fields == null) { + // This reader has no postings + return Collections.emptyMap(); + } + + // TODO: we can process the updates per DV field, from last to first so that + // if multiple terms affect same document for the same field, we add an update + // only once (that of the last term). To do that, we can keep a bitset which + // marks which documents have already been updated. So e.g. if term T1 + // updates doc 7, and then we process term T2 and it updates doc 7 as well, + // we don't apply the update since we know T1 came last and therefore wins + // the update. + // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so + // that these documents aren't even returned. + + String currentField = null; + TermsEnum termsEnum = null; + DocsEnum docs = null; + final Map result = otherFieldUpdates == null ? new HashMap() : otherFieldUpdates; + //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader); + for (NumericUpdate update : updates) { + Term term = update.term; + int limit = update.docIDUpto; + + // TODO: we traverse the terms in update order (not term order) so that we + // apply the updates in the correct order, i.e. if two terms udpate the + // same document, the last one that came in wins, irrespective of the + // terms lexical order. + // we can apply the updates in terms order if we keep an updatesGen (and + // increment it with every update) and attach it to each NumericUpdate. Note + // that we cannot rely only on docIDUpto because an app may send two updates + // which will get same docIDUpto, yet will still need to respect the order + // those updates arrived. 
+ + if (!term.field().equals(currentField)) { + // if we change the code to process updates in terms order, enable this assert +// assert currentField == null || currentField.compareTo(term.field()) < 0; + currentField = term.field(); + Terms terms = fields.terms(currentField); + if (terms != null) { + termsEnum = terms.iterator(termsEnum); + } else { + termsEnum = null; + continue; // no terms in that field + } + } + + if (termsEnum == null) { + continue; + } + // System.out.println(" term=" + term); + + if (termsEnum.seekExact(term.bytes())) { + // we don't need term frequencies for this + DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, DocsEnum.FLAG_NONE); + + //System.out.println("BDS: got docsEnum=" + docsEnum); + + NumericFieldUpdates fieldUpdates = result.get(update.field); + if (fieldUpdates == null) { + fieldUpdates = new NumericFieldUpdates.PackedNumericFieldUpdates(reader.maxDoc()); + result.put(update.field, fieldUpdates); + } + int doc; + while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID); + if (doc >= limit) { + break; // no more docs that can be updated for this term + } + fieldUpdates.add(doc, update.value); + } + } + } + return result; + } + public static class QueryAndLimit { public final Query query; public final int limit; Index: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (working copy) @@ -540,11 +540,6 @@ msg(infoStream, " diagnostics = " + diagnostics); } - Map atts = info.info.attributes(); - if (atts != null && !atts.isEmpty()) { - msg(infoStream, " attributes = " + atts); - } - if (!info.hasDeletions()) { msg(infoStream, " no deletions"); segInfoStat.hasDeletions = false; Index: lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java (working copy) @@ -30,20 +30,27 @@ class CoalescedDeletes { final Map queries = new HashMap(); final List> iterables = new ArrayList>(); - + final List numericDVUpdates = new ArrayList(); + @Override public String toString() { // note: we could add/collect more debugging information - return "CoalescedDeletes(termSets=" + iterables.size() + ",queries=" + queries.size() + ")"; + return "CoalescedDeletes(termSets=" + iterables.size() + ",queries=" + queries.size() + ",numericUpdates=" + numericDVUpdates.size() + ")"; } void update(FrozenBufferedDeletes in) { iterables.add(in.termsIterable()); - for(int queryIdx=0;queryIdx termsIterable() { Index: lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (working copy) @@ -85,7 +85,7 @@ // FreqProxTermsWriter does this with // FieldInfo.storePayload. 
FieldInfosWriter infosWriter = codec.fieldInfosFormat().getFieldInfosWriter(); - infosWriter.write(state.directory, state.segmentInfo.name, state.fieldInfos, IOContext.DEFAULT); + infosWriter.write(state.directory, state.segmentInfo.name, "", state.fieldInfos, IOContext.DEFAULT); } @Override Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -158,6 +158,13 @@ return applyAllDeletes( deleteQueue); } + synchronized boolean updateNumericDocValue(Term term, String field, Long value) throws IOException { + final DocumentsWriterDeleteQueue deleteQueue = this.deleteQueue; + deleteQueue.addNumericUpdate(new NumericUpdate(term, field, value)); + flushControl.doOnDelete(); + return applyAllDeletes(deleteQueue); + } + DocumentsWriterDeleteQueue currentDeleteSession() { return deleteQueue; } Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java (working copy) @@ -107,6 +107,11 @@ tryApplyGlobalSlice(); } + void addNumericUpdate(NumericUpdate update) { + add(new NumericUpdateNode(update)); + tryApplyGlobalSlice(); + } + /** * invariant for document update */ @@ -380,7 +385,23 @@ } } + private static final class NumericUpdateNode extends Node { + NumericUpdateNode(NumericUpdate update) { + super(update); + } + + @Override + void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + bufferedDeletes.addNumericUpdate(item, docIDUpto); + } + + @Override + public String toString() { + return "update=" + item; + } + } + private boolean forceApplyGlobalSlice() { globalBufferLock.lock(); final Node currentTail = tail; Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (working copy) @@ -17,9 +17,11 @@ * limitations under the License. 
*/ +import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK; +import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; + import java.io.IOException; import java.text.NumberFormat; -import java.util.Collection; import java.util.HashSet; import java.util.Locale; import java.util.Set; @@ -41,9 +43,6 @@ import org.apache.lucene.util.MutableBits; import org.apache.lucene.util.RamUsageEstimator; -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK; -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; - class DocumentsWriterPerThread { /** @@ -174,8 +173,9 @@ final DocConsumer consumer; final Counter bytesUsed; - //Deletes for our still-in-RAM (to be flushed next) segment - final BufferedDeletes pendingDeletes; + SegmentWriteState flushState; + // Deletes for our still-in-RAM (to be flushed next) segment + final BufferedDeletes pendingDeletes; private final SegmentInfo segmentInfo; // Current segment we are working on boolean aborting = false; // True if an abort is pending boolean hasAborted = false; // True if the last exception throws by #updateDocument was aborting @@ -210,8 +210,7 @@ pendingDeletes.clear(); deleteSlice = deleteQueue.newSlice(); - segmentInfo = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segmentName, -1, - false, codec, null, null); + segmentInfo = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segmentName, -1, false, codec, null); assert numDocsInRAM == 0; if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue); @@ -467,7 +466,7 @@ pendingDeletes.terms.clear(); segmentInfo.setFiles(new HashSet(directory.getCreatedFiles())); - final SegmentInfoPerCommit segmentInfoPerCommit = new SegmentInfoPerCommit(segmentInfo, 0, -1L); + final SegmentInfoPerCommit segmentInfoPerCommit = new SegmentInfoPerCommit(segmentInfo, 0, -1L, -1L); if (infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : (flushState.segmentInfo.getDocCount() - flushState.delCountOnFlush)) + " deleted docs"); infoStream.message("DWPT", "new segment has " + @@ -481,7 +480,8 @@ } final BufferedDeletes segmentDeletes; - if (pendingDeletes.queries.isEmpty()) { + if (pendingDeletes.queries.isEmpty() && pendingDeletes.numericUpdates.isEmpty()) { + pendingDeletes.clear(); segmentDeletes = null; } else { segmentDeletes = pendingDeletes; Index: lucene/core/src/java/org/apache/lucene/index/FieldInfo.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FieldInfo.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/FieldInfo.java (working copy) @@ -47,6 +47,8 @@ private Map attributes; + private long dvGen = -1; // the DocValues generation of this field + /** * Controls how much information is stored in the postings lists. * @lucene.experimental @@ -79,7 +81,7 @@ * Character offsets are encoded alongside the positions. */ DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, - }; + } /** * DocValues types. @@ -110,7 +112,7 @@ * ordinal and by-value. Values must be <= 32766 bytes. */ SORTED_SET - }; + } /** * Sole Constructor. 
@@ -117,8 +119,9 @@ * * @lucene.experimental */ - public FieldInfo(String name, boolean indexed, int number, boolean storeTermVector, - boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normsType, Map attributes) { + public FieldInfo(String name, boolean indexed, int number, boolean storeTermVector, boolean omitNorms, + boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normsType, + Map attributes) { this.name = name; this.indexed = indexed; this.number = number; @@ -223,7 +226,20 @@ return docValueType; } + /** Sets the docValues generation of this field. */ + public void setDocValuesGen(long dvGen) { + this.dvGen = dvGen; + } + /** + * Returns the docValues generation of this field, or -1 if no docValues + * updates exist for it. + */ + public long getDocValuesGen() { + return dvGen; + } + + /** * Returns {@link DocValuesType} of the norm. this may be null if the field has no norms. */ public DocValuesType getNormType() { Index: lucene/core/src/java/org/apache/lucene/index/FieldInfos.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FieldInfos.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/FieldInfos.java (working copy) @@ -223,6 +223,20 @@ (dvType == null || docValuesType.get(name) == null || dvType == docValuesType.get(name)); } + /** + * Returns true if the {@code fieldName} exists in the map and is of the + * same {@code dvType}. + */ + synchronized boolean contains(String fieldName, DocValuesType dvType) { + // used by IndexWriter.updateNumericDocValue + if (!nameToNumber.containsKey(fieldName)) { + return false; + } else { + // only return true if the field has the same dvType as the requested one + return dvType == docValuesType.get(fieldName); + } + } + synchronized void clear() { numberToName.clear(); nameToNumber.clear(); Index: lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (working copy) @@ -17,32 +17,39 @@ * limitations under the License. */ +import java.util.ArrayList; import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; +import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit; import org.apache.lucene.search.Query; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit; -/** Holds buffered deletes by term or query, once pushed. - * Pushed deletes are write-once, so we shift to more - * memory efficient data structure to hold them. We don't - * hold docIDs because these are applied on flush. */ +/** + * Holds buffered deletes and updates by term or query, once pushed. Pushed + * deletes/updates are write-once, so we shift to more memory efficient data + * structure to hold them. We don't hold docIDs because these are applied on + * flush. + */ +class FrozenBufferedDeletes { // TODO (DVU_RENAME) FrozenBufferedUpdates? -class FrozenBufferedDeletes { - /* Query we often undercount (say 24 bytes), plus int. 
*/ final static int BYTES_PER_DEL_QUERY = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT + 24; - + // Terms, in sorted order: final PrefixCodedTerms terms; int termCount; // just for debugging - // Parallel array of deleted query, and the docIDUpto for - // each + // Parallel array of deleted query, and the docIDUpto for each final Query[] queries; final int[] queryLimits; + + // numeric DV update term and their updates + final NumericUpdate[] updates; + final int bytesUsed; final int numTermDeletes; private long gen = -1; // assigned by BufferedDeletesStream once pushed @@ -72,7 +79,21 @@ upto++; } - bytesUsed = (int) terms.getSizeInBytes() + queries.length * BYTES_PER_DEL_QUERY; + // TODO if a Term affects multiple fields, we could keep the updates key'd by Term + // so that it maps to all fields it affects, sorted by their docUpto, and traverse + // that Term only once, applying the update to all fields that still need to be + // updated. + List allUpdates = new ArrayList(); + int numericUpdatesSize = 0; + for (LinkedHashMap fieldUpdates : deletes.numericUpdates.values()) { + for (NumericUpdate update : fieldUpdates.values()) { + allUpdates.add(update); + numericUpdatesSize += update.sizeInBytes(); + } + } + updates = allUpdates.toArray(new NumericUpdate[allUpdates.size()]); + + bytesUsed = (int) terms.getSizeInBytes() + queries.length * BYTES_PER_DEL_QUERY + numericUpdatesSize + updates.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF; numTermDeletes = deletes.numTermDeletes.get(); } @@ -140,6 +161,6 @@ } boolean any() { - return termCount > 0 || queries.length > 0; + return termCount > 0 || queries.length > 0 || updates.length > 0; } } Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -30,6 +30,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Map.Entry; import java.util.Queue; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -38,10 +39,12 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.lucene3x.Lucene3xCodec; import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoFormat; +import org.apache.lucene.index.FieldInfo.DocValuesType; import org.apache.lucene.index.FieldInfos.FieldNumbers; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.MergePolicy.MergeTrigger; import org.apache.lucene.index.MergeState.CheckAbort; +import org.apache.lucene.index.NumericFieldUpdates.UpdatesIterator; import org.apache.lucene.search.Query; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.CompoundFileDirectory; @@ -434,6 +437,7 @@ final ReadersAndLiveDocs rld = readerMap.get(info); if (rld != null) { assert info == rld.info; +// System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.drop: " + info); readerMap.remove(info); rld.dropReaders(); } @@ -464,6 +468,7 @@ if (!poolReaders && rld.refCount() == 1) { // This is the last ref to this RLD, and we're not // pooling, so remove it: +// System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.release: " + rld.info); if (rld.writeLiveDocs(directory)) { // Make sure we only write del docs for a live segment: assert assertInfoLive == false || infoIsLive(rld.info); @@ -478,6 
+483,7 @@ } //System.out.println("IW: done writeLiveDocs for info=" + rld.info); +// System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.release: drop readers " + rld.info); rld.dropReaders(); readerMap.remove(rld.info); } @@ -493,7 +499,7 @@ try { if (doSave && rld.writeLiveDocs(directory)) { - // Make sure we only write del docs for a live segment: + // Make sure we only write del docs and field updates for a live segment: assert infoIsLive(rld.info); // Must checkpoint because we just // created new _X_N.del and field updates files; @@ -799,27 +805,6 @@ } } - private FieldInfos getFieldInfos(SegmentInfo info) throws IOException { - Directory cfsDir = null; - try { - if (info.getUseCompoundFile()) { - cfsDir = new CompoundFileDirectory(info.dir, - IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), - IOContext.READONCE, - false); - } else { - cfsDir = info.dir; - } - return info.getCodec().fieldInfosFormat().getFieldInfosReader().read(cfsDir, - info.name, - IOContext.READONCE); - } finally { - if (info.getUseCompoundFile() && cfsDir != null) { - cfsDir.close(); - } - } - } - /** * Loads or returns the already loaded the global field number map for this {@link SegmentInfos}. * If this {@link SegmentInfos} has no global field number map the returned instance is empty @@ -828,7 +813,7 @@ final FieldNumbers map = new FieldNumbers(); for(SegmentInfoPerCommit info : segmentInfos) { - for(FieldInfo fi : getFieldInfos(info.info)) { + for(FieldInfo fi : SegmentReader.readFieldInfos(info)) { map.addOrGet(fi.name, fi.number, fi.getDocValuesType()); } } @@ -1545,6 +1530,42 @@ } } + /** + * Updates a document's NumericDocValue for field to the given + * value. This method can be used to 'unset' a document's value + * by passing {@code null} as the new value. Also, you can only update fields + * that already exist in the index, not add new fields through this method. + * + *

+ * NOTE: if this method hits an OutOfMemoryError you should immediately
+ * close the writer. See above for details.
+ *
+ * + * @param term + * the term to identify the document(s) to be updated + * @param field + * field name of the NumericDocValues field + * @param value + * new value for the field + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public void updateNumericDocValue(Term term, String field, Long value) throws IOException { + ensureOpen(); + if (!globalFieldNumberMap.contains(field, DocValuesType.NUMERIC)) { + throw new IllegalArgumentException("can only update existing numeric-docvalues fields!"); + } + try { + if (docWriter.updateNumericDocValue(term, field, value)) { + processEvents(true, false); + } + } catch (OutOfMemoryError oom) { + handleOOM(oom, "updateNumericDocValue"); + } + } + // for test purpose final synchronized int getSegmentCount(){ return segmentInfos.size(); @@ -1928,7 +1949,6 @@ merge.maxNumSegments = maxNumSegments; } } - } else { spec = mergePolicy.findMerges(trigger, segmentInfos); } @@ -2405,7 +2425,7 @@ IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.sizeInBytes(), true, -1)); - for(FieldInfo fi : getFieldInfos(info.info)) { + for(FieldInfo fi : SegmentReader.readFieldInfos(info)) { globalFieldNumberMap.addOrGet(fi.name, fi.number, fi.getDocValuesType()); } infos.add(copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied, context, copiedFiles)); @@ -2516,7 +2536,7 @@ TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1, - false, codec, null, null); + false, codec, null); SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, config.getTermIndexInterval(), MergeState.CheckAbort.NONE, globalFieldNumberMap, context); @@ -2538,7 +2558,7 @@ } } - SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, 0, -1L); + SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, 0, -1L, -1L); info.setFiles(new HashSet(trackingDir.getCreatedFiles())); trackingDir.getCreatedFiles().clear(); @@ -2624,7 +2644,7 @@ // note: we don't really need this fis (its copied), but we load it up // so we don't pass a null value to the si writer - FieldInfos fis = getFieldInfos(info.info); + FieldInfos fis = SegmentReader.readFieldInfos(info); Set docStoreFiles3xOnly = Lucene3xCodec.getDocStoreFiles(info.info); @@ -2646,9 +2666,9 @@ //System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion()); // Same SI as before but we change directory, name and docStoreSegment: SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(), - info.info.getUseCompoundFile(), - info.info.getCodec(), info.info.getDiagnostics(), attributes); - SegmentInfoPerCommit newInfoPerCommit = new SegmentInfoPerCommit(newInfo, info.getDelCount(), info.getDelGen()); + info.info.getUseCompoundFile(), info.info.getCodec(), + info.info.getDiagnostics(), attributes); + SegmentInfoPerCommit newInfoPerCommit = new SegmentInfoPerCommit(newInfo, info.getDelCount(), info.getDelGen(), info.getFieldInfosGen()); Set segFiles = new HashSet(); @@ -3138,15 +3158,35 @@ } } - /** Carefully merges deletes for the segments we just - * merged. This is tricky because, although merging will - * clear all deletes (compacts the documents), new - * deletes may have been flushed to the segments since - * the merge was started. 
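Earlier in this hunk, updateNumericDocValue is the new public entry point for in-place numeric docValues updates. A minimal usage sketch, assuming an open IndexWriter named writer and an existing NumericDocValuesField called "price" (the field and term values are illustrative only):

    // Update the "price" value of every document matching the term, without re-indexing;
    // passing null instead of 42L would unset the value for those documents. Fields that
    // were not indexed as numeric docValues cause an IllegalArgumentException.
    writer.updateNumericDocValue(new Term("id", "doc-17"), "price", 42L);
    writer.commit();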
This method "carries over" - * such new deletes onto the newly merged segment, and - * saves the resulting deletes file (incrementing the - * delete generation for merge.info). If no deletes were - * flushed, no new deletes file is saved. */ + private MergePolicy.DocMap getDocMap(MergePolicy.OneMerge merge, MergeState mergeState) { + MergePolicy.DocMap docMap = merge.getDocMap(mergeState); + assert docMap.isConsistent(merge.info.info.getDocCount()); + return docMap; + } + + private void skipDeletedDoc(UpdatesIterator[] updatesIters, int deletedDoc) { + for (UpdatesIterator iter : updatesIters) { + if (iter.doc() == deletedDoc) { + iter.nextDoc(); + } + // when entering the method, all iterators must already be beyond the + // deleted document, or right on it, in which case we advance them above + // and they must be beyond it now. + assert iter.doc() > deletedDoc : "updateDoc=" + iter.doc() + " deletedDoc=" + deletedDoc; + } + } + + /** + * Carefully merges deletes and updates for the segments we just merged. This + * is tricky because, although merging will clear all deletes (compacts the + * documents) and compact all the updates, new deletes and updates may have + * been flushed to the segments since the merge was started. This method + * "carries over" such new deletes and updates onto the newly merged segment, + * and saves the resulting deletes and updates files (incrementing the delete + * and DV generations for merge.info). If no deletes were flushed, no new + * deletes file is saved. + */ + // TODO (DVU_RENAME) to commitMergedDeletesAndUpdates synchronized private ReadersAndLiveDocs commitMergedDeletes(MergePolicy.OneMerge merge, MergeState mergeState) throws IOException { assert testPoint("startCommitMergeDeletes"); @@ -3163,19 +3203,38 @@ long minGen = Long.MAX_VALUE; // Lazy init (only when we find a delete to carry over): - ReadersAndLiveDocs mergedDeletes = null; + ReadersAndLiveDocs mergedDeletes = null; // TODO (DVU_RENAME) to mergedDeletesAndUpdates + boolean initWritableLiveDocs = false; MergePolicy.DocMap docMap = null; - - for(int i=0; i < sourceSegments.size(); i++) { + final Map mergedFieldUpdates = new HashMap(); + + for (int i = 0; i < sourceSegments.size(); i++) { SegmentInfoPerCommit info = sourceSegments.get(i); minGen = Math.min(info.getBufferedDeletesGen(), minGen); final int docCount = info.info.getDocCount(); final Bits prevLiveDocs = merge.readers.get(i).getLiveDocs(); - final Bits currentLiveDocs; final ReadersAndLiveDocs rld = readerPool.get(info, false); // We hold a ref so it should still be in the pool: assert rld != null: "seg=" + info.info.name; - currentLiveDocs = rld.getLiveDocs(); + final Bits currentLiveDocs = rld.getLiveDocs(); + final Map mergingFieldUpdates = rld.getMergingFieldUpdates(); + final String[] mergingFields; + final UpdatesIterator[] updatesIters; + if (mergingFieldUpdates.isEmpty()) { + mergingFields = null; + updatesIters = null; + } else { + mergingFields = new String[mergingFieldUpdates.size()]; + updatesIters = new UpdatesIterator[mergingFieldUpdates.size()]; + int idx = 0; + for (Entry e : mergingFieldUpdates.entrySet()) { + mergingFields[idx] = e.getKey(); + updatesIters[idx] = e.getValue().getUpdates(); + updatesIters[idx].nextDoc(); // advance to first update doc + ++idx; + } + } +// System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: info=" + info + ", mergingUpdates=" + mergingUpdates); if (prevLiveDocs != null) { @@ -3198,11 +3257,10 @@ // If so, we must carefully merge the liveDocs one 
// doc at a time: if (currentLiveDocs != prevLiveDocs) { - // This means this segment received new deletes // since we started the merge, so we // must merge them: - for(int j=0;j j : "updateDoc=" + updatesIter.doc() + " curDoc=" + j; + } + } } docUpto++; } } + } else if (mergingFields != null) { + // need to check each non-deleted document if it has any updates + for (int j = 0; j < docCount; j++) { + if (prevLiveDocs.get(j)) { + // document isn't deleted, check if any of the fields have an update to it + int newDoc = -1; + for (int idx = 0; idx < mergingFields.length; idx++) { + UpdatesIterator updatesIter = updatesIters[idx]; + if (updatesIter.doc() == j) { // document has an update + if (mergedDeletes == null) { + mergedDeletes = readerPool.get(merge.info, true); + docMap = getDocMap(merge, mergeState); + } + if (newDoc == -1) { // map once per all field updates, but only if there are any updates + newDoc = docMap.map(docUpto); + } + String field = mergingFields[idx]; + NumericFieldUpdates fieldUpdates = mergedFieldUpdates.get(field); + if (fieldUpdates == null) { + // an approximantion of maxDoc, used to compute best bitsPerValue + fieldUpdates = new NumericFieldUpdates.PackedNumericFieldUpdates(mergeState.segmentInfo.getDocCount()); + mergedFieldUpdates.put(field, fieldUpdates); + } + fieldUpdates.add(newDoc, updatesIter.value() == null ? NumericUpdate.MISSING : updatesIter.value()); + updatesIter.nextDoc(); // advance to next document + } else { + assert updatesIter.doc() > j : "updateDoc=" + updatesIter.doc() + " curDoc=" + j; + } + } + // advance docUpto for every non-deleted document + docUpto++; + } else { + // advance all iters beyond the deleted document + skipDeletedDoc(updatesIters, j); + } + } } else { docUpto += info.info.getDocCount() - info.getDelCount() - rld.getPendingDeleteCount(); } @@ -3225,20 +3351,82 @@ assert currentLiveDocs.length() == docCount; // This segment had no deletes before but now it // does: - for(int j=0; j j : "field=" + mergingFields[idx] + " updateDoc=" + updatesIter.doc() + " curDoc=" + j; + } + } } docUpto++; } + } else if (mergingFields != null) { + // no deletions before or after, but there were updates + for (int j = 0; j < docCount; j++) { + int newDoc = -1; + for (int idx = 0; idx < mergingFields.length; idx++) { + UpdatesIterator updatesIter = updatesIters[idx]; + if (updatesIter.doc() == j) { // document has an update + if (mergedDeletes == null) { + mergedDeletes = readerPool.get(merge.info, true); + docMap = getDocMap(merge, mergeState); + } + if (newDoc == -1) { // map once per all field updates, but only if there are any updates + newDoc = docMap.map(docUpto); + } + String field = mergingFields[idx]; + NumericFieldUpdates fieldUpdates = mergedFieldUpdates.get(field); + if (fieldUpdates == null) { + // an approximantion of maxDoc, used to compute best bitsPerValue + fieldUpdates = new NumericFieldUpdates.PackedNumericFieldUpdates(mergeState.segmentInfo.getDocCount()); + mergedFieldUpdates.put(field, fieldUpdates); + } + fieldUpdates.add(newDoc, updatesIter.value() == null ? 
NumericUpdate.MISSING : updatesIter.value()); + updatesIter.nextDoc(); // advance to next document + } else { + assert updatesIter.doc() > j : "updateDoc=" + updatesIter.doc() + " curDoc=" + j; + } + } + // advance docUpto for every non-deleted document + docUpto++; + } } else { - // No deletes before or after + // No deletes or updates before or after docUpto += info.info.getDocCount(); } } @@ -3245,11 +3433,36 @@ assert docUpto == merge.info.info.getDocCount(); + if (!mergedFieldUpdates.isEmpty()) { +// System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMergedDeletes: mergedDeletes.info=" + mergedDeletes.info + ", mergedFieldUpdates=" + mergedFieldUpdates); + boolean success = false; + try { + // if any error occurs while writing the field updates we should release + // the info, otherwise it stays in the pool but is considered not "live" + // which later causes false exceptions in pool.dropAll(). + // NOTE: currently this is the only place which throws a true + // IOException. If this ever changes, we need to extend that try/finally + // block to the rest of the method too. + mergedDeletes.writeFieldUpdates(directory, mergedFieldUpdates); + success = true; + } finally { + if (!success) { + mergedDeletes.dropChanges(); + readerPool.drop(merge.info); + } + } + } + if (infoStream.isEnabled("IW")) { if (mergedDeletes == null) { - infoStream.message("IW", "no new deletes since merge started"); + infoStream.message("IW", "no new deletes or field updates since merge started"); } else { - infoStream.message("IW", mergedDeletes.getPendingDeleteCount() + " new deletes since merge started"); + String msg = mergedDeletes.getPendingDeleteCount() + " new deletes"; + if (!mergedFieldUpdates.isEmpty()) { + msg += " and " + mergedFieldUpdates.size() + " new field updates"; + } + msg += " since merge started"; + infoStream.message("IW", msg); } } @@ -3286,10 +3499,9 @@ return false; } - final ReadersAndLiveDocs mergedDeletes = merge.info.info.getDocCount() == 0 ? null : commitMergedDeletes(merge, mergeState); + final ReadersAndLiveDocs mergedDeletes = merge.info.info.getDocCount() == 0 ? null : commitMergedDeletes(merge, mergeState); +// System.out.println("[" + Thread.currentThread().getName() + "] IW.commitMerge: mergedDeletes=" + mergedDeletes); - assert mergedDeletes == null || mergedDeletes.getPendingDeleteCount() != 0; - // If the doc store we are using has been closed and // is in now compound format (but wasn't when we // started), then we will switch to the compound @@ -3613,7 +3825,7 @@ // Lock order: IW -> BD final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments); - + if (result.anyDeletes) { checkpoint(); } @@ -3637,13 +3849,15 @@ // ConcurrentMergePolicy we keep deterministic segment // names. 
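The carry-over loops above in commitMergedDeletes re-key every pending numeric update from its pre-merge document ID to its post-merge document ID before recording it against the merged segment. A stripped-down sketch of that re-keying, assuming a single source segment with no deletions (in the real code the value passed to docMap.map() is the running docUpto across all source segments, skipping deleted documents):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.lucene.index.MergePolicy;

    // Sketch only: re-key updates recorded against pre-merge doc IDs to the doc IDs
    // they will have in the merged segment, so writeFieldUpdates can persist them there.
    static Map<Integer,Long> carryOverUpdates(int[] updatedDocs, long[] newValues, MergePolicy.DocMap docMap) {
      Map<Integer,Long> carried = new HashMap<Integer,Long>();
      for (int i = 0; i < updatedDocs.length; i++) {
        carried.put(docMap.map(updatedDocs[i]), newValues[i]);
      }
      return carried;
    }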
final String mergeSegmentName = newSegmentName(); - SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergeSegmentName, -1, false, codec, null, null); + SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergeSegmentName, -1, false, codec, null); Map details = new HashMap(); details.put("mergeMaxNumSegments", "" + merge.maxNumSegments); details.put("mergeFactor", Integer.toString(merge.segments.size())); setDiagnostics(si, SOURCE_MERGE, details); - merge.setInfo(new SegmentInfoPerCommit(si, 0, -1L)); + merge.setInfo(new SegmentInfoPerCommit(si, 0, -1L, -1L)); +// System.out.println("[" + Thread.currentThread().getName() + "] IW._mergeInit: " + segString(merge.segments) + " into " + si); + // Lock order: IW -> BD bufferedDeletesStream.prune(segmentInfos); @@ -3684,7 +3898,7 @@ // exception inside mergeInit if (merge.registerDone) { final List sourceSegments = merge.segments; - for(SegmentInfoPerCommit info : sourceSegments) { + for (SegmentInfoPerCommit info : sourceSegments) { mergingSegments.remove(info); } merge.registerDone = false; @@ -3708,6 +3922,8 @@ assert rld != null; if (drop) { rld.dropChanges(); + } else { + rld.dropMergingUpdates(); } rld.release(sr); readerPool.release(rld); @@ -3763,20 +3979,20 @@ // Hold onto the "live" reader; we will use this to // commit merged deletes final ReadersAndLiveDocs rld = readerPool.get(info, true); - SegmentReader reader = rld.getMergeReader(context); - assert reader != null; - // Carefully pull the most recent live docs: + // Carefully pull the most recent live docs and reader + SegmentReader reader; final Bits liveDocs; final int delCount; - synchronized(this) { - // Must sync to ensure BufferedDeletesStream - // cannot change liveDocs/pendingDeleteCount while - // we pull a copy: + synchronized (this) { + // Must sync to ensure BufferedDeletesStream cannot change liveDocs, + // pendingDeleteCount and field updates while we pull a copy: + reader = rld.getReaderForMerge(context); liveDocs = rld.getReadOnlyLiveDocs(); delCount = rld.getPendingDeleteCount() + info.getDelCount(); + assert reader != null; assert rld.verifyDocCounts(); if (infoStream.isEnabled("IW")) { @@ -3798,7 +4014,7 @@ // fix the reader's live docs and del count assert delCount > reader.numDeletedDocs(); // beware of zombies - SegmentReader newReader = new SegmentReader(info, reader.core, liveDocs, info.info.getDocCount() - delCount); + SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.info.getDocCount() - delCount); boolean released = false; try { rld.release(reader); @@ -3817,6 +4033,8 @@ segUpto++; } +// System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders()); + // we pass merge.getMergeReaders() instead of merge.readers to allow the // OneMerge to return a view over the actual segments to merge final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(), Index: lucene/core/src/java/org/apache/lucene/index/MergeState.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/MergeState.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/MergeState.java (working copy) @@ -105,11 +105,11 @@ } - private static class NoDelDocMap extends DocMap { + private static final class NoDelDocMap extends DocMap { private final int maxDoc; - private NoDelDocMap(int maxDoc) { + NoDelDocMap(int maxDoc) { this.maxDoc = maxDoc; } Index: 
lucene/core/src/java/org/apache/lucene/index/NumericFieldUpdates.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/NumericFieldUpdates.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/NumericFieldUpdates.java (working copy) @@ -0,0 +1,259 @@ +package org.apache.lucene.index; + +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.InPlaceMergeSorter; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.PagedGrowableWriter; +import org.apache.lucene.util.packed.PagedMutable; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Holds numeric values updates of documents, of a single + * {@link NumericDocValuesField}. + * + * @lucene.experimental + */ +interface NumericFieldUpdates { + + /** + * An iterator over documents and their updated values. Only documents with + * updates are returned by this iterator, and the documents are returned in + * increasing order. + */ + static abstract class UpdatesIterator { + + /** + * Returns the next document which has an update, or + * {@link DocIdSetIterator#NO_MORE_DOCS} if there are no more documents to + * return. + */ + abstract int nextDoc(); + + /** Returns the current document this iterator is on. */ + abstract int doc(); + + /** + * Returns the value of the document returned from {@link #nextDoc()}. A + * {@code null} value means that it was unset for this document. + */ + abstract Long value(); + + /** + * Reset the iterator's state. Should be called before {@link #nextDoc()} + * and {@link #value()}. + */ + abstract void reset(); + + } + + /** + * A {@link NumericFieldUpdates} which holds the updated documents and values + * in packed structures. Only supports up to 2B entries (docs and values) + * since we need to sort the docs/values and the Sorter interfaces currently + * only take integer indexes. 
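To make the UpdatesIterator contract described above concrete, here is a small consumption sketch. These types are package-private, so this assumes code living in org.apache.lucene.index; maxDoc and the doc/value pairs are made up, and DocIdSetIterator comes from org.apache.lucene.search. It mirrors how later hunks, e.g. ReadersAndLiveDocs.writeFieldUpdates, walk the updates:

    // Record two updates out of order (the second one unsets the value), then walk
    // them back in increasing doc ID order; value() returns null for unset documents.
    int maxDoc = 100; // illustrative
    NumericFieldUpdates updates = new NumericFieldUpdates.PackedNumericFieldUpdates(maxDoc);
    updates.add(7, 42L);
    updates.add(3, NumericUpdate.MISSING); // MISSING (not null) marks an unset
    NumericFieldUpdates.UpdatesIterator iter = updates.getUpdates();
    iter.reset(); // per the contract above, before nextDoc()/value()
    int doc;
    while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      System.out.println("doc=" + doc + " value=" + iter.value()); // prints doc=3 value=null, then doc=7 value=42
    }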
+ */ + static final class PackedNumericFieldUpdates implements NumericFieldUpdates { + + private FixedBitSet docsWithField; + private PagedMutable docs; + private PagedGrowableWriter values; + private int size; + + public PackedNumericFieldUpdates(int maxDoc) { + docsWithField = new FixedBitSet(64); + docs = new PagedMutable(1, 1024, PackedInts.bitsRequired(maxDoc - 1), PackedInts.COMPACT); + values = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST); + size = 0; + } + + @Override + public void add(int doc, Long value) { + assert value != null; + // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation + if (size == Integer.MAX_VALUE) { + throw new IllegalStateException("cannot support more than Integer.MAX_VALUE doc/value entries"); + } + + // grow the structures to have room for more elements + if (docs.size() == size) { + docs = docs.grow(size + 1); + values = values.grow(size + 1); + int numWords = (int) (docs.size() >> 6); + if (docsWithField.getBits().length <= numWords) { + numWords = ArrayUtil.oversize(numWords + 1, RamUsageEstimator.NUM_BYTES_LONG); + docsWithField = new FixedBitSet(docsWithField, numWords << 6); + } + } + + if (value != NumericUpdate.MISSING) { + // only mark the document as having a value in that field if the value wasn't set to null (MISSING) + docsWithField.set(size); + } + + docs.set(size, doc); + values.set(size, value.longValue()); + ++size; + } + + @Override + public UpdatesIterator getUpdates() { + final PagedMutable docs = this.docs; + final PagedGrowableWriter values = this.values; + final FixedBitSet docsWithField = this.docsWithField; + new InPlaceMergeSorter() { + @Override + protected void swap(int i, int j) { + long tmpDoc = docs.get(j); + docs.set(j, docs.get(i)); + docs.set(i, tmpDoc); + + long tmpVal = values.get(j); + values.set(j, values.get(i)); + values.set(i, tmpVal); + + boolean tmpBool = docsWithField.get(j); + if (docsWithField.get(i)) { + docsWithField.set(j); + } else { + docsWithField.clear(j); + } + if (tmpBool) { + docsWithField.set(i); + } else { + docsWithField.clear(i); + } + } + + @Override + protected int compare(int i, int j) { + int x = (int) docs.get(i); + int y = (int) docs.get(j); + return (x < y) ? -1 : ((x == y) ? 
0 : 1); + } + }.sort(0, size); + + final int size = this.size; + return new UpdatesIterator() { + private long idx = 0; // long so we don't overflow if size == Integer.MAX_VALUE + private int doc = -1; + private Long value = null; + + @Override + Long value() { + return value; + } + + @Override + int nextDoc() { + if (idx >= size) { + value = null; + return doc = DocIdSetIterator.NO_MORE_DOCS; + } + doc = (int) docs.get(idx); + ++idx; + while (idx < size && docs.get(idx) == doc) { + ++idx; + } + if (!docsWithField.get((int) (idx - 1))) { + value = null; + } else { + // idx points to the "next" element + value = Long.valueOf(values.get(idx - 1)); + } + return doc; + } + + @Override + int doc() { + return doc; + } + + @Override + void reset() { + doc = -1; + value = null; + idx = 0; + } + }; + } + + @Override + public void merge(NumericFieldUpdates other) { + if (other instanceof PackedNumericFieldUpdates) { + PackedNumericFieldUpdates packedOther = (PackedNumericFieldUpdates) other; + if (size + packedOther.size > Integer.MAX_VALUE) { + throw new IllegalStateException( + "cannot support more than Integer.MAX_VALUE doc/value entries; size=" + + size + " other.size=" + packedOther.size); + } + docs = docs.grow(size + packedOther.size); + values = values.grow(size + packedOther.size); + int numWords = (int) (docs.size() >> 6); + if (docsWithField.getBits().length <= numWords) { + numWords = ArrayUtil.oversize(numWords + 1, RamUsageEstimator.NUM_BYTES_LONG); + docsWithField = new FixedBitSet(docsWithField, numWords << 6); + } + for (int i = 0; i < packedOther.size; i++) { + int doc = (int) packedOther.docs.get(i); + if (packedOther.docsWithField.get(i)) { + docsWithField.set(size); + } + docs.set(size, doc); + values.set(size, packedOther.values.get(i)); + ++size; + } + } else { + UpdatesIterator iter = other.getUpdates(); + int doc; + while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + Long value = iter.value(); + if (value == null) { + value = NumericUpdate.MISSING; + } + add(doc, value); + } + } + } + + } + + /** + * Add an update to a document. For unsetting a value you should pass + * {@link NumericUpdate#MISSING} instead of {@code null}. + */ + public void add(int doc, Long value); + + /** + * Returns an {@link UpdatesIterator} over the updated documents and their + * values. + */ + public UpdatesIterator getUpdates(); + + /** + * Merge with another {@link NumericFieldUpdates}. This is called for a + * segment which received updates while it was being merged. The given updates + * should override whatever numeric updates are in that instance. 
+ */ + public void merge(NumericFieldUpdates other); + +} Property changes on: lucene/core/src/java/org/apache/lucene/index/NumericFieldUpdates.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/index/NumericUpdate.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/NumericUpdate.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/NumericUpdate.java (working copy) @@ -0,0 +1,68 @@ +package org.apache.lucene.index; + +import static org.apache.lucene.util.RamUsageEstimator.*; + +import org.apache.lucene.document.NumericDocValuesField; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** An in-place update to a numeric docvalues field */ +final class NumericUpdate { + + /* Rough logic: OBJ_HEADER + 3*PTR + INT + * Term: OBJ_HEADER + 2*PTR + * Term.field: 2*OBJ_HEADER + 4*INT + PTR + string.length*CHAR + * Term.bytes: 2*OBJ_HEADER + 2*INT + PTR + bytes.length + * String: 2*OBJ_HEADER + 4*INT + PTR + string.length*CHAR + * Long: OBJ_HEADER + LONG + */ + private static final int RAW_SIZE_IN_BYTES = 9*NUM_BYTES_OBJECT_HEADER + 8*NUM_BYTES_OBJECT_REF + 8*NUM_BYTES_INT + NUM_BYTES_LONG; + + static final Long MISSING = new Long(0); + + Term term; + String field; + Long value; + int docIDUpto = -1; // unassigned until applied, and confusing that it's here, when it's just used in BufferedDeletes... + + /** + * Constructor. + * + * @param term the {@link Term} which determines the documents that will be updated + * @param field the {@link NumericDocValuesField} to update + * @param value the updated value + */ + NumericUpdate(Term term, String field, Long value) { + this.term = term; + this.field = field; + this.value = value == null ? 
MISSING : value; + } + + int sizeInBytes() { + int sizeInBytes = RAW_SIZE_IN_BYTES; + sizeInBytes += term.field.length() * NUM_BYTES_CHAR; + sizeInBytes += term.bytes.bytes.length; + sizeInBytes += field.length() * NUM_BYTES_CHAR; + return sizeInBytes; + } + + @Override + public String toString() { + return "term=" + term + ",field=" + field + ",value=" + value; + } +} Property changes on: lucene/core/src/java/org/apache/lucene/index/NumericUpdate.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java (working copy) @@ -18,19 +18,31 @@ */ import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.NoSuchElementException; +import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.NumericFieldUpdates.UpdatesIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.TrackingDirectoryWrapper; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.MutableBits; // Used by IndexWriter to hold open SegmentReaders (for -// searching or merging), plus pending deletes, +// searching or merging), plus pending deletes and updates, // for a given segment -class ReadersAndLiveDocs { +class ReadersAndLiveDocs { // TODO (DVU_RENAME) to ReaderAndUpdates // Not final because we replace (clone) when we need to // change it and it's been shared: public final SegmentInfoPerCommit info; @@ -53,8 +65,8 @@ // Set once (null, and then maybe set, and never set again): private SegmentReader mergeReader; - // Holds the current shared (readable and writable - // liveDocs). This is null when there are no deleted + // Holds the current shared (readable and writable) + // liveDocs. This is null when there are no deleted // docs, and it's copy-on-write (cloned whenever we need // to change it but it's been shared to an external NRT // reader). @@ -66,12 +78,22 @@ // True if the current liveDocs is referenced by an // external NRT reader: - private boolean shared; + private boolean liveDocsShared; + // Indicates whether this segment is currently being merged. While a segment + // is merging, all field updates are also registered in the + // mergingNumericUpdates map. Also, calls to writeFieldUpdates merge the + // updates with mergingNumericUpdates. + // That way, when the segment is done merging, IndexWriter can apply the + // updates on the merged segment too. 
+ private boolean isMerging = false; + + private final Map mergingNumericUpdates = new HashMap(); + public ReadersAndLiveDocs(IndexWriter writer, SegmentInfoPerCommit info) { this.info = info; this.writer = writer; - shared = true; + liveDocsShared = true; } public void incRef() { @@ -93,7 +115,7 @@ public synchronized int getPendingDeleteCount() { return pendingDeleteCount; } - + // Call only from assert! public synchronized boolean verifyDocCounts() { int count; @@ -112,10 +134,8 @@ return true; } - // Get reader for searching/deleting - public synchronized SegmentReader getReader(IOContext context) throws IOException { - //System.out.println(" livedocs=" + rld.liveDocs); - + /** Returns a {@link SegmentReader}. */ + public SegmentReader getReader(IOContext context) throws IOException { if (reader == null) { // We steal returned ref: reader = new SegmentReader(info, writer.getConfig().getReaderTermsIndexDivisor(), context); @@ -122,15 +142,13 @@ if (liveDocs == null) { liveDocs = reader.getLiveDocs(); } - //System.out.println("ADD seg=" + rld.info + " isMerge=" + isMerge + " " + readerMap.size() + " in pool"); - //System.out.println(Thread.currentThread().getName() + ": getReader seg=" + info.name); } - + // Ref for caller reader.incRef(); return reader; } - + // Get reader for merging (does not load the terms // index): public synchronized SegmentReader getMergeReader(IOContext context) throws IOException { @@ -171,7 +189,7 @@ assert liveDocs != null; assert Thread.holdsLock(writer); assert docID >= 0 && docID < liveDocs.length() : "out of bounds: docid=" + docID + " liveDocsLength=" + liveDocs.length() + " seg=" + info.info.name + " docCount=" + info.info.getDocCount(); - assert !shared; + assert !liveDocsShared; final boolean didDelete = liveDocs.get(docID); if (didDelete) { ((MutableBits) liveDocs).clear(docID); @@ -217,9 +235,9 @@ getReader(context).decRef(); assert reader != null; } - shared = true; + liveDocsShared = true; if (liveDocs != null) { - return new SegmentReader(reader.getSegmentInfo(), reader.core, liveDocs, info.info.getDocCount() - info.getDelCount() - pendingDeleteCount); + return new SegmentReader(reader.getSegmentInfo(), reader, liveDocs, info.info.getDocCount() - info.getDelCount() - pendingDeleteCount); } else { assert reader.getLiveDocs() == liveDocs; reader.incRef(); @@ -231,7 +249,7 @@ assert Thread.holdsLock(writer); assert info.info.getDocCount() > 0; //System.out.println("initWritableLivedocs seg=" + info + " liveDocs=" + liveDocs + " shared=" + shared); - if (shared) { + if (liveDocsShared) { // Copy on write: this means we've cloned a // SegmentReader sharing the current liveDocs // instance; must now make a private clone so we can @@ -243,9 +261,7 @@ } else { liveDocs = liveDocsFormat.newLiveDocs(liveDocs); } - shared = false; - } else { - assert liveDocs != null; + liveDocsShared = false; } } @@ -257,7 +273,7 @@ public synchronized Bits getReadOnlyLiveDocs() { //System.out.println("getROLiveDocs seg=" + info); assert Thread.holdsLock(writer); - shared = true; + liveDocsShared = true; //if (liveDocs != null) { //System.out.println(" liveCount=" + liveDocs.count()); //} @@ -273,61 +289,288 @@ // deletes onto the newly merged segment, so we can // discard them on the sub-readers: pendingDeleteCount = 0; + dropMergingUpdates(); } - // Commit live docs to the directory (writes new - // _X_N.del files); returns true if it wrote the file - // and false if there were no new deletes to write: + // Commit live docs (writes new _X_N.del files) and field updates 
(writes new + // _X_N updates files) to the directory; returns true if it wrote any file + // and false if there were no new deletes or updates to write: + // TODO (DVU_RENAME) to writeDeletesAndUpdates public synchronized boolean writeLiveDocs(Directory dir) throws IOException { - //System.out.println("rld.writeLiveDocs seg=" + info + " pendingDelCount=" + pendingDeleteCount); - if (pendingDeleteCount != 0) { - // We have new deletes - assert liveDocs.length() == info.info.getDocCount(); + assert Thread.holdsLock(writer); + //System.out.println("rld.writeLiveDocs seg=" + info + " pendingDelCount=" + pendingDeleteCount + " numericUpdates=" + numericUpdates); + if (pendingDeleteCount == 0) { + return false; + } + + // We have new deletes + assert liveDocs.length() == info.info.getDocCount(); + + // Do this so we can delete any created files on + // exception; this saves all codecs from having to do + // it: + TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); + + // We can write directly to the actual name (vs to a + // .tmp & renaming it) because the file is not live + // until segments file is written: + boolean success = false; + try { + Codec codec = info.info.getCodec(); + codec.liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, trackingDir, info, pendingDeleteCount, IOContext.DEFAULT); + success = true; + } finally { + if (!success) { + // Advance only the nextWriteDelGen so that a 2nd + // attempt to write will write to a new file + info.advanceNextWriteDelGen(); + + // Delete any partially created file(s): + for (String fileName : trackingDir.getCreatedFiles()) { + try { + dir.deleteFile(fileName); + } catch (Throwable t) { + // Ignore so we throw only the first exc + } + } + } + } + + // If we hit an exc in the line above (eg disk full) + // then info's delGen remains pointing to the previous + // (successfully written) del docs: + info.advanceDelGen(); + info.setDelCount(info.getDelCount() + pendingDeleteCount); + pendingDeleteCount = 0; + + return true; + } - // Do this so we can delete any created files on - // exception; this saves all codecs from having to do - // it: - TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); + // Writes field updates (new _X_N updates files) to the directory + public synchronized void writeFieldUpdates(Directory dir, Map numericFieldUpdates) throws IOException { + assert Thread.holdsLock(writer); + //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates); + + assert numericFieldUpdates != null && !numericFieldUpdates.isEmpty(); + + // Do this so we can delete any created files on + // exception; this saves all codecs from having to do + // it: + TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); + + FieldInfos fieldInfos = null; + boolean success = false; + try { + final Codec codec = info.info.getCodec(); - // We can write directly to the actual name (vs to a - // .tmp & renaming it) because the file is not live - // until segments file is written: - boolean success = false; + // reader could be null e.g. for a just merged segment (from + // IndexWriter.commitMergedDeletes). + final SegmentReader reader = this.reader == null ? 
new SegmentReader(info, writer.getConfig().getReaderTermsIndexDivisor(), IOContext.READONCE) : this.reader; try { - info.info.getCodec().liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, trackingDir, info, pendingDeleteCount, IOContext.DEFAULT); - success = true; - } finally { - if (!success) { - // Advance only the nextWriteDelGen so that a 2nd - // attempt to write will write to a new file - info.advanceNextWriteDelGen(); - - // Delete any partially created file(s): - for(String fileName : trackingDir.getCreatedFiles()) { - try { - dir.deleteFile(fileName); - } catch (Throwable t) { - // Ignore so we throw only the first exc + // clone FieldInfos so that we can update their dvGen separately from + // the reader's infos and write them to a new fieldInfos_gen file + FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap); + // cannot use builder.add(reader.getFieldInfos()) because it does not + // clone FI.attributes as well FI.dvGen + for (FieldInfo fi : reader.getFieldInfos()) { + FieldInfo clone = builder.add(fi); + // copy the stuff FieldInfos.Builder doesn't copy + if (fi.attributes() != null) { + for (Entry e : fi.attributes().entrySet()) { + clone.putAttribute(e.getKey(), e.getValue()); } } + clone.setDocValuesGen(fi.getDocValuesGen()); } - } + // create new fields or update existing ones to have NumericDV type + for (String f : numericFieldUpdates.keySet()) { + builder.addOrUpdate(f, NumericDocValuesField.TYPE); + } + + fieldInfos = builder.finish(); + final long nextFieldInfosGen = info.getNextFieldInfosGen(); + final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX); + final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, writer.getConfig().getTermIndexInterval(), null, IOContext.DEFAULT, segmentSuffix); + final DocValuesFormat docValuesFormat = codec.docValuesFormat(); + final DocValuesConsumer fieldsConsumer = docValuesFormat.fieldsConsumer(state); + boolean fieldsConsumerSuccess = false; + try { +// System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: applying updates; seg=" + info + " updates=" + numericUpdates); + for (Entry e : numericFieldUpdates.entrySet()) { + final String field = e.getKey(); + final NumericFieldUpdates fieldUpdates = e.getValue(); + final FieldInfo fieldInfo = fieldInfos.fieldInfo(field); + assert fieldInfo != null; - // If we hit an exc in the line above (eg disk full) - // then info's delGen remains pointing to the previous - // (successfully written) del docs: - info.advanceDelGen(); - info.setDelCount(info.getDelCount() + pendingDeleteCount); + fieldInfo.setDocValuesGen(nextFieldInfosGen); + // write the numeric updates to a new gen'd docvalues file + fieldsConsumer.addNumericField(fieldInfo, new Iterable() { + final NumericDocValues currentValues = reader.getNumericDocValues(field); + final Bits docsWithField = reader.getDocsWithField(field); + final int maxDoc = reader.maxDoc(); + final UpdatesIterator updatesIter = fieldUpdates.getUpdates(); + @Override + public Iterator iterator() { + updatesIter.reset(); + return new Iterator() { - pendingDeleteCount = 0; - return true; - } else { - return false; + int curDoc = -1; + int updateDoc = updatesIter.nextDoc(); + + @Override + public boolean hasNext() { + return curDoc < maxDoc - 1; + } + + @Override + public Number next() { + if (++curDoc >= maxDoc) { + throw new NoSuchElementException("no more documents to return values for"); + } + if (curDoc == updateDoc) { // this 
document has an updated value + Long value = updatesIter.value(); // either null (unset value) or updated value + updateDoc = updatesIter.nextDoc(); // prepare for next round + return value; + } else { + // no update for this document + assert curDoc < updateDoc; + if (currentValues != null && docsWithField.get(curDoc)) { + // only read the current value if the document had a value before + return currentValues.get(curDoc); + } else { + return null; + } + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException("this iterator does not support removing elements"); + } + }; + } + }); + } + + codec.fieldInfosFormat().getFieldInfosWriter().write(trackingDir, info.info.name, segmentSuffix, fieldInfos, IOContext.DEFAULT); + fieldsConsumerSuccess = true; + } finally { + if (fieldsConsumerSuccess) { + fieldsConsumer.close(); + } else { + IOUtils.closeWhileHandlingException(fieldsConsumer); + } + } + } finally { + if (reader != this.reader) { +// System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader); + reader.close(); + } + } + + success = true; + } finally { + if (!success) { + // Advance only the nextWriteDocValuesGen so that a 2nd + // attempt to write will write to a new file + info.advanceNextWriteFieldInfosGen(); + + // Delete any partially created file(s): + for (String fileName : trackingDir.getCreatedFiles()) { + try { + dir.deleteFile(fileName); + } catch (Throwable t) { + // Ignore so we throw only the first exc + } + } + } } + + info.advanceFieldInfosGen(); + // copy all the updates to mergingUpdates, so they can later be applied to the merged segment + if (isMerging) { + for (Entry e : numericFieldUpdates.entrySet()) { + NumericFieldUpdates fieldUpdates = mergingNumericUpdates.get(e.getKey()); + if (fieldUpdates == null) { + mergingNumericUpdates.put(e.getKey(), e.getValue()); + } else { + fieldUpdates.merge(e.getValue()); + } + } + } + + // create a new map, keeping only the gens that are in use + Map> genUpdatesFiles = info.getUpdatesFiles(); + Map> newGenUpdatesFiles = new HashMap>(); + final long fieldInfosGen = info.getFieldInfosGen(); + for (FieldInfo fi : fieldInfos) { + long dvGen = fi.getDocValuesGen(); + if (dvGen != -1 && !newGenUpdatesFiles.containsKey(dvGen)) { + if (dvGen == fieldInfosGen) { + newGenUpdatesFiles.put(fieldInfosGen, trackingDir.getCreatedFiles()); + } else { + newGenUpdatesFiles.put(dvGen, genUpdatesFiles.get(dvGen)); + } + } + } + + info.setGenUpdatesFiles(newGenUpdatesFiles); + + // wrote new files, should checkpoint() + writer.checkpoint(); + + // if there is a reader open, reopen it to reflect the updates + if (reader != null) { + SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.info.getDocCount() - info.getDelCount() - pendingDeleteCount); + boolean reopened = false; + try { + reader.decRef(); + reader = newReader; + reopened = true; + } finally { + if (!reopened) { + newReader.decRef(); + } + } + } } + /** + * Returns a reader for merge. This method applies field updates if there are + * any and marks that this segment is currently merging. + */ + synchronized SegmentReader getReaderForMerge(IOContext context) throws IOException { + assert Thread.holdsLock(writer); + // must execute these two statements as atomic operation, otherwise we + // could lose updates if e.g. 
another thread calls writeFieldUpdates in + // between, or the updates are applied to the obtained reader, but then + // re-applied in IW.commitMergedDeletes (unnecessary work and potential + // bugs). + isMerging = true; + return getReader(context); + } + + /** + * Drops all merging updates. Called from IndexWriter after this segment + * finished merging (whether successfully or not). + */ + public synchronized void dropMergingUpdates() { + mergingNumericUpdates.clear(); + isMerging = false; + } + + /** Returns updates that came in while this segment was merging. */ + public synchronized Map getMergingFieldUpdates() { + return mergingNumericUpdates; + } + @Override public String toString() { - return "ReadersAndLiveDocs(seg=" + info + " pendingDeleteCount=" + pendingDeleteCount + " shared=" + shared + ")"; + StringBuilder sb = new StringBuilder(); + sb.append("ReadersAndLiveDocs(seg=").append(info); + sb.append(" pendingDeleteCount=").append(pendingDeleteCount); + sb.append(" liveDocsShared=").append(liveDocsShared); + return sb.toString(); } + } Index: lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (working copy) @@ -26,17 +26,15 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; -import org.apache.lucene.index.FieldInfo.DocValuesType; import org.apache.lucene.index.SegmentReader.CoreClosedListener; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.CloseableThreadLocal; import org.apache.lucene.util.IOUtils; @@ -44,18 +42,15 @@ * SegmentReader is cloned or reopened */ final class SegmentCoreReaders { - // Counts how many other reader share the core objects + // Counts how many other readers share the core objects // (freqStream, proxStream, tis, etc.) of this reader; // when coreRef drops to 0, these core objects may be // closed. 
A given instance of SegmentReader may be - // closed, even those it shares core objects with other + // closed, even though it shares core objects with other // SegmentReaders: private final AtomicInteger ref = new AtomicInteger(1); - final FieldInfos fieldInfos; - final FieldsProducer fields; - final DocValuesProducer dvProducer; final DocValuesProducer normsProducer; final int termsIndexDivisor; @@ -66,7 +61,7 @@ // TODO: make a single thread local w/ a // Thingy class holding fieldsReader, termVectorsReader, - // normsProducer, dvProducer + // normsProducer final CloseableThreadLocal fieldsReaderLocal = new CloseableThreadLocal() { @Override @@ -82,20 +77,6 @@ } }; - final CloseableThreadLocal> docValuesLocal = new CloseableThreadLocal>() { - @Override - protected Map initialValue() { - return new HashMap(); - } - }; - - final CloseableThreadLocal> docsWithFieldLocal = new CloseableThreadLocal>() { - @Override - protected Map initialValue() { - return new HashMap(); - } - }; - final CloseableThreadLocal> normsLocal = new CloseableThreadLocal>() { @Override protected Map initialValue() { @@ -124,8 +105,9 @@ cfsReader = null; cfsDir = dir; } - fieldInfos = codec.fieldInfosFormat().getFieldInfosReader().read(cfsDir, si.info.name, IOContext.READONCE); + final FieldInfos fieldInfos = owner.fieldInfos; + this.termsIndexDivisor = termsIndexDivisor; final PostingsFormat format = codec.postingsFormat(); final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, fieldInfos, context, termsIndexDivisor); @@ -136,13 +118,6 @@ // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! - if (fieldInfos.hasDocValues()) { - dvProducer = codec.docValuesFormat().fieldsProducer(segmentReadState); - assert dvProducer != null; - } else { - dvProducer = null; - } - if (fieldInfos.hasNorms()) { normsProducer = codec.normsFormat().normsProducer(segmentReadState); assert normsProducer != null; @@ -166,164 +141,23 @@ } } + int getRefCount() { + return ref.get(); + } + void incRef() { ref.incrementAndGet(); } - NumericDocValues getNumericDocValues(String field) throws IOException { - FieldInfo fi = fieldInfos.fieldInfo(field); - if (fi == null) { - // Field does not exist - return null; - } - if (fi.getDocValuesType() == null) { - // Field was not indexed with doc values - return null; - } - if (fi.getDocValuesType() != DocValuesType.NUMERIC) { - // DocValues were not numeric - return null; - } - - assert dvProducer != null; - - Map dvFields = docValuesLocal.get(); - - NumericDocValues dvs = (NumericDocValues) dvFields.get(field); - if (dvs == null) { - dvs = dvProducer.getNumeric(fi); - dvFields.put(field, dvs); - } - - return dvs; - } - - BinaryDocValues getBinaryDocValues(String field) throws IOException { - FieldInfo fi = fieldInfos.fieldInfo(field); - if (fi == null) { - // Field does not exist - return null; - } - if (fi.getDocValuesType() == null) { - // Field was not indexed with doc values - return null; - } - if (fi.getDocValuesType() != DocValuesType.BINARY) { - // DocValues were not binary - return null; - } - - assert dvProducer != null; - - Map dvFields = docValuesLocal.get(); - - BinaryDocValues dvs = (BinaryDocValues) dvFields.get(field); - if (dvs == null) { - dvs = dvProducer.getBinary(fi); - dvFields.put(field, dvs); - } - - return dvs; - } - - SortedDocValues getSortedDocValues(String field) throws IOException { - FieldInfo fi = fieldInfos.fieldInfo(field); - if (fi == null) { - 
// Field does not exist - return null; - } - if (fi.getDocValuesType() == null) { - // Field was not indexed with doc values - return null; - } - if (fi.getDocValuesType() != DocValuesType.SORTED) { - // DocValues were not sorted - return null; - } - - assert dvProducer != null; - - Map dvFields = docValuesLocal.get(); - - SortedDocValues dvs = (SortedDocValues) dvFields.get(field); - if (dvs == null) { - dvs = dvProducer.getSorted(fi); - dvFields.put(field, dvs); - } - - return dvs; - } - - SortedSetDocValues getSortedSetDocValues(String field) throws IOException { - FieldInfo fi = fieldInfos.fieldInfo(field); - if (fi == null) { - // Field does not exist - return null; - } - if (fi.getDocValuesType() == null) { - // Field was not indexed with doc values - return null; - } - if (fi.getDocValuesType() != DocValuesType.SORTED_SET) { - // DocValues were not sorted - return null; - } - - assert dvProducer != null; - - Map dvFields = docValuesLocal.get(); - - SortedSetDocValues dvs = (SortedSetDocValues) dvFields.get(field); - if (dvs == null) { - dvs = dvProducer.getSortedSet(fi); - dvFields.put(field, dvs); - } - - return dvs; - } - - Bits getDocsWithField(String field) throws IOException { - FieldInfo fi = fieldInfos.fieldInfo(field); - if (fi == null) { - // Field does not exist - return null; - } - if (fi.getDocValuesType() == null) { - // Field was not indexed with doc values - return null; - } - - assert dvProducer != null; - - Map dvFields = docsWithFieldLocal.get(); - - Bits dvs = dvFields.get(field); - if (dvs == null) { - dvs = dvProducer.getDocsWithField(fi); - dvFields.put(field, dvs); - } - - return dvs; - } - - NumericDocValues getNormValues(String field) throws IOException { - FieldInfo fi = fieldInfos.fieldInfo(field); - if (fi == null) { - // Field does not exist - return null; - } - if (!fi.hasNorms()) { - return null; - } - + NumericDocValues getNormValues(FieldInfo fi) throws IOException { assert normsProducer != null; Map normFields = normsLocal.get(); - NumericDocValues norms = (NumericDocValues) normFields.get(field); + NumericDocValues norms = (NumericDocValues) normFields.get(fi.name); if (norms == null) { norms = normsProducer.getNumeric(fi); - normFields.put(field, norms); + normFields.put(fi.name, norms); } return norms; @@ -331,8 +165,9 @@ void decRef() throws IOException { if (ref.decrementAndGet() == 0) { - IOUtils.close(termVectorsLocal, fieldsReaderLocal, docValuesLocal, normsLocal, docsWithFieldLocal, fields, - dvProducer, termVectorsReaderOrig, fieldsReaderOrig, cfsReader, normsProducer); +// System.err.println("--- closing core readers"); + IOUtils.close(termVectorsLocal, fieldsReaderLocal, normsLocal, fields, termVectorsReaderOrig, fieldsReaderOrig, + cfsReader, normsProducer); notifyCoreClosedListeners(); } } @@ -357,8 +192,7 @@ /** Returns approximate RAM bytes used */ public long ramBytesUsed() { - return ((dvProducer!=null) ? dvProducer.ramBytesUsed() : 0) + - ((normsProducer!=null) ? normsProducer.ramBytesUsed() : 0) + + return ((normsProducer!=null) ? normsProducer.ramBytesUsed() : 0) + ((fields!=null) ? fields.ramBytesUsed() : 0) + ((fieldsReaderOrig!=null)? fieldsReaderOrig.ramBytesUsed() : 0) + ((termVectorsReaderOrig!=null) ? 
termVectorsReaderOrig.ramBytesUsed() : 0); Index: lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java (working copy) @@ -0,0 +1,108 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RefCount; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Manages the {@link DocValuesProducer} held by {@link SegmentReader} and + * keeps track of their reference counting. + */ +final class SegmentDocValues { + + private final Map> genDVProducers = new HashMap>(); + + private RefCount newDocValuesProducer(SegmentInfoPerCommit si, IOContext context, Directory dir, + DocValuesFormat dvFormat, final Long gen, List infos, int termsIndexDivisor) throws IOException { + Directory dvDir = dir; + String segmentSuffix = ""; + if (gen.longValue() != -1) { + dvDir = si.info.dir; // gen'd files are written outside CFS, so use SegInfo directory + segmentSuffix = Long.toString(gen.longValue(), Character.MAX_RADIX); + } + + // set SegmentReadState to list only the fields that are relevant to that gen + SegmentReadState srs = new SegmentReadState(dvDir, si.info, new FieldInfos(infos.toArray(new FieldInfo[infos.size()])), context, termsIndexDivisor, segmentSuffix); + return new RefCount(dvFormat.fieldsProducer(srs)) { + @SuppressWarnings("synthetic-access") + @Override + protected void release() throws IOException { + object.close(); + synchronized (SegmentDocValues.this) { + genDVProducers.remove(gen); + } + } + }; + } + + /** Returns the {@link DocValuesProducer} for the given generation. */ + synchronized DocValuesProducer getDocValuesProducer(long gen, SegmentInfoPerCommit si, IOContext context, Directory dir, + DocValuesFormat dvFormat, List infos, int termsIndexDivisor) throws IOException { + RefCount dvp = genDVProducers.get(gen); + if (dvp == null) { + dvp = newDocValuesProducer(si, context, dir, dvFormat, gen, infos, termsIndexDivisor); + assert dvp != null; + genDVProducers.put(gen, dvp); + } else { + dvp.incRef(); + } + return dvp.get(); + } + + /** + * Decrement the reference count of the given {@link DocValuesProducer} + * generations. 
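SegmentDocValues above shares one DocValuesProducer per doc-values generation across SegmentReaders, using reference counting and naming gen'd files with the generation encoded in base 36. The following self-contained sketch illustrates that pattern only; GenProducerCache, Factory and suffixFor are illustrative names, not part of this patch.

import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

// Sketch only: one shared producer per doc-values generation.
final class GenProducerCache<T extends Closeable> {

  interface Factory<P> {
    /** segmentSuffix is "" for gen -1, otherwise the generation encoded in base 36. */
    P open(String segmentSuffix) throws IOException;
  }

  private static final class Ref<P> {
    final P producer;
    int refCount = 1;
    Ref(P producer) { this.producer = producer; }
  }

  private final Map<Long, Ref<T>> byGen = new HashMap<Long, Ref<T>>();

  /** Same naming rule the patch uses for gen'd doc-values files. */
  static String suffixFor(long gen) {
    return gen == -1 ? "" : Long.toString(gen, Character.MAX_RADIX); // base 36
  }

  /** Returns the producer for this generation, opening it on first use. */
  synchronized T get(long gen, Factory<T> factory) throws IOException {
    Ref<T> ref = byGen.get(gen);
    if (ref == null) {
      ref = new Ref<T>(factory.open(suffixFor(gen)));
      byGen.put(gen, ref);
    } else {
      ref.refCount++;              // another reader shares this generation
    }
    return ref.producer;
  }

  /** Drops one reference; the last release for a generation closes it. */
  synchronized void release(long gen) throws IOException {
    Ref<T> ref = byGen.get(gen);
    if (ref != null && --ref.refCount == 0) {
      byGen.remove(gen);
      ref.producer.close();
    }
  }
}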
+ */ + synchronized void decRef(List dvProducersGens) throws IOException { + Throwable t = null; + for (Long gen : dvProducersGens) { + RefCount dvp = genDVProducers.get(gen); + assert dvp != null : "gen=" + gen; + try { + dvp.decRef(); + } catch (Throwable th) { + if (t != null) { + t = th; + } + } + } + if (t != null) { + IOUtils.reThrow(t); + } + } + + /** Returns approximate RAM bytes used. */ + synchronized long ramBytesUsed() { + long ramBytesUsed = 0; + for (RefCount dvp : genDVProducers.values()) { + ramBytesUsed += dvp.get().ramBytesUsed(); + } + return ramBytesUsed; + } + +} Property changes on: lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -61,6 +61,7 @@ private Map diagnostics; + /** @deprecated not used anymore */ private Map attributes; // Tracks the Lucene version this segment was created with, since 3.1. Null @@ -79,6 +80,16 @@ public Map getDiagnostics() { return diagnostics; } + + /** + * Construct a new complete SegmentInfo instance from input. + *

+ * Note: this is public only to allow access from the codecs package.
+ */ + public SegmentInfo(Directory dir, String version, String name, int docCount, + boolean isCompoundFile, Codec codec, Map diagnostics) { + this(dir, version, name, docCount, isCompoundFile, codec, diagnostics, null); + } /** * Construct a new complete SegmentInfo instance from input. @@ -128,7 +139,7 @@ public void setCodec(Codec codec) { assert this.codec == null; if (codec == null) { - throw new IllegalArgumentException("segmentCodecs must be non-null"); + throw new IllegalArgumentException("codec must be non-null"); } this.codec = codec; } @@ -179,7 +190,6 @@ * left off when there are no deletions).

*/ public String toString(Directory dir, int delCount) { - StringBuilder s = new StringBuilder(); s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':'); char cfs = getUseCompoundFile() ? 'c' : 'C'; @@ -271,6 +281,8 @@ /** * Get a codec attribute value, or null if it does not exist + * + * @deprecated no longer supported */ public String getAttribute(String key) { if (attributes == null) { @@ -283,12 +295,14 @@ /** * Puts a codec attribute value. *

- * This is a key-value mapping for the field that the codec can use - * to store additional metadata, and will be available to the codec - * when reading the segment via {@link #getAttribute(String)} + * This is a key-value mapping for the field that the codec can use to store + * additional metadata, and will be available to the codec when reading the + * segment via {@link #getAttribute(String)} *

- * If a value already exists for the field, it will be replaced with - * the new value. + * If a value already exists for the field, it will be replaced with the new + * value. + * + * @deprecated no longer supported */ public String putAttribute(String key, String value) { if (attributes == null) { @@ -301,6 +315,8 @@ * Returns the internal codec attributes map. * * @return internal codec attributes map. May be null if no mappings exist. + * + * @deprecated no longer supported */ public Map attributes() { return attributes; Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java (working copy) @@ -19,7 +19,12 @@ import java.io.IOException; import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; import org.apache.lucene.store.Directory; @@ -27,9 +32,8 @@ * fields. * * @lucene.experimental */ - -public class SegmentInfoPerCommit { - +public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo + /** The {@link SegmentInfo} that we wrap. */ public final SegmentInfo info; @@ -44,15 +48,31 @@ // attempt to write: private long nextWriteDelGen; + // Generation number of the FieldInfos (-1 if there are no updates) + private long fieldInfosGen; + + // Normally 1 + fieldInfosGen, unless an exception was hit on last attempt to + // write + private long nextWriteFieldInfosGen; + + // Track the per-generation updates files + private final Map> genUpdatesFiles = new HashMap>(); + private volatile long sizeInBytes = -1; - /** Sole constructor. - * @param info {@link SegmentInfo} that we wrap - * @param delCount number of deleted documents in this segment - * @param delGen deletion generation number (used to name - deletion files) + /** + * Sole constructor. + * + * @param info + * {@link SegmentInfo} that we wrap + * @param delCount + * number of deleted documents in this segment + * @param delGen + * deletion generation number (used to name deletion files) + * @param fieldInfosGen + * FieldInfos generation number (used to name field-infos files) **/ - public SegmentInfoPerCommit(SegmentInfo info, int delCount, long delGen) { + public SegmentInfoPerCommit(SegmentInfo info, int delCount, long delGen, long fieldInfosGen) { this.info = info; this.delCount = delCount; this.delGen = delGen; @@ -61,8 +81,26 @@ } else { nextWriteDelGen = delGen+1; } + + this.fieldInfosGen = fieldInfosGen; + if (fieldInfosGen == -1) { + nextWriteFieldInfosGen = 1; + } else { + nextWriteFieldInfosGen = fieldInfosGen + 1; + } } + /** Returns the per generation updates files. */ + public Map> getUpdatesFiles() { + return Collections.unmodifiableMap(genUpdatesFiles); + } + + /** Sets the updates file names per generation. Does not deep clone the map. */ + public void setGenUpdatesFiles(Map> genUpdatesFiles) { + this.genUpdatesFiles.clear(); + this.genUpdatesFiles.putAll(genUpdatesFiles); + } + /** Called when we succeed in writing deletes */ void advanceDelGen() { delGen = nextWriteDelGen; @@ -76,6 +114,21 @@ void advanceNextWriteDelGen() { nextWriteDelGen++; } + + /** Called when we succeed in writing a new FieldInfos generation. 
*/ + void advanceFieldInfosGen() { + fieldInfosGen = nextWriteFieldInfosGen; + nextWriteFieldInfosGen = fieldInfosGen + 1; + sizeInBytes = -1; + } + + /** + * Called if there was an exception while writing a new generation of + * FieldInfos, so that we don't try to write to the same file more than once. + */ + void advanceNextWriteFieldInfosGen() { + nextWriteFieldInfosGen++; + } /** Returns total size in bytes of all files for this * segment. @@ -98,9 +151,17 @@ // Start from the wrapped info's files: Collection files = new HashSet(info.files()); + // TODO we could rely on TrackingDir.getCreatedFiles() (like we do for + // updates) and then maybe even be able to remove LiveDocsFormat.files(). + // Must separately add any live docs files: info.getCodec().liveDocsFormat().files(this, files); + // Must separately add any field updates files + for (Set updateFiles : genUpdatesFiles.values()) { + files.addAll(updateFiles); + } + return files; } @@ -117,20 +178,6 @@ sizeInBytes = -1; } - void clearDelGen() { - delGen = -1; - sizeInBytes = -1; - } - - /** - * Sets the generation number of the live docs file. - * @see #getDelGen() - */ - public void setDelGen(long delGen) { - this.delGen = delGen; - sizeInBytes = -1; - } - /** Returns true if there are any deletions for the * segment at this commit. */ public boolean hasDeletions() { @@ -137,7 +184,25 @@ return delGen != -1; } + /** Returns true if there are any field updates for the segment in this commit. */ + public boolean hasFieldUpdates() { + return fieldInfosGen != -1; + } + + /** Returns the next available generation number of the FieldInfos files. */ + public long getNextFieldInfosGen() { + return nextWriteFieldInfosGen; + } + /** + * Returns the generation number of the field infos file or -1 if there are no + * field updates yet. + */ + public long getFieldInfosGen() { + return fieldInfosGen; + } + + /** * Returns the next available generation number * of the live docs file. */ @@ -171,6 +236,9 @@ if (delGen != -1) { s += ":delGen=" + delGen; } + if (fieldInfosGen != -1) { + s += ":fieldInfosGen=" + fieldInfosGen; + } return s; } @@ -181,12 +249,19 @@ @Override public SegmentInfoPerCommit clone() { - SegmentInfoPerCommit other = new SegmentInfoPerCommit(info, delCount, delGen); + SegmentInfoPerCommit other = new SegmentInfoPerCommit(info, delCount, delGen, fieldInfosGen); // Not clear that we need to carry over nextWriteDelGen // (i.e. do we ever clone after a failed write and // before the next successful write?), but just do it to // be safe: other.nextWriteDelGen = nextWriteDelGen; + other.nextWriteFieldInfosGen = nextWriteFieldInfosGen; + + // deep clone + for (Entry> e : genUpdatesFiles.entrySet()) { + other.genUpdatesFiles.put(e.getKey(), new HashSet(e.getValue())); + } + return other; } } Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (working copy) @@ -17,7 +17,6 @@ * limitations under the License. 
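SegmentInfoPerCommit now tracks fieldInfosGen alongside delGen, each paired with a next-write counter that advances on a successful write and is bumped on a failed one, so a retry never reuses a half-written file name. A minimal sketch of that bookkeeping follows; GenerationTracker is an illustrative name, not part of this patch.

// Sketch only: the two-counter bookkeeping applied to both delGen and fieldInfosGen.
final class GenerationTracker {
  private long gen;           // last successfully written generation, -1 if none
  private long nextWriteGen;  // generation to use for the next write attempt

  GenerationTracker(long gen) {
    this.gen = gen;
    this.nextWriteGen = gen == -1 ? 1 : gen + 1;
  }

  long current()       { return gen; }
  long nextWrite()     { return nextWriteGen; }
  boolean hasUpdates() { return gen != -1; }

  /** Called when a generation file was written successfully. */
  void advance() {
    gen = nextWriteGen;
    nextWriteGen = gen + 1;
  }

  /** Called after a failed write so the next attempt uses a fresh file name. */
  void advanceNextWrite() {
    nextWriteGen++;
  }
}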
*/ -import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; @@ -29,10 +28,12 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.lucene3x.Lucene3xCodec; import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoFormat; @@ -39,7 +40,7 @@ import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoReader; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.ChecksumIndexOutput; -import org.apache.lucene.store.DataOutput; // javadocs +import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -47,7 +48,6 @@ import org.apache.lucene.store.NoSuchDirectoryException; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.StringHelper; -import org.apache.lucene.util.ThreadInterruptedException; /** * A collection of segmentInfo objects with methods for operating on @@ -75,7 +75,7 @@ *

 *   • segments.gen: GenHeader, Generation, Generation
 *   • segments_N: Header, Version, NameCounter, SegCount,
-*       <SegName, SegCodec, DelGen, DeletionCount>SegCount,
+*       <SegName, SegCodec, DelGen, DeletionCount, FieldInfosGen, UpdatesFiles>SegCount,
 *       CommitUserData, Checksum
 *
@@ -84,9 +84,10 @@
 *   • Header --> {@link CodecUtil#writeHeader CodecHeader}
 *   • GenHeader, NameCounter, SegCount, DeletionCount --> {@link DataOutput#writeInt Int32}
-*   • Generation, Version, DelGen, Checksum --> {@link DataOutput#writeLong Int64}
+*   • Generation, Version, DelGen, Checksum, FieldInfosGen --> {@link DataOutput#writeLong Int64}
 *   • SegName, SegCodec --> {@link DataOutput#writeString String}
 *   • CommitUserData --> {@link DataOutput#writeStringStringMap Map<String,String>}
+*   • UpdatesFiles --> {@link DataOutput#writeStringSet(Set) Set<String>}
 *
 * Field Descriptions:
@@ -109,6 +110,10 @@
 *   • CommitUserData stores an optional user-supplied opaque
 *     Map<String,String> that was passed to
 *     {@link IndexWriter#setCommitData(java.util.Map)}.
+*   • FieldInfosGen is the generation count of the fieldInfos file. If this is -1,
+*     there are no updates to the fieldInfos in that segment. Anything above zero
+*     means there are updates to fieldInfos stored by {@link FieldInfosFormat}.
+*   • UpdatesFiles stores the list of files that were updated in that segment.
    * @@ -116,11 +121,12 @@ */ public final class SegmentInfos implements Cloneable, Iterable { - /** - * The file format version for the segments_N codec header - */ + /** The file format version for the segments_N codec header, up to 4.5. */ public static final int VERSION_40 = 0; + /** The file format version for the segments_N codec header, since 4.6+. */ + public static final int VERSION_46 = 1; + /** Used for the segments.gen file only! * Whenever you add a new format, make it 1 smaller (negative version logic)! */ public static final int FORMAT_SEGMENTS_GEN_CURRENT = -2; @@ -320,7 +326,7 @@ final int format = input.readInt(); if (format == CodecUtil.CODEC_MAGIC) { // 4.0+ - CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_40); + int actualFormat = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_46); version = input.readLong(); counter = input.readInt(); int numSegments = input.readInt(); @@ -338,7 +344,25 @@ if (delCount < 0 || delCount > info.getDocCount()) { throw new CorruptIndexException("invalid deletion count: " + delCount + " (resource: " + input + ")"); } - add(new SegmentInfoPerCommit(info, delCount, delGen)); + long fieldInfosGen = -1; + if (actualFormat >= VERSION_46) { + fieldInfosGen = input.readLong(); + } + SegmentInfoPerCommit siPerCommit = new SegmentInfoPerCommit(info, delCount, delGen, fieldInfosGen); + if (actualFormat >= VERSION_46) { + int numGensUpdatesFiles = input.readInt(); + final Map> genUpdatesFiles; + if (numGensUpdatesFiles == 0) { + genUpdatesFiles = Collections.emptyMap(); + } else { + genUpdatesFiles = new HashMap>(numGensUpdatesFiles); + for (int i = 0; i < numGensUpdatesFiles; i++) { + genUpdatesFiles.put(input.readLong(), input.readStringSet()); + } + } + siPerCommit.setGenUpdatesFiles(genUpdatesFiles); + } + add(siPerCommit); } userData = input.readStringStringMap(); } else { @@ -408,7 +432,7 @@ try { segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentsFileName, IOContext.DEFAULT)); - CodecUtil.writeHeader(segnOutput, "segments", VERSION_40); + CodecUtil.writeHeader(segnOutput, "segments", VERSION_46); segnOutput.writeLong(version); segnOutput.writeInt(counter); // write counter segnOutput.writeInt(size()); // write infos @@ -418,6 +442,13 @@ segnOutput.writeString(si.getCodec().getName()); segnOutput.writeLong(siPerCommit.getDelGen()); segnOutput.writeInt(siPerCommit.getDelCount()); + segnOutput.writeLong(siPerCommit.getFieldInfosGen()); + final Map> genUpdatesFiles = siPerCommit.getUpdatesFiles(); + segnOutput.writeInt(genUpdatesFiles.size()); + for (Entry> e : genUpdatesFiles.entrySet()) { + segnOutput.writeLong(e.getKey()); + segnOutput.writeStringSet(e.getValue()); + } assert si.dir == directory; assert siPerCommit.getDelCount() <= si.getDocCount(); @@ -931,6 +962,7 @@ files.addAll(info.files()); } } + return files; } Index: lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -145,7 +145,7 @@ // write the merged infos FieldInfosWriter fieldInfosWriter = codec.fieldInfosFormat().getFieldInfosWriter(); - fieldInfosWriter.write(directory, mergeState.segmentInfo.name, mergeState.fieldInfos, context); + fieldInfosWriter.write(directory, mergeState.segmentInfo.name, "", mergeState.fieldInfos, context); return mergeState; } Index: 
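The hunks above extend the segments_N format with a per-segment FieldInfosGen and a per-generation updates-files map, gated on the new VERSION_46 header. The following minimal sketch shows only the framing of that data, using java.io.DataInput/DataOutput in place of Lucene's store classes (which provide writeStringSet/readStringSet); UpdatesFilesCodec and its method names are illustrative, not part of this patch.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Sketch only: the shape of the new per-segment data in segments_N.
final class UpdatesFilesCodec {

  static final int VERSION_40 = 0;
  static final int VERSION_46 = 1;

  static void write(DataOutput out, long fieldInfosGen,
                    Map<Long, Set<String>> genUpdatesFiles) throws IOException {
    out.writeLong(fieldInfosGen);            // -1 means "no field updates"
    out.writeInt(genUpdatesFiles.size());
    for (Map.Entry<Long, Set<String>> e : genUpdatesFiles.entrySet()) {
      out.writeLong(e.getKey());             // the generation
      out.writeInt(e.getValue().size());
      for (String file : e.getValue()) {
        out.writeUTF(file);                  // Lucene writes the whole set via writeStringSet
      }
    }
  }

  static Map<Long, Set<String>> read(DataInput in, int actualFormat) throws IOException {
    Map<Long, Set<String>> genUpdatesFiles = new HashMap<Long, Set<String>>();
    if (actualFormat < VERSION_46) {
      return genUpdatesFiles;                // older segments_N carries none of this
    }
    long fieldInfosGen = in.readLong();      // kept on SegmentInfoPerCommit in the real code
    int numGens = in.readInt();
    for (int i = 0; i < numGens; i++) {
      long gen = in.readLong();
      int numFiles = in.readInt();
      Set<String> files = new HashSet<String>();
      for (int j = 0; j < numFiles; j++) {
        files.add(in.readUTF());
      }
      genUpdatesFiles.put(gen, files);
    }
    return genUpdatesFiles;
  }
}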
lucene/core/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -18,13 +18,24 @@ */ import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; -import org.apache.lucene.store.Directory; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; -import org.apache.lucene.search.FieldCache; // javadocs +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.store.CompoundFileDirectory; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.CloseableThreadLocal; /** * IndexReader implementation over a single segment. @@ -44,7 +55,28 @@ private final int numDocs; final SegmentCoreReaders core; + final SegmentDocValues segDocValues; + + final CloseableThreadLocal> docValuesLocal = new CloseableThreadLocal>() { + @Override + protected Map initialValue() { + return new HashMap(); + } + }; + final CloseableThreadLocal> docsWithFieldLocal = new CloseableThreadLocal>() { + @Override + protected Map initialValue() { + return new HashMap(); + } + }; + + final Map dvProducers = new HashMap(); + + final FieldInfos fieldInfos; + + private final List dvGens = new ArrayList(); + /** * Constructs a new SegmentReader with a new core. * @throws CorruptIndexException if the index is corrupt @@ -53,17 +85,32 @@ // TODO: why is this public? public SegmentReader(SegmentInfoPerCommit si, int termInfosIndexDivisor, IOContext context) throws IOException { this.si = si; + // TODO if the segment uses CFS, we may open the CFS file twice: once for + // reading the FieldInfos (if they are not gen'd) and second time by + // SegmentCoreReaders. We can open the CFS here and pass to SCR, but then it + // results in less readable code (resource not closed where it was opened). + // Best if we could somehow read FieldInfos in SCR but not keep it there, but + // constructors don't allow returning two things... + fieldInfos = readFieldInfos(si); core = new SegmentCoreReaders(this, si.info.dir, si, context, termInfosIndexDivisor); + segDocValues = new SegmentDocValues(); + boolean success = false; + final Codec codec = si.info.getCodec(); try { if (si.hasDeletions()) { // NOTE: the bitvector is stored using the regular directory, not cfs - liveDocs = si.info.getCodec().liveDocsFormat().readLiveDocs(directory(), si, IOContext.READONCE); + liveDocs = codec.liveDocsFormat().readLiveDocs(directory(), si, IOContext.READONCE); } else { assert si.getDelCount() == 0; liveDocs = null; } numDocs = si.info.getDocCount() - si.getDelCount(); + + if (fieldInfos.hasDocValues()) { + initDocValuesProducers(codec); + } + success = true; } finally { // With lock-less commits, it's entirely possible (and @@ -72,7 +119,7 @@ // of things that were opened so that we don't have to // wait for a GC to do so. 
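The constructor above reads the per-commit FieldInfos and then calls initDocValuesProducers, which (together with getGenInfos, shown further below) groups doc-values fields by their generation so that exactly one producer is opened per generation and shared by all fields written in it. The following standalone sketch shows that flow under stand-in types (DvField, ProducerSource) rather than Lucene's FieldInfo and DocValuesProducer.

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Sketch only: group fields by dv generation, then index producers by field name.
final class DvField {
  final String name;
  final long docValuesGen;          // -1 when the field's values were never updated
  DvField(String name, long docValuesGen) {
    this.name = name;
    this.docValuesGen = docValuesGen;
  }
}

interface ProducerSource<P> {
  P producerFor(long gen, List<DvField> fieldsOfGen) throws IOException;
}

final class DocValuesInit {
  static <P> Map<String, P> byField(Iterable<DvField> fields, ProducerSource<P> source)
      throws IOException {
    // gen -> fields written in that generation
    Map<Long, List<DvField>> genInfos = new HashMap<Long, List<DvField>>();
    for (DvField f : fields) {
      List<DvField> infos = genInfos.get(f.docValuesGen);
      if (infos == null) {
        infos = new ArrayList<DvField>();
        genInfos.put(f.docValuesGen, infos);
      }
      infos.add(f);
    }
    // one producer per generation, shared by all fields of that generation
    Map<String, P> byField = new HashMap<String, P>();
    for (Map.Entry<Long, List<DvField>> e : genInfos.entrySet()) {
      P producer = source.producerFor(e.getKey(), e.getValue());
      for (DvField fi : e.getValue()) {
        byField.put(fi.name, producer);
      }
    }
    return byField;
  }
}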
if (!success) { - core.decRef(); + doClose(); } } } @@ -80,8 +127,8 @@ /** Create new SegmentReader sharing core from a previous * SegmentReader and loading new live docs from a new * deletes file. Used by openIfChanged. */ - SegmentReader(SegmentInfoPerCommit si, SegmentCoreReaders core) throws IOException { - this(si, core, + SegmentReader(SegmentInfoPerCommit si, SegmentReader sr) throws IOException { + this(si, sr, si.info.getCodec().liveDocsFormat().readLiveDocs(si.info.dir, si, IOContext.READONCE), si.info.getDocCount() - si.getDelCount()); } @@ -90,17 +137,106 @@ * SegmentReader and using the provided in-memory * liveDocs. Used by IndexWriter to provide a new NRT * reader */ - SegmentReader(SegmentInfoPerCommit si, SegmentCoreReaders core, Bits liveDocs, int numDocs) { + SegmentReader(SegmentInfoPerCommit si, SegmentReader sr, Bits liveDocs, int numDocs) throws IOException { this.si = si; - this.core = core; - core.incRef(); - - assert liveDocs != null; this.liveDocs = liveDocs; - this.numDocs = numDocs; + this.core = sr.core; + core.incRef(); + this.segDocValues = sr.segDocValues; + +// System.out.println("[" + Thread.currentThread().getName() + "] SR.init: sharing reader: " + sr + " for gens=" + sr.genDVProducers.keySet()); + + // increment refCount of DocValuesProducers that are used by this reader + boolean success = false; + try { + final Codec codec = si.info.getCodec(); + if (si.getFieldInfosGen() == -1) { + fieldInfos = sr.fieldInfos; + } else { + fieldInfos = readFieldInfos(si); + } + + if (fieldInfos.hasDocValues()) { + initDocValuesProducers(codec); + } + success = true; + } finally { + if (!success) { + doClose(); + } + } } + // initialize the per-field DocValuesProducer + private void initDocValuesProducers(Codec codec) throws IOException { + final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir; + final DocValuesFormat dvFormat = codec.docValuesFormat(); + final Map> genInfos = getGenInfos(); + +// System.out.println("[" + Thread.currentThread().getName() + "] SR.initDocValuesProducers: segInfo=" + si + "; gens=" + genInfos.keySet()); + + for (Entry> e : genInfos.entrySet()) { + Long gen = e.getKey(); + List infos = e.getValue(); + DocValuesProducer dvp = segDocValues.getDocValuesProducer(gen, si, IOContext.READ, dir, dvFormat, infos, getTermInfosIndexDivisor()); + for (FieldInfo fi : infos) { + dvProducers.put(fi.name, dvp); + } + } + + dvGens.addAll(genInfos.keySet()); + } + + /** + * Reads the most recent {@link FieldInfos} of the given segment info. + * + * @lucene.internal + */ + static FieldInfos readFieldInfos(SegmentInfoPerCommit info) throws IOException { + final Directory dir; + final boolean closeDir; + if (info.getFieldInfosGen() == -1 && info.info.getUseCompoundFile()) { + // no fieldInfos gen and segment uses a compound file + dir = new CompoundFileDirectory(info.info.dir, + IndexFileNames.segmentFileName(info.info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), + IOContext.READONCE, + false); + closeDir = true; + } else { + // gen'd FIS are read outside CFS, or the segment doesn't use a compound file + dir = info.info.dir; + closeDir = false; + } + + try { + final String segmentSuffix = info.getFieldInfosGen() == -1 ? "" : Long.toString(info.getFieldInfosGen(), Character.MAX_RADIX); + return info.info.getCodec().fieldInfosFormat().getFieldInfosReader().read(dir, info.info.name, segmentSuffix, IOContext.READONCE); + } finally { + if (closeDir) { + dir.close(); + } + } + } + + // returns a gen->List mapping. 
Fields without DV updates have gen=-1 + private Map> getGenInfos() { + final Map> genInfos = new HashMap>(); + for (FieldInfo fi : fieldInfos) { + if (fi.getDocValuesType() == null) { + continue; + } + long gen = fi.getDocValuesGen(); + List infos = genInfos.get(gen); + if (infos == null) { + infos = new ArrayList(); + genInfos.put(gen, infos); + } + infos.add(fi); + } + return genInfos; + } + @Override public Bits getLiveDocs() { ensureOpen(); @@ -110,13 +246,20 @@ @Override protected void doClose() throws IOException { //System.out.println("SR.close seg=" + si); - core.decRef(); + try { + core.decRef(); + } finally { + dvProducers.clear(); + docValuesLocal.close(); + docsWithFieldLocal.close(); + segDocValues.decRef(dvGens); + } } @Override public FieldInfos getFieldInfos() { ensureOpen(); - return core.fieldInfos; + return fieldInfos; } /** Expert: retrieve thread-private {@link @@ -226,40 +369,151 @@ return core.termsIndexDivisor; } + // returns the FieldInfo that corresponds to the given field and type, or + // null if the field does not exist, or not indexed as the requested + // DovDocValuesType. + private FieldInfo getDVField(String field, DocValuesType type) { + FieldInfo fi = fieldInfos.fieldInfo(field); + if (fi == null) { + // Field does not exist + return null; + } + if (fi.getDocValuesType() == null) { + // Field was not indexed with doc values + return null; + } + if (fi.getDocValuesType() != type) { + // Field DocValues are different than requested type + return null; + } + + return fi; + } + @Override public NumericDocValues getNumericDocValues(String field) throws IOException { ensureOpen(); - return core.getNumericDocValues(field); + FieldInfo fi = getDVField(field, DocValuesType.NUMERIC); + if (fi == null) { + return null; + } + + DocValuesProducer dvProducer = dvProducers.get(field); + assert dvProducer != null; + + Map dvFields = docValuesLocal.get(); + + NumericDocValues dvs = (NumericDocValues) dvFields.get(field); + if (dvs == null) { + dvs = dvProducer.getNumeric(fi); + dvFields.put(field, dvs); + } + + return dvs; } @Override public Bits getDocsWithField(String field) throws IOException { ensureOpen(); - return core.getDocsWithField(field); + FieldInfo fi = fieldInfos.fieldInfo(field); + if (fi == null) { + // Field does not exist + return null; + } + if (fi.getDocValuesType() == null) { + // Field was not indexed with doc values + return null; + } + + DocValuesProducer dvProducer = dvProducers.get(field); + assert dvProducer != null; + + Map dvFields = docsWithFieldLocal.get(); + + Bits dvs = dvFields.get(field); + if (dvs == null) { + dvs = dvProducer.getDocsWithField(fi); + dvFields.put(field, dvs); + } + + return dvs; } @Override public BinaryDocValues getBinaryDocValues(String field) throws IOException { ensureOpen(); - return core.getBinaryDocValues(field); + FieldInfo fi = getDVField(field, DocValuesType.BINARY); + if (fi == null) { + return null; + } + + DocValuesProducer dvProducer = dvProducers.get(field); + assert dvProducer != null; + + Map dvFields = docValuesLocal.get(); + + BinaryDocValues dvs = (BinaryDocValues) dvFields.get(field); + if (dvs == null) { + dvs = dvProducer.getBinary(fi); + dvFields.put(field, dvs); + } + + return dvs; } @Override public SortedDocValues getSortedDocValues(String field) throws IOException { ensureOpen(); - return core.getSortedDocValues(field); + FieldInfo fi = getDVField(field, DocValuesType.SORTED); + if (fi == null) { + return null; + } + + DocValuesProducer dvProducer = dvProducers.get(field); + assert dvProducer 
!= null; + + Map dvFields = docValuesLocal.get(); + + SortedDocValues dvs = (SortedDocValues) dvFields.get(field); + if (dvs == null) { + dvs = dvProducer.getSorted(fi); + dvFields.put(field, dvs); + } + + return dvs; } @Override public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { ensureOpen(); - return core.getSortedSetDocValues(field); + FieldInfo fi = getDVField(field, DocValuesType.SORTED_SET); + if (fi == null) { + return null; + } + + DocValuesProducer dvProducer = dvProducers.get(field); + assert dvProducer != null; + + Map dvFields = docValuesLocal.get(); + + SortedSetDocValues dvs = (SortedSetDocValues) dvFields.get(field); + if (dvs == null) { + dvs = dvProducer.getSortedSet(fi); + dvFields.put(field, dvs); + } + + return dvs; } @Override public NumericDocValues getNormValues(String field) throws IOException { ensureOpen(); - return core.getNormValues(field); + FieldInfo fi = fieldInfos.fieldInfo(field); + if (fi == null || !fi.hasNorms()) { + // Field does not exist or does not index norms + return null; + } + return core.getNormValues(fi); } /** @@ -296,6 +550,13 @@ /** Returns approximate RAM Bytes used */ public long ramBytesUsed() { ensureOpen(); - return (core!=null) ? core.ramBytesUsed() : 0; + long ramBytesUsed = 0; + if (segDocValues != null) { + ramBytesUsed += segDocValues.ramBytesUsed(); + } + if (core != null) { + ramBytesUsed += core.ramBytesUsed(); + } + return ramBytesUsed; } } Index: lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java (working copy) @@ -81,6 +81,17 @@ /** Sole constructor. */ public SegmentWriteState(InfoStream infoStream, Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, int termIndexInterval, BufferedDeletes segDeletes, IOContext context) { + this(infoStream, directory, segmentInfo, fieldInfos, termIndexInterval, segDeletes, context, ""); + } + + /** + * Constructor which takes segment suffix. 
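The doc-values getters above all follow the same shape: resolve the field's FieldInfo, look up the producer that owns the field's generation, then cache the materialized instance in a per-thread map so each thread pays the decode cost at most once per field. A generic sketch of that shape follows, using a plain ThreadLocal where the patch uses CloseableThreadLocal; PerFieldDocValues and Loader are illustrative names.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

// Sketch only: per-field producer lookup plus a per-thread instance cache.
final class PerFieldDocValues<P, V> {

  interface Loader<P, V> {
    V load(P producer, String field) throws IOException;
  }

  private final Map<String, P> producerByField;  // one producer per dv generation
  private final Loader<P, V> loader;
  private final ThreadLocal<Map<String, V>> perThread = new ThreadLocal<Map<String, V>>() {
    @Override
    protected Map<String, V> initialValue() {
      return new HashMap<String, V>();
    }
  };

  PerFieldDocValues(Map<String, P> producerByField, Loader<P, V> loader) {
    this.producerByField = producerByField;
    this.loader = loader;
  }

  V get(String field) throws IOException {
    P producer = producerByField.get(field);
    if (producer == null) {
      return null;                            // field has no doc values of this type
    }
    Map<String, V> cache = perThread.get();
    V values = cache.get(field);
    if (values == null) {
      values = loader.load(producer, field);  // decode once per thread and field
      cache.put(field, values);
    }
    return values;
  }
}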
+ * + * @see #SegmentWriteState(InfoStream, Directory, SegmentInfo, FieldInfos, int, + * BufferedDeletes, IOContext) + */ + public SegmentWriteState(InfoStream infoStream, Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, + int termIndexInterval, BufferedDeletes segDeletes, IOContext context, String segmentSuffix) { this.infoStream = infoStream; this.segDeletes = segDeletes; this.directory = directory; @@ -87,7 +98,7 @@ this.segmentInfo = segmentInfo; this.fieldInfos = fieldInfos; this.termIndexInterval = termIndexInterval; - segmentSuffix = ""; + this.segmentSuffix = segmentSuffix; this.context = context; } Index: lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java (revision 1535645) +++ lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java (working copy) @@ -173,7 +173,8 @@ readerShared[i] = false; newReaders[i] = newReader; } else { - if (newReaders[i].getSegmentInfo().getDelGen() == infos.info(i).getDelGen()) { + if (newReaders[i].getSegmentInfo().getDelGen() == infos.info(i).getDelGen() + && newReaders[i].getSegmentInfo().getFieldInfosGen() == infos.info(i).getFieldInfosGen()) { // No change; this reader will be shared between // the old and the new one, so we must incRef // it: @@ -180,11 +181,18 @@ readerShared[i] = true; newReaders[i].incRef(); } else { + // there are changes to the reader, either liveDocs or DV updates readerShared[i] = false; // Steal the ref returned by SegmentReader ctor: assert infos.info(i).info.dir == newReaders[i].getSegmentInfo().info.dir; - assert infos.info(i).hasDeletions(); - newReaders[i] = new SegmentReader(infos.info(i), newReaders[i].core); + assert infos.info(i).hasDeletions() || infos.info(i).hasFieldUpdates(); + if (newReaders[i].getSegmentInfo().getDelGen() == infos.info(i).getDelGen()) { + // only DV updates + newReaders[i] = new SegmentReader(infos.info(i), newReaders[i], newReaders[i].getLiveDocs(), newReaders[i].numDocs()); + } else { + // both DV and liveDocs have changed + newReaders[i] = new SegmentReader(infos.info(i), newReaders[i]); + } } } success = true; Index: lucene/core/src/java/org/apache/lucene/util/RefCount.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/RefCount.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/util/RefCount.java (working copy) @@ -0,0 +1,84 @@ +package org.apache.lucene.util; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicInteger; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Manages reference counting for a given object. 
Extensions can override + * {@link #release()} to do custom logic when reference counting hits 0. + */ +public class RefCount { + + private final AtomicInteger refCount = new AtomicInteger(1); + + protected final T object; + + public RefCount(T object) { + this.object = object; + } + + /** + * Called when reference counting hits 0. By default this method does nothing, + * but extensions can override to e.g. release resources attached to object + * that is managed by this class. + */ + protected void release() throws IOException {} + + /** + * Decrements the reference counting of this object. When reference counting + * hits 0, calls {@link #release()}. + */ + public final void decRef() throws IOException { + final int rc = refCount.decrementAndGet(); + if (rc == 0) { + boolean success = false; + try { + release(); + success = true; + } finally { + if (!success) { + // Put reference back on failure + refCount.incrementAndGet(); + } + } + } else if (rc < 0) { + throw new IllegalStateException("too many decRef calls: refCount is " + rc + " after decrement"); + } + } + + public final T get() { + return object; + } + + /** Returns the current reference count. */ + public final int getRefCount() { + return refCount.get(); + } + + /** + * Increments the reference count. Calls to this method must be matched with + * calls to {@link #decRef()}. + */ + public final void incRef() { + refCount.incrementAndGet(); + } + +} + Property changes on: lucene/core/src/java/org/apache/lucene/util/RefCount.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec =================================================================== --- lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (revision 1535645) +++ lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (working copy) @@ -17,4 +17,5 @@ org.apache.lucene.codecs.lucene3x.Lucene3xCodec org.apache.lucene.codecs.lucene41.Lucene41Codec org.apache.lucene.codecs.lucene42.Lucene42Codec -org.apache.lucene.codecs.lucene45.Lucene45Codec \ No newline at end of file +org.apache.lucene.codecs.lucene45.Lucene45Codec +org.apache.lucene.codecs.lucene46.Lucene46Codec \ No newline at end of file Index: lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java =================================================================== --- lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java (working copy) @@ -19,7 +19,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.DirectoryReader; @@ -37,7 +37,7 @@ public class TestExternalCodecs extends LuceneTestCase { - private static final class CustomPerFieldCodec extends Lucene45Codec { + private static final class CustomPerFieldCodec extends Lucene46Codec { private final PostingsFormat ramFormat = PostingsFormat.forName("RAMOnly"); private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); Index: lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestTermInfosReaderIndex.java 
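A small usage sketch for the RefCount utility introduced above; SharedResourceExample and share() are illustrative names, while the RefCount API (incRef, decRef, get, release) is the one added by this patch.

import java.io.Closeable;
import java.io.IOException;

import org.apache.lucene.util.RefCount;

// Usage sketch: the wrapped resource is closed exactly once, when the last
// holder calls decRef().
class SharedResourceExample {

  static <T extends Closeable> RefCount<T> share(T resource) {
    return new RefCount<T>(resource) {
      @Override
      protected void release() throws IOException {
        object.close();            // runs only when the count reaches 0
      }
    };
  }

  static void demo(Closeable resource) throws IOException {
    RefCount<Closeable> ref = share(resource);  // count == 1
    ref.incRef();                               // a second holder, count == 2
    ref.decRef();                               // count == 1, resource still open
    ref.decRef();                               // count == 0, release() closes it
  }
}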
=================================================================== --- lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestTermInfosReaderIndex.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestTermInfosReaderIndex.java (working copy) @@ -97,7 +97,7 @@ r.close(); FieldInfosReader infosReader = new PreFlexRWCodec().fieldInfosFormat().getFieldInfosReader(); - FieldInfos fieldInfos = infosReader.read(directory, segment, IOContext.READONCE); + FieldInfos fieldInfos = infosReader.read(directory, segment, "", IOContext.READONCE); String segmentFileName = IndexFileNames.segmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION); long tiiFileLength = directory.fileLength(segmentFileName); IndexInput input = directory.openInput(segmentFileName, newIOContext(random())); Index: lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java =================================================================== --- lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java (working copy) @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -80,7 +80,7 @@ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); final DocValuesFormat fast = DocValuesFormat.forName("Lucene45"); final DocValuesFormat slow = DocValuesFormat.forName("SimpleText"); - iwc.setCodec(new Lucene45Codec() { + iwc.setCodec(new Lucene46Codec() { @Override public DocValuesFormat getDocValuesFormatForField(String field) { if ("dv1".equals(field)) { Index: lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java =================================================================== --- lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java (working copy) @@ -21,8 +21,8 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat; import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat; @@ -34,10 +34,10 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; @@ -200,7 +200,7 @@ } - public static class MockCodec extends 
Lucene45Codec { + public static class MockCodec extends Lucene46Codec { final PostingsFormat lucene40 = new Lucene41PostingsFormat(); final PostingsFormat simpleText = new SimpleTextPostingsFormat(); final PostingsFormat mockSep = new MockSepPostingsFormat(); @@ -217,7 +217,7 @@ } } - public static class MockCodec2 extends Lucene45Codec { + public static class MockCodec2 extends Lucene46Codec { final PostingsFormat lucene40 = new Lucene41PostingsFormat(); final PostingsFormat simpleText = new SimpleTextPostingsFormat(); @@ -268,7 +268,7 @@ } public void testSameCodecDifferentInstance() throws Exception { - Codec codec = new Lucene45Codec() { + Codec codec = new Lucene46Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { if ("id".equals(field)) { @@ -284,7 +284,7 @@ } public void testSameCodecDifferentParams() throws Exception { - Codec codec = new Lucene45Codec() { + Codec codec = new Lucene46Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { if ("id".equals(field)) { Index: lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java (working copy) @@ -27,7 +27,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -42,7 +42,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -1059,7 +1058,7 @@ aux2.close(); } - private static final class CustomPerFieldCodec extends Lucene45Codec { + private static final class CustomPerFieldCodec extends Lucene46Codec { private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText"); private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); private final PostingsFormat mockSepFormat = PostingsFormat.forName("MockSep"); @@ -1110,7 +1109,7 @@ private static final class UnRegisteredCodec extends FilterCodec { public UnRegisteredCodec() { - super("NotRegistered", new Lucene45Codec()); + super("NotRegistered", new Lucene46Codec()); } } Index: lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java (working copy) @@ -21,12 +21,9 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.RandomIndexWriter; import 
org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; @@ -41,7 +38,7 @@ public void test() throws Exception { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - conf.setCodec(new Lucene45Codec()); + conf.setCodec(new Lucene46Codec()); // riw should sometimes create docvalues fields, etc RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf); Document doc = new Document(); Index: lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy) @@ -37,8 +37,8 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.document.IntField; +import org.apache.lucene.document.LongField; import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.LongField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; @@ -54,14 +54,14 @@ import org.apache.lucene.store.BaseDirectoryWrapper; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.store.NIOFSDirectory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Constants; +import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; -import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util._TestUtil; import org.junit.AfterClass; @@ -76,7 +76,7 @@ // we won't even be running the actual code, only the impostor // @SuppressCodecs("Lucene4x") // Sep codec cannot yet handle the offsets in our 4.x index! 
-@SuppressCodecs({"Lucene3x", "MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene40", "Lucene41", "Appending", "Lucene42"}) +@SuppressCodecs({"Lucene3x", "MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene40", "Lucene41", "Appending", "Lucene42", "Lucene45"}) public class TestBackwardsCompatibility extends LuceneTestCase { // Uncomment these cases & run them on an older Lucene version, Property changes on: lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:r1527154 Index: lucene/core/src/test/org/apache/lucene/index/TestCodecs.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestCodecs.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/index/TestCodecs.java (working copy) @@ -299,7 +299,7 @@ final Directory dir = newDirectory(); this.write(fieldInfos, dir, fields, true); Codec codec = Codec.getDefault(); - final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null, null); + final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null); final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)); @@ -356,8 +356,7 @@ this.write(fieldInfos, dir, fields, false); Codec codec = Codec.getDefault(); - final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, - false, codec, null, null); + final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null); if (VERBOSE) { System.out.println("TEST: now read postings"); @@ -666,7 +665,7 @@ final int termIndexInterval = _TestUtil.nextInt(random(), 13, 27); final Codec codec = Codec.getDefault(); - final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null, null); + final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null); final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, fieldInfos, termIndexInterval, null, newIOContext(random())); final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state); Index: lucene/core/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java (working copy) @@ -29,7 +29,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.FailOnNonBulkMergesInfoStream; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; import org.junit.Test; public class TestConsistentFieldNumbers extends LuceneTestCase { @@ -67,8 +66,8 @@ sis.read(dir); assertEquals(2, sis.size()); - FieldInfos fis1 = _TestUtil.getFieldInfos(sis.info(0).info); - FieldInfos fis2 = _TestUtil.getFieldInfos(sis.info(1).info); + FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0)); + FieldInfos fis2 = SegmentReader.readFieldInfos(sis.info(1)); assertEquals("f1", 
fis1.fieldInfo(0).name); assertEquals("f2", fis1.fieldInfo(1).name); @@ -85,7 +84,7 @@ sis.read(dir); assertEquals(1, sis.size()); - FieldInfos fis3 = _TestUtil.getFieldInfos(sis.info(0).info); + FieldInfos fis3 = SegmentReader.readFieldInfos(sis.info(0)); assertEquals("f1", fis3.fieldInfo(0).name); assertEquals("f2", fis3.fieldInfo(1).name); @@ -130,8 +129,8 @@ sis.read(dir1); assertEquals(2, sis.size()); - FieldInfos fis1 = _TestUtil.getFieldInfos(sis.info(0).info); - FieldInfos fis2 = _TestUtil.getFieldInfos(sis.info(1).info); + FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0)); + FieldInfos fis2 = SegmentReader.readFieldInfos(sis.info(1)); assertEquals("f1", fis1.fieldInfo(0).name); assertEquals("f2", fis1.fieldInfo(1).name); @@ -161,7 +160,7 @@ SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(1, sis.size()); - FieldInfos fis1 = _TestUtil.getFieldInfos(sis.info(0).info); + FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0)); assertEquals("f1", fis1.fieldInfo(0).name); assertEquals("f2", fis1.fieldInfo(1).name); } @@ -180,8 +179,8 @@ SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(2, sis.size()); - FieldInfos fis1 = _TestUtil.getFieldInfos(sis.info(0).info); - FieldInfos fis2 = _TestUtil.getFieldInfos(sis.info(1).info); + FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0)); + FieldInfos fis2 = SegmentReader.readFieldInfos(sis.info(1)); assertEquals("f1", fis1.fieldInfo(0).name); assertEquals("f2", fis1.fieldInfo(1).name); assertEquals("f1", fis2.fieldInfo(0).name); @@ -203,9 +202,9 @@ SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(3, sis.size()); - FieldInfos fis1 = _TestUtil.getFieldInfos(sis.info(0).info); - FieldInfos fis2 = _TestUtil.getFieldInfos(sis.info(1).info); - FieldInfos fis3 = _TestUtil.getFieldInfos(sis.info(2).info); + FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0)); + FieldInfos fis2 = SegmentReader.readFieldInfos(sis.info(1)); + FieldInfos fis3 = SegmentReader.readFieldInfos(sis.info(2)); assertEquals("f1", fis1.fieldInfo(0).name); assertEquals("f2", fis1.fieldInfo(1).name); assertEquals("f1", fis2.fieldInfo(0).name); @@ -237,7 +236,7 @@ SegmentInfos sis = new SegmentInfos(); sis.read(dir); assertEquals(1, sis.size()); - FieldInfos fis1 = _TestUtil.getFieldInfos(sis.info(0).info); + FieldInfos fis1 = SegmentReader.readFieldInfos(sis.info(0)); assertEquals("f1", fis1.fieldInfo(0).name); assertEquals("f2", fis1.fieldInfo(1).name); assertEquals("f3", fis1.fieldInfo(2).name); @@ -275,7 +274,7 @@ SegmentInfos sis = new SegmentInfos(); sis.read(dir); for (SegmentInfoPerCommit si : sis) { - FieldInfos fis = _TestUtil.getFieldInfos(si.info); + FieldInfos fis = SegmentReader.readFieldInfos(si); for (FieldInfo fi : fis) { Field expected = getField(Integer.parseInt(fi.name)); Index: lucene/core/src/test/org/apache/lucene/index/TestDoc.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestDoc.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/index/TestDoc.java (working copy) @@ -217,7 +217,7 @@ final Codec codec = Codec.getDefault(); TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir); - final SegmentInfo si = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null, null); + final SegmentInfo si = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null); SegmentMerger merger = new 
SegmentMerger(Arrays.asList(r1, r2), si, InfoStream.getDefault(), trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, @@ -228,7 +228,7 @@ r2.close(); final SegmentInfo info = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged, si1.info.getDocCount() + si2.info.getDocCount(), - false, codec, null, null); + false, codec, null); info.setFiles(new HashSet(trackingDir.getCreatedFiles())); if (useCompoundFile) { @@ -239,7 +239,7 @@ } } - return new SegmentInfoPerCommit(info, 0, -1L); + return new SegmentInfoPerCommit(info, 0, -1L, -1L); } Index: lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java (working copy) @@ -50,7 +50,7 @@ public void setUp() throws Exception { super.setUp(); - // for now its SimpleText vs Lucene45(random postings format) + // for now its SimpleText vs Lucene46(random postings format) // as this gives the best overall coverage. when we have more // codecs we should probably pick 2 from Codec.availableCodecs() Index: lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java (working copy) @@ -17,20 +17,17 @@ * limitations under the License. */ -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; +import java.io.IOException; + import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldInfosReader; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.document.Document; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.LuceneTestCase; -import java.io.IOException; -import java.util.Arrays; - //import org.cnlp.utils.properties.ResourceBundleHelper; public class TestFieldInfos extends LuceneTestCase { @@ -60,7 +57,7 @@ //Use a RAMOutputStream FieldInfosWriter writer = Codec.getDefault().fieldInfosFormat().getFieldInfosWriter(); - writer.write(dir, filename, fieldInfos, IOContext.DEFAULT); + writer.write(dir, filename, "", fieldInfos, IOContext.DEFAULT); output.close(); return fieldInfos; } @@ -67,7 +64,7 @@ public FieldInfos readFieldInfos(Directory dir, String filename) throws IOException { FieldInfosReader reader = Codec.getDefault().fieldInfosFormat().getFieldInfosReader(); - return reader.read(dir, filename, IOContext.DEFAULT); + return reader.read(dir, filename, "", IOContext.DEFAULT); } public void test() throws IOException { Index: lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (working copy) @@ -30,7 +30,9 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; -import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import 
org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -40,7 +42,6 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.MockDirectoryWrapper.FakeIOException; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; @@ -1228,192 +1229,4 @@ r.close(); d.close(); } - - // Make sure if we hit a transient IOException (e.g., disk - // full), and then the exception stops (e.g., disk frees - // up), so we successfully close IW or open an NRT - // reader, we don't lose any deletes: - public void testNoLostDeletesOnIOException() throws Exception { - - int deleteCount = 0; - int docBase = 0; - int docCount = 0; - - MockDirectoryWrapper dir = newMockDirectory(); - final AtomicBoolean shouldFail = new AtomicBoolean(); - dir.failOn(new MockDirectoryWrapper.Failure() { - - @Override - public void eval(MockDirectoryWrapper dir) throws IOException { - StackTraceElement[] trace = new Exception().getStackTrace(); - if (shouldFail.get() == false) { - return; - } - - boolean sawSeal = false; - boolean sawWrite = false; - for (int i = 0; i < trace.length; i++) { - if ("sealFlushedSegment".equals(trace[i].getMethodName())) { - sawSeal = true; - break; - } - if ("writeLiveDocs".equals(trace[i].getMethodName())) { - sawWrite = true; - } - } - - // Don't throw exc if we are "flushing", else - // the segment is aborted and docs are lost: - if (sawWrite && sawSeal == false && random().nextInt(3) == 2) { - // Only sometimes throw the exc, so we get - // it sometimes on creating the file, on - // flushing buffer, on closing the file: - if (VERBOSE) { - System.out.println("TEST: now fail; thread=" + Thread.currentThread().getName() + " exc:"); - new Throwable().printStackTrace(System.out); - } - shouldFail.set(false); - throw new FakeIOException(); - } - } - }); - - RandomIndexWriter w = null; - - for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) { - int numDocs = atLeast(100); - if (VERBOSE) { - System.out.println("\nTEST: iter=" + iter + " numDocs=" + numDocs + " docBase=" + docBase + " delCount=" + deleteCount); - } - if (w == null) { - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - final MergeScheduler ms = iwc.getMergeScheduler(); - if (ms instanceof ConcurrentMergeScheduler) { - final ConcurrentMergeScheduler suppressFakeIOE = new ConcurrentMergeScheduler() { - @Override - protected void handleMergeException(Throwable exc) { - // suppress only FakeIOException: - if (!(exc instanceof FakeIOException)) { - super.handleMergeException(exc); - } - } - }; - final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) ms; - suppressFakeIOE.setMaxMergesAndThreads(cms.getMaxMergeCount(), cms.getMaxThreadCount()); - suppressFakeIOE.setMergeThreadPriority(cms.getMergeThreadPriority()); - iwc.setMergeScheduler(suppressFakeIOE); - } - w = new RandomIndexWriter(random(), dir, iwc); - // Since we hit exc during merging, a partial - // forceMerge can easily return when there are still - // too many segments in the index: - w.setDoRandomForceMergeAssert(false); - } - for(int i=0;i 0); // we delete at most one document per round + for (AtomicReaderContext context : reader.leaves()) { + AtomicReader r = context.reader(); +// System.out.println(((SegmentReader) 
r).getSegmentName()); + Bits liveDocs = r.getLiveDocs(); + for (int field = 0; field < fieldValues.length; field++) { + String f = "f" + field; + NumericDocValues ndv = r.getNumericDocValues(f); + Bits docsWithField = r.getDocsWithField(f); + assertNotNull(ndv); + int maxDoc = r.maxDoc(); + for (int doc = 0; doc < maxDoc; doc++) { + if (liveDocs == null || liveDocs.get(doc)) { +// System.out.println("doc=" + (doc + context.docBase) + " f='" + f + "' vslue=" + ndv.get(doc)); + if (fieldHasValue[field]) { + assertTrue(docsWithField.get(doc)); + assertEquals("invalid value for doc=" + doc + ", field=" + f + ", reader=" + r, fieldValues[field], ndv.get(doc)); + } else { + assertFalse(docsWithField.get(doc)); + } + } + } + } + } +// System.out.println(); + } + + IOUtils.close(writer, reader, dir); + } + + @Test + public void testUpdateSegmentWithNoDocValues() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + // prevent merges, otherwise by the time updates are applied + // (writer.close()), the segments might have merged and that update becomes + // legit. + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); + IndexWriter writer = new IndexWriter(dir, conf); + + // first segment with NDV + Document doc = new Document(); + doc.add(new StringField("id", "doc0", Store.NO)); + doc.add(new NumericDocValuesField("ndv", 3)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new StringField("id", "doc4", Store.NO)); // document without 'ndv' field + writer.addDocument(doc); + writer.commit(); + + // second segment with no NDV + doc = new Document(); + doc.add(new StringField("id", "doc1", Store.NO)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new StringField("id", "doc2", Store.NO)); // document that isn't updated + writer.addDocument(doc); + writer.commit(); + + // update document in the first segment - should not affect docsWithField of + // the document without NDV field + writer.updateNumericDocValue(new Term("id", "doc0"), "ndv", 5L); + + // update document in the second segment - field should be added and we should + // be able to handle the other document correctly (e.g. no NPE) + writer.updateNumericDocValue(new Term("id", "doc1"), "ndv", 5L); + writer.close(); + + DirectoryReader reader = DirectoryReader.open(dir); + for (AtomicReaderContext context : reader.leaves()) { + AtomicReader r = context.reader(); + NumericDocValues ndv = r.getNumericDocValues("ndv"); + Bits docsWithField = r.getDocsWithField("ndv"); + assertNotNull(docsWithField); + assertTrue(docsWithField.get(0)); + assertEquals(5L, ndv.get(0)); + assertFalse(docsWithField.get(1)); + assertEquals(0L, ndv.get(1)); + } + reader.close(); + + dir.close(); + } + + @Test + public void testUpdateSegmentWithPostingButNoDocValues() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + // prevent merges, otherwise by the time updates are applied + // (writer.close()), the segments might have merged and that update becomes + // legit. 
+ conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); + IndexWriter writer = new IndexWriter(dir, conf); + + // first segment with NDV + Document doc = new Document(); + doc.add(new StringField("id", "doc0", Store.NO)); + doc.add(new StringField("ndv", "mock-value", Store.NO)); + doc.add(new NumericDocValuesField("ndv", 5)); + writer.addDocument(doc); + writer.commit(); + + // second segment with no NDV + doc = new Document(); + doc.add(new StringField("id", "doc1", Store.NO)); + doc.add(new StringField("ndv", "mock-value", Store.NO)); + writer.addDocument(doc); + writer.commit(); + + // update document in the second segment + writer.updateNumericDocValue(new Term("id", "doc1"), "ndv", 5L); + writer.close(); + + DirectoryReader reader = DirectoryReader.open(dir); + for (AtomicReaderContext context : reader.leaves()) { + AtomicReader r = context.reader(); + NumericDocValues ndv = r.getNumericDocValues("ndv"); + for (int i = 0; i < r.maxDoc(); i++) { + assertEquals(5L, ndv.get(i)); + } + } + reader.close(); + + dir.close(); + } + + @Test + public void testUpdateNumericDVFieldWithSameNameAsPostingField() throws Exception { + // this used to fail because FieldInfos.Builder neglected to update + // globalFieldMaps.docValueTypes map + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(dir, conf); + + Document doc = new Document(); + doc.add(new StringField("f", "mock-value", Store.NO)); + doc.add(new NumericDocValuesField("f", 5)); + writer.addDocument(doc); + writer.commit(); + writer.updateNumericDocValue(new Term("f", "mock-value"), "f", 17L); + writer.close(); + + DirectoryReader r = DirectoryReader.open(dir); + NumericDocValues ndv = r.leaves().get(0).reader().getNumericDocValues("f"); + assertEquals(17, ndv.get(0)); + r.close(); + + dir.close(); + } + + @Test + public void testUpdateOldSegments() throws Exception { + Codec[] oldCodecs = new Codec[] { new Lucene40RWCodec(), new Lucene41RWCodec(), new Lucene42RWCodec(), new Lucene45RWCodec() }; + Directory dir = newDirectory(); + + // create a segment with an old Codec + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + conf.setCodec(oldCodecs[random().nextInt(oldCodecs.length)]); + IndexWriter writer = new IndexWriter(dir, conf); + Document doc = new Document(); + doc.add(new StringField("id", "doc", Store.NO)); + doc.add(new NumericDocValuesField("f", 5)); + writer.addDocument(doc); + writer.close(); + + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + writer = new IndexWriter(dir, conf); + writer.updateNumericDocValue(new Term("id", "doc"), "f", 4L); + OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false; + try { + writer.close(); + fail("should not have succeeded to update a segment written with an old Codec"); + } catch (UnsupportedOperationException e) { + writer.rollback(); + } finally { + OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; + } + + dir.close(); + } + + @Test + public void testStressMultiThreading() throws Exception { + final Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + final IndexWriter writer = new IndexWriter(dir, conf); + + // create index + final int numThreads = _TestUtil.nextInt(random(), 3, 6); + final int numDocs = atLeast(2000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(new StringField("id", "doc" + i, 
Store.NO)); + double group = random().nextDouble(); + String g; + if (group < 0.1) g = "g0"; + else if (group < 0.5) g = "g1"; + else if (group < 0.8) g = "g2"; + else g = "g3"; + doc.add(new StringField("updKey", g, Store.NO)); + for (int j = 0; j < numThreads; j++) { + long value = random().nextInt(); + doc.add(new NumericDocValuesField("f" + j, value)); + doc.add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2 + } + writer.addDocument(doc); + } + + final CountDownLatch done = new CountDownLatch(numThreads); + final AtomicInteger numUpdates = new AtomicInteger(atLeast(100)); + + // same thread updates a field as well as reopens + Thread[] threads = new Thread[numThreads]; + for (int i = 0; i < threads.length; i++) { + final String f = "f" + i; + final String cf = "cf" + i; + threads[i] = new Thread("UpdateThread-" + i) { + @Override + public void run() { + DirectoryReader reader = null; + boolean success = false; + try { + Random random = random(); + while (numUpdates.getAndDecrement() > 0) { + double group = random.nextDouble(); + Term t; + if (group < 0.1) t = new Term("updKey", "g0"); + else if (group < 0.5) t = new Term("updKey", "g1"); + else if (group < 0.8) t = new Term("updKey", "g2"); + else t = new Term("updKey", "g3"); +// System.out.println("[" + Thread.currentThread().getName() + "] numUpdates=" + numUpdates + " updateTerm=" + t); + if (random.nextBoolean()) { // sometimes unset a value + writer.updateNumericDocValue(t, f, null); + writer.updateNumericDocValue(t, cf, null); + } else { + long updValue = random.nextInt(); + writer.updateNumericDocValue(t, f, updValue); + writer.updateNumericDocValue(t, cf, updValue * 2); + } + + if (random.nextDouble() < 0.2) { + // delete a random document + int doc = random.nextInt(numDocs); +// System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc); + writer.deleteDocuments(new Term("id", "doc" + doc)); + } + + if (random.nextDouble() < 0.05) { // commit every 20 updates on average +// System.out.println("[" + Thread.currentThread().getName() + "] commit"); + writer.commit(); + } + + if (random.nextDouble() < 0.1) { // reopen NRT reader (apply updates), on average once every 10 updates + if (reader == null) { +// System.out.println("[" + Thread.currentThread().getName() + "] open NRT"); + reader = DirectoryReader.open(writer, true); + } else { +// System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT"); + DirectoryReader r2 = DirectoryReader.openIfChanged(reader, writer, true); + if (r2 != null) { + reader.close(); + reader = r2; + } + } + } + } +// System.out.println("[" + Thread.currentThread().getName() + "] DONE"); + success = true; + } catch (IOException e) { + throw new RuntimeException(e); + } finally { + if (reader != null) { + try { + reader.close(); + } catch (IOException e) { + if (success) { // suppress this exception only if there was another exception + throw new RuntimeException(e); + } + } + } + done.countDown(); + } + } + }; + } + + for (Thread t : threads) t.start(); + done.await(); + writer.close(); + + DirectoryReader reader = DirectoryReader.open(dir); + for (AtomicReaderContext context : reader.leaves()) { + AtomicReader r = context.reader(); + for (int i = 0; i < numThreads; i++) { + NumericDocValues ndv = r.getNumericDocValues("f" + i); + NumericDocValues control = r.getNumericDocValues("cf" + i); + Bits docsWithNdv = r.getDocsWithField("f" + i); + Bits docsWithControl = r.getDocsWithField("cf" + i); + Bits liveDocs = 
r.getLiveDocs(); + for (int j = 0; j < r.maxDoc(); j++) { + if (liveDocs == null || liveDocs.get(j)) { + assertEquals(docsWithNdv.get(j), docsWithControl.get(j)); + if (docsWithNdv.get(j)) { + assertEquals(control.get(j), ndv.get(j) * 2); + } + } + } + } + } + reader.close(); + + dir.close(); + } + + @Test + public void testUpdateDifferentDocsInDifferentGens() throws Exception { + // update same document multiple times across generations + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + conf.setMaxBufferedDocs(4); + IndexWriter writer = new IndexWriter(dir, conf); + final int numDocs = atLeast(10); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(new StringField("id", "doc" + i, Store.NO)); + long value = random().nextInt(); + doc.add(new NumericDocValuesField("f", value)); + doc.add(new NumericDocValuesField("cf", value * 2)); + writer.addDocument(doc); + } + + int numGens = atLeast(5); + for (int i = 0; i < numGens; i++) { + int doc = random().nextInt(numDocs); + Term t = new Term("id", "doc" + doc); + long value = random().nextLong(); + writer.updateNumericDocValue(t, "f", value); + writer.updateNumericDocValue(t, "cf", value * 2); + DirectoryReader reader = DirectoryReader.open(writer, true); + for (AtomicReaderContext context : reader.leaves()) { + AtomicReader r = context.reader(); + NumericDocValues fndv = r.getNumericDocValues("f"); + NumericDocValues cfndv = r.getNumericDocValues("cf"); + for (int j = 0; j < r.maxDoc(); j++) { + assertEquals(cfndv.get(j), fndv.get(j) * 2); + } + } + reader.close(); + } + writer.close(); + dir.close(); + } + + @Test + public void testChangeCodec() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // disable merges to simplify test assertions. 
+ conf.setCodec(new Lucene46Codec() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return new Lucene45DocValuesFormat(); + } + }); + IndexWriter writer = new IndexWriter(dir, conf.clone()); + Document doc = new Document(); + doc.add(new StringField("id", "d0", Store.NO)); + doc.add(new NumericDocValuesField("f1", 5L)); + doc.add(new NumericDocValuesField("f2", 13L)); + writer.addDocument(doc); + writer.close(); + + // change format + conf.setCodec(new Lucene46Codec() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return new AssertingDocValuesFormat(); + } + }); + writer = new IndexWriter(dir, conf.clone()); + doc = new Document(); + doc.add(new StringField("id", "d1", Store.NO)); + doc.add(new NumericDocValuesField("f1", 17L)); + doc.add(new NumericDocValuesField("f2", 2L)); + writer.addDocument(doc); + writer.updateNumericDocValue(new Term("id", "d0"), "f1", 12L); + writer.close(); + + DirectoryReader reader = DirectoryReader.open(dir); + AtomicReader r = SlowCompositeReaderWrapper.wrap(reader); + NumericDocValues f1 = r.getNumericDocValues("f1"); + NumericDocValues f2 = r.getNumericDocValues("f2"); + assertEquals(12L, f1.get(0)); + assertEquals(13L, f2.get(0)); + assertEquals(17L, f1.get(1)); + assertEquals(2L, f2.get(1)); + reader.close(); + dir.close(); + } + + @Test + public void testAddIndexes() throws Exception { + Directory dir1 = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(dir1, conf); + + final int numDocs = atLeast(50); + final int numTerms = _TestUtil.nextInt(random(), 1, numDocs / 5); + Set randomTerms = new HashSet(); + while (randomTerms.size() < numTerms) { + randomTerms.add(_TestUtil.randomSimpleString(random())); + } + + // create first index + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(new StringField("id", RandomPicks.randomFrom(random(), randomTerms), Store.NO)); + doc.add(new NumericDocValuesField("ndv", 4L)); + doc.add(new NumericDocValuesField("control", 8L)); + writer.addDocument(doc); + } + + if (random().nextBoolean()) { + writer.commit(); + } + + // update some docs to a random value + long value = random().nextInt(); + Term term = new Term("id", RandomPicks.randomFrom(random(), randomTerms)); + writer.updateNumericDocValue(term, "ndv", value); + writer.updateNumericDocValue(term, "control", value * 2); + writer.close(); + + Directory dir2 = newDirectory(); + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + writer = new IndexWriter(dir2, conf); + if (random().nextBoolean()) { + writer.addIndexes(dir1); + } else { + DirectoryReader reader = DirectoryReader.open(dir1); + writer.addIndexes(reader); + reader.close(); + } + writer.close(); + + DirectoryReader reader = DirectoryReader.open(dir2); + for (AtomicReaderContext context : reader.leaves()) { + AtomicReader r = context.reader(); + NumericDocValues ndv = r.getNumericDocValues("ndv"); + NumericDocValues control = r.getNumericDocValues("control"); + for (int i = 0; i < r.maxDoc(); i++) { + assertEquals(ndv.get(i)*2, control.get(i)); + } + } + reader.close(); + + IOUtils.close(dir1, dir2); + } + + @Test + public void testDeleteUnusedUpdatesFiles() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(dir, conf); + + Document doc = 
new Document(); + doc.add(new StringField("id", "d0", Store.NO)); + doc.add(new NumericDocValuesField("f", 1L)); + writer.addDocument(doc); + + // create first gen of update files + writer.updateNumericDocValue(new Term("id", "d0"), "f", 2L); + writer.commit(); + int numFiles = dir.listAll().length; + + DirectoryReader r = DirectoryReader.open(dir); + assertEquals(2L, r.leaves().get(0).reader().getNumericDocValues("f").get(0)); + r.close(); + + // create second gen of update files, first gen should be deleted + writer.updateNumericDocValue(new Term("id", "d0"), "f", 5L); + writer.commit(); + assertEquals(numFiles, dir.listAll().length); + + r = DirectoryReader.open(dir); + assertEquals(5L, r.leaves().get(0).reader().getNumericDocValues("f").get(0)); + r.close(); + + writer.close(); + dir.close(); + } + + @Test + public void testTonsOfUpdates() throws Exception { + // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM + Directory dir = newDirectory(); + final Random random = random(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); + conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc + IndexWriter writer = new IndexWriter(dir, conf); + + // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds) + final int numDocs = atLeast(20000); + final int numNumericFields = atLeast(5); + final int numTerms = _TestUtil.nextInt(random, 10, 100); // terms should affect many docs + Set updateTerms = new HashSet(); + while (updateTerms.size() < numTerms) { + updateTerms.add(_TestUtil.randomSimpleString(random)); + } + +// System.out.println("numDocs=" + numDocs + " numNumericFields=" + numNumericFields + " numTerms=" + numTerms); + + // build a large index with many NDV fields and update terms + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + int numUpdateTerms = _TestUtil.nextInt(random, 1, numTerms / 10); + for (int j = 0; j < numUpdateTerms; j++) { + doc.add(new StringField("upd", RandomPicks.randomFrom(random, updateTerms), Store.NO)); + } + for (int j = 0; j < numNumericFields; j++) { + long val = random.nextInt(); + doc.add(new NumericDocValuesField("f" + j, val)); + doc.add(new NumericDocValuesField("cf" + j, val * 2)); + } + writer.addDocument(doc); + } + + writer.commit(); // commit so there's something to apply to + + // set to flush every 2048 bytes (approximately every 12 updates), so we get + // many flushes during numeric updates + writer.getConfig().setRAMBufferSizeMB(2048.0 / 1024 / 1024); + final int numUpdates = atLeast(100); +// System.out.println("numUpdates=" + numUpdates); + for (int i = 0; i < numUpdates; i++) { + int field = random.nextInt(numNumericFields); + Term updateTerm = new Term("upd", RandomPicks.randomFrom(random, updateTerms)); + long value = random.nextInt(); + writer.updateNumericDocValue(updateTerm, "f" + field, value); + writer.updateNumericDocValue(updateTerm, "cf" + field, value * 2); + } + + writer.close(); + + DirectoryReader reader = DirectoryReader.open(dir); + for (AtomicReaderContext context : reader.leaves()) { + for (int i = 0; i < numNumericFields; i++) { + AtomicReader r = context.reader(); + NumericDocValues f = r.getNumericDocValues("f" + i); + NumericDocValues cf = r.getNumericDocValues("cf" + i); + for (int j = 0; j < r.maxDoc(); j++) { + assertEquals("reader=" + r + ", field=f" + i + ", doc=" + j, cf.get(j), f.get(j) * 
2); + } + } + } + reader.close(); + + dir.close(); + } + + @Test + public void testUpdatesOrder() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(dir, conf); + + Document doc = new Document(); + doc.add(new StringField("upd", "t1", Store.NO)); + doc.add(new StringField("upd", "t2", Store.NO)); + doc.add(new NumericDocValuesField("f1", 1L)); + doc.add(new NumericDocValuesField("f2", 1L)); + writer.addDocument(doc); + writer.updateNumericDocValue(new Term("upd", "t1"), "f1", 2L); // update f1 to 2 + writer.updateNumericDocValue(new Term("upd", "t1"), "f2", 2L); // update f2 to 2 + writer.updateNumericDocValue(new Term("upd", "t2"), "f1", 3L); // update f1 to 3 + writer.updateNumericDocValue(new Term("upd", "t2"), "f2", 3L); // update f2 to 3 + writer.updateNumericDocValue(new Term("upd", "t1"), "f1", 4L); // update f1 to 4 (but not f2) + writer.close(); + + DirectoryReader reader = DirectoryReader.open(dir); + assertEquals(4, reader.leaves().get(0).reader().getNumericDocValues("f1").get(0)); + assertEquals(3, reader.leaves().get(0).reader().getNumericDocValues("f2").get(0)); + reader.close(); + + dir.close(); + } + + @Test + public void testUpdateAllDeletedSegment() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(dir, conf); + + Document doc = new Document(); + doc.add(new StringField("id", "doc", Store.NO)); + doc.add(new NumericDocValuesField("f1", 1L)); + writer.addDocument(doc); + writer.addDocument(doc); + writer.commit(); + writer.deleteDocuments(new Term("id", "doc")); // delete all docs in the first segment + writer.addDocument(doc); + writer.updateNumericDocValue(new Term("id", "doc"), "f1", 2L); + writer.close(); + + DirectoryReader reader = DirectoryReader.open(dir); + assertEquals(1, reader.leaves().size()); + assertEquals(2L, reader.leaves().get(0).reader().getNumericDocValues("f1").get(0)); + reader.close(); + + dir.close(); + } + + @Test + public void testUpdateTwoNonexistingTerms() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(dir, conf); + + Document doc = new Document(); + doc.add(new StringField("id", "doc", Store.NO)); + doc.add(new NumericDocValuesField("f1", 1L)); + writer.addDocument(doc); + // update w/ multiple nonexisting terms in same field + writer.updateNumericDocValue(new Term("c", "foo"), "f1", 2L); + writer.updateNumericDocValue(new Term("c", "bar"), "f1", 2L); + writer.close(); + + DirectoryReader reader = DirectoryReader.open(dir); + assertEquals(1, reader.leaves().size()); + assertEquals(1L, reader.leaves().get(0).reader().getNumericDocValues("f1").get(0)); + reader.close(); + + dir.close(); + } + +} Property changes on: lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 1535645) +++ 
lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy) @@ -78,7 +78,7 @@ public void testMerge() throws IOException { final Codec codec = Codec.getDefault(); - final SegmentInfo si = new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, -1, false, codec, null, null); + final SegmentInfo si = new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, -1, false, codec, null); SegmentMerger merger = new SegmentMerger(Arrays.asList(reader1, reader2), si, InfoStream.getDefault(), mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, @@ -89,8 +89,8 @@ //Should be able to open a new SegmentReader against the new directory SegmentReader mergedReader = new SegmentReader(new SegmentInfoPerCommit( new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, docsMerged, - false, codec, null, null), - 0, -1L), + false, codec, null), + 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); assertTrue(mergedReader != null); assertTrue(mergedReader.numDocs() == 2); Index: lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java (working copy) @@ -127,7 +127,7 @@ seg = writer.newestSegment(); writer.close(); - fieldInfos = _TestUtil.getFieldInfos(seg.info); + fieldInfos = SegmentReader.readFieldInfos(seg); } @Override Index: lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java =================================================================== --- lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java (revision 1535645) +++ lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java (working copy) @@ -24,15 +24,16 @@ // TODO: maybe we should test this with mocks, but its easy // enough to test the basics via Codec public class TestNamedSPILoader extends LuceneTestCase { + public void testLookup() { - Codec codec = Codec.forName("Lucene45"); - assertEquals("Lucene45", codec.getName()); + Codec codec = Codec.forName("Lucene46"); + assertEquals("Lucene46", codec.getName()); } // we want an exception if its not found. 
public void testBogusLookup() { try { - Codec codec = Codec.forName("dskfdskfsdfksdfdsf"); + Codec.forName("dskfdskfsdfksdfdsf"); fail(); } catch (IllegalArgumentException expected) {} } @@ -39,6 +40,6 @@ public void testAvailableServices() { Set codecs = Codec.availableCodecs(); - assertTrue(codecs.contains("Lucene45")); + assertTrue(codecs.contains("Lucene46")); } } Index: lucene/core =================================================================== --- lucene/core (revision 1535645) +++ lucene/core (working copy) Property changes on: lucene/core ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/core:r1523461,1523477,1523525,1524900-1524901,1525231,1527147,1527154,1527361,1527391,1527460,1528076,1528837,1529611,1531496,1531620,1532670,1535526 Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/Facet45Codec.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/Facet45Codec.java (revision 1535645) +++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/Facet45Codec.java (working copy) @@ -1,79 +0,0 @@ -package org.apache.lucene.facet.codecs.facet45; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.HashSet; -import java.util.Set; - -import org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; -import org.apache.lucene.facet.codecs.facet42.Facet42DocValuesFormat; -import org.apache.lucene.facet.params.CategoryListParams; -import org.apache.lucene.facet.params.FacetIndexingParams; - -/** - * Same as {@link Lucene45Codec} except it uses {@link Facet42DocValuesFormat} - * for facet fields (faster-but-more-RAM-consuming doc values). - * - *
<p> - * NOTE: this codec does not support facet partitions (see - * {@link FacetIndexingParams#getPartitionSize()}). - * - * <p>
    - * NOTE: this format cannot handle more than 2 GB - * of facet data in a single segment. If your usage may hit - * this limit, you can either use Lucene's default - * DocValuesFormat, limit the maximum segment size in your - * MergePolicy, or send us a patch fixing the limitation. - * - * @lucene.experimental - */ -public class Facet45Codec extends Lucene45Codec { - - private final Set facetFields; - private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName("Facet42"); - - /** Default constructor, uses {@link FacetIndexingParams#DEFAULT}. */ - public Facet45Codec() { - this(FacetIndexingParams.DEFAULT); - } - - /** - * Initializes with the given {@link FacetIndexingParams}. Returns the proper - * {@link DocValuesFormat} for the fields that are returned by - * {@link FacetIndexingParams#getAllCategoryListParams()}. - */ - public Facet45Codec(FacetIndexingParams fip) { - if (fip.getPartitionSize() != Integer.MAX_VALUE) { - throw new IllegalArgumentException("this Codec does not support partitions"); - } - this.facetFields = new HashSet(); - for (CategoryListParams clp : fip.getAllCategoryListParams()) { - facetFields.add(clp.field); - } - } - - @Override - public DocValuesFormat getDocValuesFormatForField(String field) { - if (facetFields.contains(field)) { - return facetsDVFormat; - } else { - return super.getDocValuesFormatForField(field); - } - } -} Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/package.html =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/package.html (revision 1535645) +++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet45/package.html (working copy) @@ -1,22 +0,0 @@ - - - - -Codec + DocValuesFormat that are optimized for facets. - - Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet46/Facet46Codec.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet46/Facet46Codec.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet46/Facet46Codec.java (working copy) @@ -0,0 +1,79 @@ +package org.apache.lucene.facet.codecs.facet46; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; +import org.apache.lucene.facet.codecs.facet42.Facet42DocValuesFormat; +import org.apache.lucene.facet.params.CategoryListParams; +import org.apache.lucene.facet.params.FacetIndexingParams; + +/** + * Same as {@link Lucene46Codec} except it uses {@link Facet42DocValuesFormat} + * for facet fields (faster-but-more-RAM-consuming doc values). + * + *
<p> + * NOTE: this codec does not support facet partitions (see + * {@link FacetIndexingParams#getPartitionSize()}). + * + * <p>
    + * NOTE: this format cannot handle more than 2 GB + * of facet data in a single segment. If your usage may hit + * this limit, you can either use Lucene's default + * DocValuesFormat, limit the maximum segment size in your + * MergePolicy, or send us a patch fixing the limitation. + * + * @lucene.experimental + */ +public class Facet46Codec extends Lucene46Codec { + + private final Set facetFields; + private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName("Facet42"); + + /** Default constructor, uses {@link FacetIndexingParams#DEFAULT}. */ + public Facet46Codec() { + this(FacetIndexingParams.DEFAULT); + } + + /** + * Initializes with the given {@link FacetIndexingParams}. Returns the proper + * {@link DocValuesFormat} for the fields that are returned by + * {@link FacetIndexingParams#getAllCategoryListParams()}. + */ + public Facet46Codec(FacetIndexingParams fip) { + if (fip.getPartitionSize() != Integer.MAX_VALUE) { + throw new IllegalArgumentException("this Codec does not support partitions"); + } + this.facetFields = new HashSet(); + for (CategoryListParams clp : fip.getAllCategoryListParams()) { + facetFields.add(clp.field); + } + } + + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + if (facetFields.contains(field)) { + return facetsDVFormat; + } else { + return super.getDocValuesFormatForField(field); + } + } +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet46/Facet46Codec.java ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet46/package.html =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/codecs/facet46/package.html (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/codecs/facet46/package.html (working copy) @@ -0,0 +1,22 @@ + + + + +Codec + DocValuesFormat that are optimized for facets. 
+ + Property changes on: lucene/facet/src/java/org/apache/lucene/facet/codecs/facet46/package.html ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java (revision 1535645) +++ lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java (working copy) @@ -3,7 +3,7 @@ import java.util.Random; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.facet.codecs.facet45.Facet45Codec; +import org.apache.lucene.facet.codecs.facet46.Facet46Codec; import org.apache.lucene.facet.encoding.DGapIntEncoder; import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder; import org.apache.lucene.facet.encoding.EightFlagsIntEncoder; @@ -55,7 +55,7 @@ public static void beforeClassFacetTestCase() throws Exception { if (random().nextDouble() < 0.3) { savedDefault = Codec.getDefault(); // save to restore later - Codec.setDefault(new Facet45Codec()); + Codec.setDefault(new Facet46Codec()); } } Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java (revision 1535645) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java (working copy) @@ -31,7 +31,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.FacetTestUtils; -import org.apache.lucene.facet.codecs.facet45.Facet45Codec; +import org.apache.lucene.facet.codecs.facet46.Facet46Codec; import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.params.FacetIndexingParams; @@ -260,7 +260,7 @@ Directory dir = newDirectory(); Directory taxoDir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - iwc.setCodec(new Facet45Codec()); + iwc.setCodec(new Facet46Codec()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); Index: lucene/facet =================================================================== --- lucene/facet (revision 1535645) +++ lucene/facet (working copy) Property changes on: lucene/facet ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/facet:r1527154 Index: lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java (revision 1535645) +++ lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java (working copy) @@ -139,9 +139,8 @@ SegmentInfo info = infoPerCommit.info; // Same info just changing the dir: SegmentInfo newInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.getDocCount(), - info.getUseCompoundFile(), - info.getCodec(), info.getDiagnostics(), info.attributes()); - destInfos.add(new SegmentInfoPerCommit(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getDelGen())); + info.getUseCompoundFile(), 
info.getCodec(), info.getDiagnostics()); + destInfos.add(new SegmentInfoPerCommit(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getDelGen(), infoPerCommit.getFieldInfosGen())); // now copy files over Collection files = infoPerCommit.files(); for (final String srcName : files) { Index: lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java =================================================================== --- lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java (revision 1535645) +++ lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java (working copy) @@ -37,7 +37,6 @@ import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; import org.apache.lucene.index.TieredMergePolicy; @@ -123,9 +122,23 @@ iw1.commit(); iw2.commit(); final Document doc = randomDocument(); - iw1.addDocument(doc); - iw2.addDocument(doc); + // NOTE: don't use RIW.addDocument directly, since it sometimes commits + // which may trigger a merge, at which case forceMerge may not do anything. + // With field updates this is a problem, since the updates can go into the + // single segment in the index, and threefore the index won't be sorted. + // This hurts the assumption of the test later on, that the index is sorted + // by SortingMP. + iw1.w.addDocument(doc); + iw2.w.addDocument(doc); + if (defaultCodecSupportsFieldUpdates()) { + // update NDV of docs belonging to one term (covers many documents) + final long value = random().nextLong(); + final String term = RandomPicks.randomFrom(random(), terms); + iw1.w.updateNumericDocValue(new Term("s", term), "ndv", value); + iw2.w.updateNumericDocValue(new Term("s", term), "ndv", value); + } + iw1.forceMerge(1); iw2.forceMerge(1); iw1.close(); @@ -146,7 +159,7 @@ private static void assertSorted(AtomicReader reader) throws IOException { final NumericDocValues ndv = reader.getNumericDocValues("ndv"); for (int i = 1; i < reader.maxDoc(); ++i) { - assertTrue(ndv.get(i-1) <= ndv.get(i)); + assertTrue("ndv(" + (i-1) + ")=" + ndv.get(i-1) + ",ndv(" + i + ")=" + ndv.get(i), ndv.get(i-1) <= ndv.get(i)); } } @@ -156,6 +169,7 @@ assertSorted(sortedReader1); assertSorted(sortedReader2); + assertReaderEquals("", sortedReader1, sortedReader2); } Index: lucene/misc =================================================================== --- lucene/misc (revision 1535645) +++ lucene/misc (working copy) Property changes on: lucene/misc ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/misc:r1523461,1527154,1527391 Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (revision 1535645) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (working copy) @@ -34,7 +34,7 @@ import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import 
org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -163,7 +163,7 @@ * codec to use. */ protected IndexWriterConfig getIndexWriterConfig(Version matchVersion, Analyzer indexAnalyzer) { IndexWriterConfig iwc = new IndexWriterConfig(matchVersion, indexAnalyzer); - iwc.setCodec(new Lucene45Codec()); + iwc.setCodec(new Lucene46Codec()); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); return iwc; } Index: lucene/suggest =================================================================== --- lucene/suggest (revision 1535645) +++ lucene/suggest (working copy) Property changes on: lucene/suggest ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/suggest:r1527154 Index: lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java (working copy) @@ -23,10 +23,10 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; /** - * Acts like {@link Lucene45Codec} but with additional asserts. + * Acts like {@link Lucene46Codec} but with additional asserts. */ public final class AssertingCodec extends FilterCodec { @@ -37,7 +37,7 @@ private final NormsFormat norms = new AssertingNormsFormat(); public AssertingCodec() { - super("Asserting", new Lucene45Codec()); + super("Asserting", new Lucene46Codec()); } @Override Index: lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java (working copy) @@ -28,7 +28,7 @@ import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat; import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; /** Codec that tries to use as little ram as possible because he spent all his money on beer */ // TODO: better name :) @@ -45,9 +45,10 @@ private final NormsFormat norms = new DiskNormsFormat(); public CheapBastardCodec() { - super("CheapBastard", new Lucene45Codec()); + super("CheapBastard", new Lucene46Codec()); } + @Override public PostingsFormat postingsFormat() { return postings; } Index: lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java (working copy) @@ -23,13 +23,13 @@ import org.apache.lucene.codecs.StoredFieldsFormat; import 
org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; import com.carrotsearch.randomizedtesting.generators.RandomInts; /** * A codec that uses {@link CompressingStoredFieldsFormat} for its stored - * fields and delegates to {@link Lucene45Codec} for everything else. + * fields and delegates to {@link Lucene46Codec} for everything else. */ public abstract class CompressingCodec extends FilterCodec { @@ -73,7 +73,7 @@ * Creates a compressing codec with a given segment suffix */ public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize) { - super(name, new Lucene45Codec()); + super(name, new Lucene46Codec()); this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize); this.termVectorsFormat = new CompressingTermVectorsFormat(name, segmentSuffix, compressionMode, chunkSize); } Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosReader.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosReader.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosReader.java (working copy) @@ -40,7 +40,7 @@ static final int FORMAT_MINIMUM = PreFlexRWFieldInfosWriter.FORMAT_START; @Override - public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException { + public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException { final String fileName = IndexFileNames.segmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION); IndexInput input = directory.openInput(fileName, iocontext); Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosWriter.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosWriter.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldInfosWriter.java (working copy) @@ -56,7 +56,7 @@ static final byte OMIT_POSITIONS = -128; @Override - public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException { + public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION); IndexOutput output = directory.createOutput(fileName, context); boolean success = false; Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java (working copy) @@ -37,6 +37,7 @@ * @see Lucene40FieldInfosFormat * @lucene.experimental */ +@Deprecated public class Lucene40FieldInfosWriter extends FieldInfosWriter { /** Sole constructor. 
*/ @@ -44,7 +45,7 @@ } @Override - public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException { + public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION); IndexOutput output = directory.createOutput(fileName, context); boolean success = false; Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java (working copy) @@ -6,6 +6,7 @@ import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.util.LuceneTestCase; /* @@ -40,6 +41,17 @@ } }; + private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat() { + @Override + public org.apache.lucene.codecs.SegmentInfoWriter getSegmentInfoWriter() { + if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { + return super.getSegmentInfoWriter(); + } else { + return new Lucene40SegmentInfoWriter(); + } + } + }; + private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat(); private final NormsFormat norms = new Lucene40RWNormsFormat(); @@ -57,4 +69,10 @@ public NormsFormat normsFormat() { return norms; } + + @Override + public SegmentInfoFormat segmentInfoFormat() { + return infosFormat; + } + } Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java (revision 0) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java (working copy) @@ -0,0 +1,76 @@ +package org.apache.lucene.codecs.lucene40; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Collections; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.SegmentInfoWriter; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.IOUtils; + +/** + * Lucene 4.0 implementation of {@link SegmentInfoWriter}. + * + * @see Lucene40SegmentInfoFormat + * @lucene.experimental + */ +@Deprecated +public class Lucene40SegmentInfoWriter extends SegmentInfoWriter { + + /** Sole constructor. */ + public Lucene40SegmentInfoWriter() { + } + + /** Save a single segment's info. */ + @Override + public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException { + final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene40SegmentInfoFormat.SI_EXTENSION); + si.addFile(fileName); + + final IndexOutput output = dir.createOutput(fileName, ioContext); + + boolean success = false; + try { + CodecUtil.writeHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT); + // Write the Lucene version that created this segment, since 3.1 + output.writeString(si.getVersion()); + output.writeInt(si.getDocCount()); + + output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO)); + output.writeStringStringMap(si.getDiagnostics()); + output.writeStringStringMap(Collections.emptyMap()); + output.writeStringSet(si.files()); + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(output); + si.dir.deleteFile(fileName); + } else { + output.close(); + } + } + } +} Property changes on: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java (working copy) @@ -6,11 +6,14 @@ import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat; import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosWriter; import org.apache.lucene.codecs.lucene40.Lucene40RWDocValuesFormat; import org.apache.lucene.codecs.lucene40.Lucene40RWNormsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat; +import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoWriter; import org.apache.lucene.util.LuceneTestCase; /* @@ -50,6 +53,17 @@ private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat(); private final NormsFormat norms = new Lucene40RWNormsFormat(); + private final SegmentInfoFormat segmentInfosFormat = new Lucene40SegmentInfoFormat() { + @Override + public org.apache.lucene.codecs.SegmentInfoWriter getSegmentInfoWriter() { + if 
(!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { + return super.getSegmentInfoWriter(); + } else { + return new Lucene40SegmentInfoWriter(); + } + } + }; + @Override public FieldInfosFormat fieldInfosFormat() { return fieldInfos; @@ -69,4 +83,10 @@ public NormsFormat normsFormat() { return norms; } + + @Override + public SegmentInfoFormat segmentInfoFormat() { + return segmentInfosFormat; + } + } Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java (revision 0) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java (working copy) @@ -0,0 +1,109 @@ +package org.apache.lucene.codecs.lucene42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldInfosWriter; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.IOUtils; + +/** + * Lucene 4.2 FieldInfos writer. + * + * @see Lucene42FieldInfosFormat + * @lucene.experimental + */ +@Deprecated +public final class Lucene42FieldInfosWriter extends FieldInfosWriter { + + /** Sole constructor. 
*/ + public Lucene42FieldInfosWriter() { + } + + @Override + public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { + final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION); + IndexOutput output = directory.createOutput(fileName, context); + boolean success = false; + try { + CodecUtil.writeHeader(output, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_CURRENT); + output.writeVInt(infos.size()); + for (FieldInfo fi : infos) { + IndexOptions indexOptions = fi.getIndexOptions(); + byte bits = 0x0; + if (fi.hasVectors()) bits |= Lucene42FieldInfosFormat.STORE_TERMVECTOR; + if (fi.omitsNorms()) bits |= Lucene42FieldInfosFormat.OMIT_NORMS; + if (fi.hasPayloads()) bits |= Lucene42FieldInfosFormat.STORE_PAYLOADS; + if (fi.isIndexed()) { + bits |= Lucene42FieldInfosFormat.IS_INDEXED; + assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads(); + if (indexOptions == IndexOptions.DOCS_ONLY) { + bits |= Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS; + } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { + bits |= Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS; + } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) { + bits |= Lucene42FieldInfosFormat.OMIT_POSITIONS; + } + } + output.writeString(fi.name); + output.writeVInt(fi.number); + output.writeByte(bits); + + // pack the DV types in one byte + final byte dv = docValuesByte(fi.getDocValuesType()); + final byte nrm = docValuesByte(fi.getNormType()); + assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0; + byte val = (byte) (0xff & ((nrm << 4) | dv)); + output.writeByte(val); + output.writeStringStringMap(fi.attributes()); + } + success = true; + } finally { + if (success) { + output.close(); + } else { + IOUtils.closeWhileHandlingException(output); + } + } + } + + private static byte docValuesByte(DocValuesType type) { + if (type == null) { + return 0; + } else if (type == DocValuesType.NUMERIC) { + return 1; + } else if (type == DocValuesType.BINARY) { + return 2; + } else if (type == DocValuesType.SORTED) { + return 3; + } else if (type == DocValuesType.SORTED_SET) { + return 4; + } else { + throw new AssertionError(); + } + } +} Property changes on: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42RWCodec.java (working copy) @@ -17,16 +17,48 @@ * limitations under the License. 
*/ +import java.io.IOException; + import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat; +import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoWriter; +import org.apache.lucene.util.LuceneTestCase; /** * Read-write version of {@link Lucene42Codec} for testing. */ +@SuppressWarnings("deprecation") public class Lucene42RWCodec extends Lucene42Codec { + private static final DocValuesFormat dv = new Lucene42RWDocValuesFormat(); private static final NormsFormat norms = new Lucene42NormsFormat(); + private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat() { + @Override + public FieldInfosWriter getFieldInfosWriter() throws IOException { + if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { + return super.getFieldInfosWriter(); + } else { + return new Lucene42FieldInfosWriter(); + } + } + }; + + private final SegmentInfoFormat segmentInfosFormat = new Lucene40SegmentInfoFormat() { + @Override + public org.apache.lucene.codecs.SegmentInfoWriter getSegmentInfoWriter() { + if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { + return super.getSegmentInfoWriter(); + } else { + return new Lucene40SegmentInfoWriter(); + } + } + }; + @Override public DocValuesFormat getDocValuesFormatForField(String field) { return dv; @@ -36,4 +68,15 @@ public NormsFormat normsFormat() { return norms; } + + @Override + public FieldInfosFormat fieldInfosFormat() { + return fieldInfosFormat; + } + + @Override + public SegmentInfoFormat segmentInfoFormat() { + return segmentInfosFormat; + } + } Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java (revision 0) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java (working copy) @@ -0,0 +1,69 @@ +package org.apache.lucene.codecs.lucene45; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.FieldInfosWriter; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat; +import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoWriter; +import org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat; +import org.apache.lucene.codecs.lucene42.Lucene42FieldInfosWriter; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Read-write version of {@link Lucene45Codec} for testing. + */ +@SuppressWarnings("deprecation") +public class Lucene45RWCodec extends Lucene45Codec { + + private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat() { + @Override + public FieldInfosWriter getFieldInfosWriter() throws IOException { + if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { + return super.getFieldInfosWriter(); + } else { + return new Lucene42FieldInfosWriter(); + } + } + }; + + private final SegmentInfoFormat segmentInfosFormat = new Lucene40SegmentInfoFormat() { + @Override + public org.apache.lucene.codecs.SegmentInfoWriter getSegmentInfoWriter() { + if (!LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE) { + return super.getSegmentInfoWriter(); + } else { + return new Lucene40SegmentInfoWriter(); + } + } + }; + + @Override + public FieldInfosFormat fieldInfosFormat() { + return fieldInfosFormat; + } + + @Override + public SegmentInfoFormat segmentInfoFormat() { + return segmentInfosFormat; + } + +} Property changes on: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/Lucene45RWCodec.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/package.html =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/package.html (revision 0) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/package.html (working copy) @@ -0,0 +1,25 @@ + + + + + + + +Support for testing {@link org.apache.lucene.codecs.lucene45.Lucene45Codec}. 
+ + \ No newline at end of file Property changes on: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene45/package.html ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java (working copy) @@ -438,7 +438,7 @@ // randomly index at lower IndexOption private FieldsProducer buildIndex(Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException { Codec codec = getCodec(); - SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", maxDoc, false, codec, null, null); + SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", maxDoc, false, codec, null); int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed); if (VERBOSE) { Index: lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java (working copy) @@ -31,13 +31,14 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.StoredFieldsFormat; -import org.apache.lucene.codecs.compressing.CompressingCodec; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.FieldType.NumericType; import org.apache.lucene.document.FloatField; import org.apache.lucene.document.IntField; import org.apache.lucene.document.LongField; @@ -44,8 +45,6 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.FieldType.NumericType; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.NumericRangeQuery; @@ -59,7 +58,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; -import org.apache.lucene.util.LuceneTestCase.Nightly; import com.carrotsearch.randomizedtesting.generators.RandomInts; import com.carrotsearch.randomizedtesting.generators.RandomPicks; @@ -78,6 +76,7 @@ */ protected abstract Codec getCodec(); + @Override public void setUp() throws Exception { super.setUp(); // set the default codec, so adding test cases to this isn't fragile @@ -85,6 +84,7 @@ Codec.setDefault(getCodec()); } + @Override public void tearDown() throws Exception { Codec.setDefault(savedCodec); // restore super.tearDown(); @@ -502,7 +502,7 @@ // get another codec, other 
than the default: so we are merging segments across different codecs final Codec otherCodec; if ("SimpleText".equals(Codec.getDefault().getName())) { - otherCodec = new Lucene45Codec(); + otherCodec = new Lucene46Codec(); } else { otherCodec = new SimpleTextCodec(); } Index: lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (working copy) @@ -27,16 +27,16 @@ import java.util.Random; import java.util.Set; +import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat; import org.apache.lucene.codecs.asserting.AssertingPostingsFormat; +import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings; +import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat; import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; import org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat; -import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings; -import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.lucene.codecs.memory.DirectPostingsFormat; import org.apache.lucene.codecs.memory.MemoryDocValuesFormat; import org.apache.lucene.codecs.memory.MemoryPostingsFormat; @@ -46,8 +46,8 @@ import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; import org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat; import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat; +import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat; import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat; -import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -60,7 +60,7 @@ * documents in different orders and the test will still be deterministic * and reproducable. 
*/ -public class RandomCodec extends Lucene45Codec { +public class RandomCodec extends Lucene46Codec { /** Shuffled list of postings formats to use for new mappings */ private List<PostingsFormat> formats = new ArrayList<PostingsFormat>(); Index: lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java (working copy) @@ -221,6 +221,10 @@ w.addIndexes(readers); } + public void updateNumericDocValue(Term term, String field, Long value) throws IOException { + w.updateNumericDocValue(term, field, value); + } + public void deleteDocuments(Term term) throws IOException { w.deleteDocuments(term); } Index: lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (working copy) @@ -1407,6 +1407,17 @@ } return true; } + + /** Returns true if the codec "supports" field updates. */ + public static boolean defaultCodecSupportsFieldUpdates() { + String name = Codec.getDefault().getName(); + if (name.equals("Lucene3x") || name.equals("Appending") + || name.equals("Lucene40") || name.equals("Lucene41") + || name.equals("Lucene42") || name.equals("Lucene45")) { + return false; + } + return true; + } public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException { assertReaderStatisticsEquals(info, leftReader, rightReader); Index: lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java (working copy) @@ -17,14 +17,23 @@ * limitations under the License.
*/ +import static org.apache.lucene.util.LuceneTestCase.INFOSTREAM; +import static org.apache.lucene.util.LuceneTestCase.TEST_CODEC; +import static org.apache.lucene.util.LuceneTestCase.TEST_DOCVALUESFORMAT; +import static org.apache.lucene.util.LuceneTestCase.TEST_POSTINGSFORMAT; +import static org.apache.lucene.util.LuceneTestCase.VERBOSE; +import static org.apache.lucene.util.LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE; +import static org.apache.lucene.util.LuceneTestCase.assumeFalse; +import static org.apache.lucene.util.LuceneTestCase.localeForName; +import static org.apache.lucene.util.LuceneTestCase.random; +import static org.apache.lucene.util.LuceneTestCase.randomLocale; +import static org.apache.lucene.util.LuceneTestCase.randomTimeZone; + import java.io.PrintStream; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; -import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Random; @@ -42,22 +51,19 @@ import org.apache.lucene.codecs.lucene40.Lucene40RWCodec; import org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat; import org.apache.lucene.codecs.lucene41.Lucene41RWCodec; -import org.apache.lucene.codecs.lucene42.Lucene42Codec; import org.apache.lucene.codecs.lucene42.Lucene42RWCodec; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene45.Lucene45RWCodec; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.index.RandomCodec; import org.apache.lucene.search.RandomSimilarityProvider; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; // javadocs +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.junit.internal.AssumptionViolatedException; + import com.carrotsearch.randomizedtesting.RandomizedContext; -import static org.apache.lucene.util.LuceneTestCase.*; - - - /** * Setup and restore suite-level environment (fine grained junk that * doesn't fit anywhere else). @@ -181,6 +187,13 @@ !shouldAvoidCodec("Lucene42"))) { codec = Codec.forName("Lucene42"); assert codec instanceof Lucene42RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; + } else if ("Lucene45".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && + "random".equals(TEST_POSTINGSFORMAT) && + "random".equals(TEST_DOCVALUESFORMAT) && + randomVal == 5 && + !shouldAvoidCodec("Lucene45"))) { + codec = Codec.forName("Lucene45"); + assert codec instanceof Lucene45RWCodec : "fix your classpath to have tests-framework.jar before lucene-core.jar"; } else if (("random".equals(TEST_POSTINGSFORMAT) == false) || ("random".equals(TEST_DOCVALUESFORMAT) == false)) { // the user wired postings or DV: this is messy // refactor into RandomCodec.... 
@@ -199,7 +212,7 @@ dvFormat = DocValuesFormat.forName(TEST_DOCVALUESFORMAT); } - codec = new Lucene45Codec() { + codec = new Lucene46Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return format; Index: lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (revision 1535645) +++ lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (working copy) @@ -44,7 +44,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; import org.apache.lucene.document.BinaryDocValuesField; @@ -54,18 +54,17 @@ import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus; import org.apache.lucene.index.CheckIndex.Status.FieldNormStatus; import org.apache.lucene.index.CheckIndex.Status.StoredFieldStatus; import org.apache.lucene.index.CheckIndex.Status.TermIndexStatus; import org.apache.lucene.index.CheckIndex.Status.TermVectorStatus; -import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.DocValuesType; import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexableField; @@ -73,7 +72,8 @@ import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeScheduler; import org.apache.lucene.index.MultiFields; -import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentInfoPerCommit; +import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TieredMergePolicy; @@ -82,9 +82,7 @@ import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; import org.junit.Assert; import com.carrotsearch.randomizedtesting.RandomizedContext; import com.carrotsearch.randomizedtesting.generators.RandomInts; @@ -697,7 +695,7 @@ if (LuceneTestCase.VERBOSE) { System.out.println("forcing postings format to:" + format); } - return new Lucene45Codec() { + return new Lucene46Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return format; @@ -715,7 +713,7 @@ if (LuceneTestCase.VERBOSE) { System.out.println("forcing docvalues format to:" + format); } - return new Lucene45Codec() { + return new Lucene46Codec() { @Override public DocValuesFormat getDocValuesFormatForField(String field) { return format; @@ -975,27 +973,6 @@ } } - public static FieldInfos getFieldInfos(SegmentInfo info) throws IOException { - Directory 
cfsDir = null; - try { - if (info.getUseCompoundFile()) { - cfsDir = new CompoundFileDirectory(info.dir, - IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), - IOContext.READONCE, - false); - } else { - cfsDir = info.dir; - } - return info.getCodec().fieldInfosFormat().getFieldInfosReader().read(cfsDir, - info.name, - IOContext.READONCE); - } finally { - if (info.getUseCompoundFile() && cfsDir != null) { - cfsDir.close(); - } - } - } - /** * Returns a valid (compiling) Pattern instance with random stuff inside. Be careful * when applying random patterns to longer strings as certain types of patterns Index: lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec =================================================================== --- lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (revision 1535645) +++ lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (working copy) @@ -24,3 +24,4 @@ org.apache.lucene.codecs.lucene40.Lucene40RWCodec org.apache.lucene.codecs.lucene41.Lucene41RWCodec org.apache.lucene.codecs.lucene42.Lucene42RWCodec +org.apache.lucene.codecs.lucene45.Lucene45RWCodec Index: lucene/test-framework =================================================================== --- lucene/test-framework (revision 1535645) +++ lucene/test-framework (working copy) Property changes on: lucene/test-framework ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/test-framework:r1523461,1527154,1527332,1527391,1531620 Index: lucene =================================================================== --- lucene (revision 1535645) +++ lucene (working copy) Property changes on: lucene ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene:r1523461,1523477,1523525,1524900-1524901,1525231,1527147,1527154,1527332,1527361,1527391,1527460,1528076,1528837,1529611,1531496,1531620,1532670,1535526 Index: solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java =================================================================== --- solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java (revision 1535645) +++ solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java (working copy) @@ -3,7 +3,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene45.Lucene45Codec; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.solr.common.util.NamedList; import org.apache.solr.schema.SchemaField; import org.apache.solr.util.plugin.SolrCoreAware; @@ -51,7 +51,7 @@ @Override public void init(NamedList args) { super.init(args); - codec = new Lucene45Codec() { + codec = new Lucene46Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { final SchemaField fieldOrNull = core.getLatestSchema().getFieldOrNull(field); Index: solr/core =================================================================== --- solr/core (revision 1535645) +++ solr/core (working copy) Property changes on: solr/core ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/core:r1527154 Index: solr =================================================================== --- solr (revision 1535645) +++ solr (working copy) Property 
changes on: solr ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr:r1527154 Index: . =================================================================== --- . (revision 1535645) +++ . (working copy) Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk:r1523461,1523477,1523525,1524900-1524901,1525231,1527147,1527154,1527332,1527361,1527391,1527460,1528076,1528837,1529611,1531496,1531620,1532670,1535526
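
The test-framework changes in this patch add two hooks that are meant to be used together: LuceneTestCase.defaultCodecSupportsFieldUpdates(), which reports whether the default codec can rewrite doc values in place, and RandomIndexWriter.updateNumericDocValue(Term, String, Long), which simply forwards to IndexWriter. The sketch below is not part of the patch; it is a minimal, hypothetical test (class name, "id"/"price" field names and the values are invented for illustration) showing how a test built on top of these hooks might look.

package org.apache.lucene.index; // hypothetical location; Term and RandomIndexWriter live in this package

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;

public class TestNumericDocValuesUpdateSketch extends LuceneTestCase {

  @Test
  public void testUpdateNumericDocValue() throws Exception {
    // Skip codecs that cannot rewrite doc values in place; this is the guard added
    // to LuceneTestCase above (Lucene3x/Appending/Lucene40..Lucene45 return false).
    assumeTrue("codec does not support field updates", defaultCodecSupportsFieldUpdates());

    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    Document doc = new Document();
    doc.add(new StringField("id", "doc-0", Field.Store.NO));
    doc.add(new NumericDocValuesField("price", 5L));
    writer.addDocument(doc);
    writer.commit();

    // New passthrough added to RandomIndexWriter: forwards to IndexWriter.updateNumericDocValue.
    writer.updateNumericDocValue(new Term("id", "doc-0"), "price", 10L);

    writer.close();
    dir.close();
  }
}

Guarding with assumeTrue rather than asserting keeps such a test green when the suite randomly picks one of the impersonating RW codecs registered in this patch (Lucene40RWCodec through Lucene45RWCodec), since those deliberately write the older, non-updatable formats.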