Index: modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
===================================================================
--- modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java	(revision 1128767)
+++ modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java	(working copy)
@@ -34,7 +34,7 @@
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.English;
 import org.apache.lucene.util.LuceneTestCase;
Index: modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java	(revision 1128767)
+++ modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java	(working copy)
@@ -40,7 +40,7 @@
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ReaderUtil;
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java	(revision 1128767)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java	(working copy)
@@ -119,11 +119,11 @@
 
   /** Return the offsetGap from the analyzer assigned to field */
   @Override
-  public int getOffsetGap(Fieldable field) {
-    Analyzer analyzer = analyzerMap.get(field.name());
+  public int getOffsetGap(String fieldName, boolean isTokenized) {
+    Analyzer analyzer = analyzerMap.get(fieldName);
     if (analyzer == null)
       analyzer = defaultAnalyzer;
-    return analyzer.getOffsetGap(field);
+    return analyzer.getOffsetGap(fieldName, isTokenized);
   }
   
   @Override
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java	(revision 1128767)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java	(working copy)
@@ -60,8 +60,8 @@
   }
 
   @Override
-  public int getOffsetGap(Fieldable field) {
-    return delegate.getOffsetGap(field);
+  public int getOffsetGap(String fieldName, boolean isTokenized) {
+    return delegate.getOffsetGap(fieldName, isTokenized);
   }
   
   @Override
Index: lucene/common-build.xml
===================================================================
--- lucene/common-build.xml	(revision 1128767)
+++ lucene/common-build.xml	(working copy)
@@ -50,6 +50,7 @@
   <property name="spec.version" value="${version}"/>	
   <property name="year" value="2000-${current.year}"/>
   <property name="final.name" value="lucene-${name}-${version}"/>
+  <property name="decls.final.name" value="lucene-${name}-decls-${version}"/>
 
   <property name="junit.jar" value="junit-4.7.jar"/>
   <property name="junit-location.jar" value="${common.dir}/lib/${junit.jar}"/>
@@ -114,7 +115,7 @@
 
   <property name="project.name" value="site"/> <!-- todo: is this used by anakia or something else? -->
   <property name="build.encoding" value="utf-8"/>
-
+  <property name="declarations.src.dir" location="src/declarations"/>
   <property name="src.dir" location="src/java"/>
   <property name="tests.src.dir" location="src/test"/>
   <property name="tests-framework.src.dir" location="${common.dir}/src/test-framework"/>
@@ -308,13 +309,20 @@
       <fileset dir="src" includes="resources/**"/>
     </copy>
   </target>
-
-  <target name="compile" depends="compile-core">
+  <target name="compile-declarations">
+    <compile
+      srcdir="${declarations.src.dir}"
+      destdir="${build.dir}/classes/declarations">
+      <classpath refid="classpath"/>
+    </compile>
+  </target>
+  <target name="compile" depends="compile-declarations, compile-core">
     <!-- convenience target to compile core -->
   </target>
 
-  <target name="jar-core" depends="compile-core"
+  <target name="jar-core" depends="compile-core, compile-declarations"
     description="Packages the JAR file">
+    <jarify basedir="${build.dir}/classes/declarations" destfile="${build.dir}/${decls.final.name}.jar"/>
     <jarify/>
   </target>
 
Index: lucene/src/test/org/apache/lucene/index/TestIndexReader.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexReader.java	(revision 1128767)
+++ lucene/src/test/org/apache/lucene/index/TestIndexReader.java	(working copy)
@@ -46,17 +46,17 @@
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.NoSuchDirectoryException;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.LockReleaseFailedException;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Bits;
 
 public class TestIndexReader extends LuceneTestCase
 {
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java	(revision 1128767)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java	(working copy)
@@ -58,7 +58,7 @@
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.Lock;
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java	(revision 1128767)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java	(working copy)
@@ -22,13 +22,13 @@
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.ThreadInterruptedException;
 
 /**
  * MultiThreaded IndexWriter tests
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java	(revision 1128767)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java	(working copy)
@@ -37,12 +37,12 @@
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockDirectoryWrapper;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ThreadInterruptedException;
 import java.util.concurrent.atomic.AtomicInteger;
 
 public class TestIndexWriterReader extends LuceneTestCase {
Index: lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestFieldsReader.java	(revision 1128767)
+++ lucene/src/test/org/apache/lucene/index/TestFieldsReader.java	(working copy)
@@ -32,7 +32,7 @@
 import org.apache.lucene.document.SetBasedFieldSelector;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.search.FieldCache;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
Index: lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java	(revision 1128767)
+++ lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java	(working copy)
@@ -39,14 +39,13 @@
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.SimilarityProvider;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 
 public class TestIndexReaderReopen extends LuceneTestCase {
     
Index: lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestAddIndexes.java	(revision 1128767)
+++ lucene/src/test/org/apache/lucene/index/TestAddIndexes.java	(working copy)
@@ -35,7 +35,7 @@
 import org.apache.lucene.index.codecs.standard.StandardCodec;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.RAMDirectory;
Index: lucene/src/test/org/apache/lucene/store/TestDirectory.java
===================================================================
--- lucene/src/test/org/apache/lucene/store/TestDirectory.java	(revision 1128767)
+++ lucene/src/test/org/apache/lucene/store/TestDirectory.java	(working copy)
@@ -17,6 +17,7 @@
  * limitations under the License.
  */
 
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 
Index: lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java	(working copy)
@@ -1,312 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.Attribute;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.AttributeReflector;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.document.NumericField; // for javadocs
-import org.apache.lucene.search.NumericRangeQuery; // for javadocs
-import org.apache.lucene.search.NumericRangeFilter; // for javadocs
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-/**
- * <b>Expert:</b> This class provides a {@link TokenStream}
- * for indexing numeric values that can be used by {@link
- * NumericRangeQuery} or {@link NumericRangeFilter}.
- *
- * <p>Note that for simple usage, {@link NumericField} is
- * recommended.  {@link NumericField} disables norms and
- * term freqs, as they are not usually needed during
- * searching.  If you need to change these settings, you
- * should use this class.
- *
- * <p>See {@link NumericField} for capabilities of fields
- * indexed numerically.</p>
- *
- * <p>Here's an example usage, for an <code>int</code> field:
- *
- * <pre>
- *  Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
- *  field.setOmitNorms(true);
- *  field.setOmitTermFreqAndPositions(true);
- *  document.add(field);
- * </pre>
- *
- * <p>For optimal performance, re-use the TokenStream and Field instance
- * for more than one document:
- *
- * <pre>
- *  NumericTokenStream stream = new NumericTokenStream(precisionStep);
- *  Field field = new Field(name, stream);
- *  field.setOmitNorms(true);
- *  field.setOmitTermFreqAndPositions(true);
- *  Document document = new Document();
- *  document.add(field);
- *
- *  for(all documents) {
- *    stream.setIntValue(value)
- *    writer.addDocument(document);
- *  }
- * </pre>
- *
- * <p>This stream is not intended to be used in analyzers;
- * it's more for iterating the different precisions during
- * indexing a specific numeric value.</p>
-
- * <p><b>NOTE</b>: as token streams are only consumed once
- * the document is added to the index, if you index more
- * than one numeric field, use a separate <code>NumericTokenStream</code>
- * instance for each.</p>
- *
- * <p>See {@link NumericRangeQuery} for more details on the
- * <a
- * href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
- * parameter as well as how numeric fields work under the hood.</p>
- *
- * @since 2.9
- */
-public final class NumericTokenStream extends TokenStream {
-
-  /** The full precision token gets this token type assigned. */
-  public static final String TOKEN_TYPE_FULL_PREC  = "fullPrecNumeric";
-
-  /** The lower precision tokens gets this token type assigned. */
-  public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
-  
-  /** <b>Expert:</b> Use this attribute to get the details of the currently generated token.
-   * @lucene.experimental
-   * @since 4.0
-   */
-  public interface NumericTermAttribute extends Attribute {
-    /** Returns current shift value, undefined before first token */
-    int getShift();
-    /** Returns current token's raw value as {@code long} with all {@link #getShift} applied, undefined before first token */
-    long getRawValue();
-    /** Returns value size in bits (32 for {@code float}, {@code int}; 64 for {@code double}, {@code long}) */
-    int getValueSize();
-    
-    /** <em>Don't call this method!</em>
-      * @lucene.internal */
-    void init(long value, int valSize, int precisionStep, int shift);
-
-    /** <em>Don't call this method!</em>
-      * @lucene.internal */
-    void setShift(int shift);
-
-    /** <em>Don't call this method!</em>
-      * @lucene.internal */
-    int incShift();
-  }
-  
-  // just a wrapper to prevent adding CTA
-  private static final class NumericAttributeFactory extends AttributeFactory {
-    private final AttributeFactory delegate;
-
-    NumericAttributeFactory(AttributeFactory delegate) {
-      this.delegate = delegate;
-    }
-  
-    @Override
-    public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
-      if (CharTermAttribute.class.isAssignableFrom(attClass))
-        throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute.");
-      return delegate.createAttributeInstance(attClass);
-    }
-  }
-
-  /** Implementation of {@link NumericTermAttribute}.
-   * @lucene.internal
-   * @since 4.0
-   */
-  public static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute {
-    private long value = 0L;
-    private int valueSize = 0, shift = 0, precisionStep = 0;
-    private BytesRef bytes = new BytesRef();
-
-    public BytesRef getBytesRef() {
-      return bytes;
-    }
-    
-    public int fillBytesRef() {
-      try {
-        assert valueSize == 64 || valueSize == 32;
-        return (valueSize == 64) ? 
-          NumericUtils.longToPrefixCoded(value, shift, bytes) :
-          NumericUtils.intToPrefixCoded((int) value, shift, bytes);
-      } catch (IllegalArgumentException iae) {
-        // return empty token before first or after last
-        bytes.length = 0;
-        return 0;
-      }
-    }
-
-    public int getShift() { return shift; }
-    public void setShift(int shift) { this.shift = shift; }
-    public int incShift() {
-      return (shift += precisionStep);
-    }
-
-    public long getRawValue() { return value  & ~((1L << shift) - 1L); }
-    public int getValueSize() { return valueSize; }
-
-    public void init(long value, int valueSize, int precisionStep, int shift) {
-      this.value = value;
-      this.valueSize = valueSize;
-      this.precisionStep = precisionStep;
-      this.shift = shift;
-    }
-
-    @Override
-    public void clear() {
-      // this attribute has no contents to clear!
-      // we keep it untouched as it's fully controlled by outer class.
-    }
-    
-    @Override
-    public void reflectWith(AttributeReflector reflector) {
-      fillBytesRef();
-      reflector.reflect(TermToBytesRefAttribute.class, "bytes", new BytesRef(bytes));
-      reflector.reflect(NumericTermAttribute.class, "shift", shift);
-      reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue());
-      reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize);
-    }
-  
-    @Override
-    public void copyTo(AttributeImpl target) {
-      final NumericTermAttribute a = (NumericTermAttribute) target;
-      a.init(value, valueSize, precisionStep, shift);
-    }
-  }
-  
-  /**
-   * Creates a token stream for numeric values using the default <code>precisionStep</code>
-   * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized,
-   * before using set a value using the various set<em>???</em>Value() methods.
-   */
-  public NumericTokenStream() {
-    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, NumericUtils.PRECISION_STEP_DEFAULT);
-  }
-  
-  /**
-   * Creates a token stream for numeric values with the specified
-   * <code>precisionStep</code>. The stream is not yet initialized,
-   * before using set a value using the various set<em>???</em>Value() methods.
-   */
-  public NumericTokenStream(final int precisionStep) {
-    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, precisionStep);
-  }
-
-  /**
-   * Expert: Creates a token stream for numeric values with the specified
-   * <code>precisionStep</code> using the given
-   * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
-   * The stream is not yet initialized,
-   * before using set a value using the various set<em>???</em>Value() methods.
-   */
-  public NumericTokenStream(AttributeFactory factory, final int precisionStep) {
-    super(new NumericAttributeFactory(factory));
-    if (precisionStep < 1)
-      throw new IllegalArgumentException("precisionStep must be >=1");
-    this.precisionStep = precisionStep;
-    numericAtt.setShift(-precisionStep);
-  }
-
-  /**
-   * Initializes the token stream with the supplied <code>long</code> value.
-   * @param value the value, for which this TokenStream should enumerate tokens.
-   * @return this instance, because of this you can use it the following way:
-   * <code>new Field(name, new NumericTokenStream(precisionStep).setLongValue(value))</code>
-   */
-  public NumericTokenStream setLongValue(final long value) {
-    numericAtt.init(value, valSize = 64, precisionStep, -precisionStep);
-    return this;
-  }
-  
-  /**
-   * Initializes the token stream with the supplied <code>int</code> value.
-   * @param value the value, for which this TokenStream should enumerate tokens.
-   * @return this instance, because of this you can use it the following way:
-   * <code>new Field(name, new NumericTokenStream(precisionStep).setIntValue(value))</code>
-   */
-  public NumericTokenStream setIntValue(final int value) {
-    numericAtt.init(value, valSize = 32, precisionStep, -precisionStep);
-    return this;
-  }
-  
-  /**
-   * Initializes the token stream with the supplied <code>double</code> value.
-   * @param value the value, for which this TokenStream should enumerate tokens.
-   * @return this instance, because of this you can use it the following way:
-   * <code>new Field(name, new NumericTokenStream(precisionStep).setDoubleValue(value))</code>
-   */
-  public NumericTokenStream setDoubleValue(final double value) {
-    numericAtt.init(NumericUtils.doubleToSortableLong(value), valSize = 64, precisionStep, -precisionStep);
-    return this;
-  }
-  
-  /**
-   * Initializes the token stream with the supplied <code>float</code> value.
-   * @param value the value, for which this TokenStream should enumerate tokens.
-   * @return this instance, because of this you can use it the following way:
-   * <code>new Field(name, new NumericTokenStream(precisionStep).setFloatValue(value))</code>
-   */
-  public NumericTokenStream setFloatValue(final float value) {
-    numericAtt.init(NumericUtils.floatToSortableInt(value), valSize = 32, precisionStep, -precisionStep);
-    return this;
-  }
-  
-  @Override
-  public void reset() {
-    if (valSize == 0)
-      throw new IllegalStateException("call set???Value() before usage");
-    numericAtt.setShift(-precisionStep);
-  }
-
-  @Override
-  public boolean incrementToken() {
-    if (valSize == 0)
-      throw new IllegalStateException("call set???Value() before usage");
-    
-    // this will only clear all other attributes in this TokenStream
-    clearAttributes();
-
-    final int shift = numericAtt.incShift();
-    typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
-    posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0);
-    return (shift < valSize);
-  }
-
-  /** Returns the precision step. */
-  public int getPrecisionStep() {
-    return precisionStep;
-  }
-  
-  // members
-  private final NumericTermAttribute numericAtt = addAttribute(NumericTermAttribute.class);
-  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
-  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-  
-  private int valSize = 0; // valSize==0 means not initialized
-  private final int precisionStep;
-}
Index: lucene/src/java/org/apache/lucene/analysis/CharReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/CharReader.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/CharReader.java	(working copy)
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis;
-
-import java.io.IOException;
-import java.io.Reader;
-
-/**
- * CharReader is a Reader wrapper. It reads chars from
- * Reader and outputs {@link CharStream}, defining an
- * identify function {@link #correctOffset} method that
- * simply returns the provided offset.
- */
-public final class CharReader extends CharStream {
-
-  private final Reader input;
-  
-  public static CharStream get(Reader input) {
-    return input instanceof CharStream ?
-      (CharStream)input : new CharReader(input);
-  }
-
-  private CharReader(Reader in) {
-    input = in;
-  }
-
-  @Override
-  public int correctOffset(int currentOff) {
-    return currentOff;
-  }
-
-  @Override
-  public void close() throws IOException {
-    input.close();
-  }
-
-  @Override
-  public int read(char[] cbuf, int off, int len) throws IOException {
-    return input.read(cbuf, off, len);
-  }
-
-  @Override
-  public boolean markSupported(){
-    return input.markSupported();
-  }
-
-  @Override
-  public void mark( int readAheadLimit ) throws IOException {
-    input.mark(readAheadLimit);
-  }
-
-  @Override
-  public void reset() throws IOException {
-    input.reset();
-  }
-}
Index: lucene/src/java/org/apache/lucene/analysis/Token.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/Token.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/Token.java	(working copy)
@@ -1,647 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.index.Payload;
-import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
-import org.apache.lucene.util.Attribute;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.AttributeReflector;
-
-/** 
-  A Token is an occurrence of a term from the text of a field.  It consists of
-  a term's text, the start and end offset of the term in the text of the field,
-  and a type string.
-  <p>
-  The start and end offsets permit applications to re-associate a token with
-  its source text, e.g., to display highlighted query terms in a document
-  browser, or to show matching text fragments in a <abbr title="KeyWord In Context">KWIC</abbr>
-  display, etc.
-  <p>
-  The type is a string, assigned by a lexical analyzer
-  (a.k.a. tokenizer), naming the lexical or syntactic class that the token
-  belongs to.  For example an end of sentence marker token might be implemented
-  with type "eos".  The default token type is "word".  
-  <p>
-  A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
-  length byte array. Use {@link DocsAndPositionsEnum#getPayload()} to retrieve the 
-  payloads from the index.
-  
-  <br><br>
-  
-  <p><b>NOTE:</b> As of 2.9, Token implements all {@link Attribute} interfaces
-  that are part of core Lucene and can be found in the {@code tokenattributes} subpackage.
-  Even though it is not necessary to use Token anymore, with the new TokenStream API it can
-  be used as convenience class that implements all {@link Attribute}s, which is especially useful
-  to easily switch from the old to the new TokenStream API.
-  
-  <br><br>
-  
-  <p>Tokenizers and TokenFilters should try to re-use a Token
-  instance when possible for best performance, by
-  implementing the {@link TokenStream#incrementToken()} API.
-  Failing that, to create a new Token you should first use
-  one of the constructors that starts with null text.  To load
-  the token from a char[] use {@link #copyBuffer(char[], int, int)}.
-  To load from a String use {@link #setEmpty} followed by {@link #append(CharSequence)} or {@link #append(CharSequence, int, int)}.
-  Alternatively you can get the Token's termBuffer by calling either {@link #buffer()},
-  if you know that your text is shorter than the capacity of the termBuffer
-  or {@link #resizeBuffer(int)}, if there is any possibility
-  that you may need to grow the buffer. Fill in the characters of your term into this
-  buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string,
-  or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setLength(int)} to
-  set the length of the term text.  See <a target="_top"
-  href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
-  for details.</p>
-  <p>Typical Token reuse patterns:
-  <ul>
-  <li> Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
-  <pre>
-    return reusableToken.reinit(string, startOffset, endOffset[, type]);
-  </pre>
-  </li>
-  <li> Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
-  <pre>
-    return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
-  </pre>
-  </li>
-  </li>
-  <li> Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
-  <pre>
-    return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
-  </pre>
-  </li>
-  <li> Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
-  <pre>
-    return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
-  </pre>
-  </li>
-  <li> Copying from one one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
-  <pre>
-    return reusableToken.reinit(source.buffer(), 0, source.length(), source.startOffset(), source.endOffset()[, source.type()]);
-  </pre>
-  </li>
-  </ul>
-  A few things to note:
-  <ul>
-  <li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
-  <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
-  <li>The startOffset and endOffset represent the start and offset in the source text, so be careful in adjusting them.</li>
-  <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
-  </ul>
-  </p>
-  <p>
-  <b>Please note:</b> With Lucene 3.1, the <code>{@linkplain #toString toString()}</code> method had to be changed to match the
-  {@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
-  This method now only prints the term text, no additional information anymore.
-  </p>
-  @see org.apache.lucene.index.Payload
-*/
-public class Token extends CharTermAttributeImpl 
-                   implements TypeAttribute, PositionIncrementAttribute,
-                              FlagsAttribute, OffsetAttribute, PayloadAttribute {
-
-  private int startOffset,endOffset;
-  private String type = DEFAULT_TYPE;
-  private int flags;
-  private Payload payload;
-  private int positionIncrement = 1;
-
-  /** Constructs a Token will null text. */
-  public Token() {
-  }
-
-  /** Constructs a Token with null text and start & end
-   *  offsets.
-   *  @param start start offset in the source text
-   *  @param end end offset in the source text */
-  public Token(int start, int end) {
-    startOffset = start;
-    endOffset = end;
-  }
-
-  /** Constructs a Token with null text and start & end
-   *  offsets plus the Token type.
-   *  @param start start offset in the source text
-   *  @param end end offset in the source text
-   *  @param typ the lexical type of this Token */
-  public Token(int start, int end, String typ) {
-    startOffset = start;
-    endOffset = end;
-    type = typ;
-  }
-
-  /**
-   * Constructs a Token with null text and start & end
-   *  offsets plus flags. NOTE: flags is EXPERIMENTAL.
-   *  @param start start offset in the source text
-   *  @param end end offset in the source text
-   *  @param flags The bits to set for this token
-   */
-  public Token(int start, int end, int flags) {
-    startOffset = start;
-    endOffset = end;
-    this.flags = flags;
-  }
-
-  /** Constructs a Token with the given term text, and start
-   *  & end offsets.  The type defaults to "word."
-   *  <b>NOTE:</b> for better indexing speed you should
-   *  instead use the char[] termBuffer methods to set the
-   *  term text.
-   *  @param text term text
-   *  @param start start offset
-   *  @param end end offset
-   */
-  public Token(String text, int start, int end) {
-    append(text);
-    startOffset = start;
-    endOffset = end;
-  }
-
-  /** Constructs a Token with the given text, start and end
-   *  offsets, & type.  <b>NOTE:</b> for better indexing
-   *  speed you should instead use the char[] termBuffer
-   *  methods to set the term text.
-   *  @param text term text
-   *  @param start start offset
-   *  @param end end offset
-   *  @param typ token type
-   */
-  public Token(String text, int start, int end, String typ) {
-    append(text);
-    startOffset = start;
-    endOffset = end;
-    type = typ;
-  }
-
-  /**
-   *  Constructs a Token with the given text, start and end
-   *  offsets, & type.  <b>NOTE:</b> for better indexing
-   *  speed you should instead use the char[] termBuffer
-   *  methods to set the term text.
-   * @param text
-   * @param start
-   * @param end
-   * @param flags token type bits
-   */
-  public Token(String text, int start, int end, int flags) {
-    append(text);
-    startOffset = start;
-    endOffset = end;
-    this.flags = flags;
-  }
-
-  /**
-   *  Constructs a Token with the given term buffer (offset
-   *  & length), start and end
-   *  offsets
-   * @param startTermBuffer
-   * @param termBufferOffset
-   * @param termBufferLength
-   * @param start
-   * @param end
-   */
-  public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
-    copyBuffer(startTermBuffer, termBufferOffset, termBufferLength);
-    startOffset = start;
-    endOffset = end;
-  }
-
-  /** Set the position increment.  This determines the position of this token
-   * relative to the previous Token in a {@link TokenStream}, used in phrase
-   * searching.
-   *
-   * <p>The default value is one.
-   *
-   * <p>Some common uses for this are:<ul>
-   *
-   * <li>Set it to zero to put multiple terms in the same position.  This is
-   * useful if, e.g., a word has multiple stems.  Searches for phrases
-   * including either stem will match.  In this case, all but the first stem's
-   * increment should be set to zero: the increment of the first instance
-   * should be one.  Repeating a token with an increment of zero can also be
-   * used to boost the scores of matches on that token.
-   *
-   * <li>Set it to values greater than one to inhibit exact phrase matches.
-   * If, for example, one does not want phrases to match across removed stop
-   * words, then one could build a stop word filter that removes stop words and
-   * also sets the increment to the number of stop words removed before each
-   * non-stop word.  Then exact phrase queries will only match when the terms
-   * occur with no intervening stop words.
-   *
-   * </ul>
-   * @param positionIncrement the distance from the prior term
-   * @see org.apache.lucene.index.DocsAndPositionsEnum
-   */
-  public void setPositionIncrement(int positionIncrement) {
-    if (positionIncrement < 0)
-      throw new IllegalArgumentException
-        ("Increment must be zero or greater: " + positionIncrement);
-    this.positionIncrement = positionIncrement;
-  }
-
-  /** Returns the position increment of this Token.
-   * @see #setPositionIncrement
-   */
-  public int getPositionIncrement() {
-    return positionIncrement;
-  }
-
-  /** Returns this Token's starting offset, the position of the first character
-    corresponding to this token in the source text.
-
-    Note that the difference between endOffset() and startOffset() may not be
-    equal to {@link #length}, as the term text may have been altered by a
-    stemmer or some other filter. */
-  public final int startOffset() {
-    return startOffset;
-  }
-
-  /** Set the starting offset.
-      @see #startOffset() */
-  public void setStartOffset(int offset) {
-    this.startOffset = offset;
-  }
-
-  /** Returns this Token's ending offset, one greater than the position of the
-    last character corresponding to this token in the source text. The length
-    of the token in the source text is (endOffset - startOffset). */
-  public final int endOffset() {
-    return endOffset;
-  }
-
-  /** Set the ending offset.
-      @see #endOffset() */
-  public void setEndOffset(int offset) {
-    this.endOffset = offset;
-  }
-  
-  /** Set the starting and ending offset.
-  @see #startOffset() and #endOffset()*/
-  public void setOffset(int startOffset, int endOffset) {
-    this.startOffset = startOffset;
-    this.endOffset = endOffset;
-  }
-
-  /** Returns this Token's lexical type.  Defaults to "word". */
-  public final String type() {
-    return type;
-  }
-
-  /** Set the lexical type.
-      @see #type() */
-  public final void setType(String type) {
-    this.type = type;
-  }
-
-  /**
-   * <p/>
-   *
-   * Get the bitset for any bits that have been set.  This is completely distinct from {@link #type()}, although they do share similar purposes.
-   * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
-   *
-   * 
-   * @return The bits
-   * @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
-   */
-  public int getFlags() {
-    return flags;
-  }
-
-  /**
-   * @see #getFlags()
-   */
-  public void setFlags(int flags) {
-    this.flags = flags;
-  }
-
-  /**
-   * Returns this Token's payload.
-   */ 
-  public Payload getPayload() {
-    return this.payload;
-  }
-
-  /** 
-   * Sets this Token's payload.
-   */
-  public void setPayload(Payload payload) {
-    this.payload = payload;
-  }
-  
-  /** Resets the term text, payload, flags, and positionIncrement,
-   * startOffset, endOffset and token type to default.
-   */
-  @Override
-  public void clear() {
-    super.clear();
-    payload = null;
-    positionIncrement = 1;
-    flags = 0;
-    startOffset = endOffset = 0;
-    type = DEFAULT_TYPE;
-  }
-
-  @Override
-  public Object clone() {
-    Token t = (Token)super.clone();
-    // Do a deep clone
-    if (payload != null) {
-      t.payload = (Payload) payload.clone();
-    }
-    return t;
-  }
-
-  /** Makes a clone, but replaces the term buffer &
-   * start/end offset in the process.  This is more
-   * efficient than doing a full clone (and then calling
-   * {@link #copyBuffer}) because it saves a wasted copy of the old
-   * termBuffer. */
-  public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
-    final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
-    t.positionIncrement = positionIncrement;
-    t.flags = flags;
-    t.type = type;
-    if (payload != null)
-      t.payload = (Payload) payload.clone();
-    return t;
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (obj == this)
-      return true;
-
-    if (obj instanceof Token) {
-      final Token other = (Token) obj;
-      return (startOffset == other.startOffset &&
-          endOffset == other.endOffset && 
-          flags == other.flags &&
-          positionIncrement == other.positionIncrement &&
-          (type == null ? other.type == null : type.equals(other.type)) &&
-          (payload == null ? other.payload == null : payload.equals(other.payload)) &&
-          super.equals(obj)
-      );
-    } else
-      return false;
-  }
-
-  @Override
-  public int hashCode() {
-    int code = super.hashCode();
-    code = code * 31 + startOffset;
-    code = code * 31 + endOffset;
-    code = code * 31 + flags;
-    code = code * 31 + positionIncrement;
-    if (type != null)
-      code = code * 31 + type.hashCode();
-    if (payload != null)
-      code = code * 31 + payload.hashCode();
-    return code;
-  }
-      
-  // like clear() but doesn't clear termBuffer/text
-  private void clearNoTermBuffer() {
-    payload = null;
-    positionIncrement = 1;
-    flags = 0;
-    startOffset = endOffset = 0;
-    type = DEFAULT_TYPE;
-  }
-
-  /** Shorthand for calling {@link #clear},
-   *  {@link #copyBuffer(char[], int, int)},
-   *  {@link #setStartOffset},
-   *  {@link #setEndOffset},
-   *  {@link #setType}
-   *  @return this Token instance */
-  public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
-    clearNoTermBuffer();
-    copyBuffer(newTermBuffer, newTermOffset, newTermLength);
-    payload = null;
-    positionIncrement = 1;
-    startOffset = newStartOffset;
-    endOffset = newEndOffset;
-    type = newType;
-    return this;
-  }
-
-  /** Shorthand for calling {@link #clear},
-   *  {@link #copyBuffer(char[], int, int)},
-   *  {@link #setStartOffset},
-   *  {@link #setEndOffset}
-   *  {@link #setType} on Token.DEFAULT_TYPE
-   *  @return this Token instance */
-  public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
-    clearNoTermBuffer();
-    copyBuffer(newTermBuffer, newTermOffset, newTermLength);
-    startOffset = newStartOffset;
-    endOffset = newEndOffset;
-    type = DEFAULT_TYPE;
-    return this;
-  }
-
-  /** Shorthand for calling {@link #clear},
-   *  {@link #append(CharSequence)},
-   *  {@link #setStartOffset},
-   *  {@link #setEndOffset}
-   *  {@link #setType}
-   *  @return this Token instance */
-  public Token reinit(String newTerm, int newStartOffset, int newEndOffset, String newType) {
-    clear();
-    append(newTerm);
-    startOffset = newStartOffset;
-    endOffset = newEndOffset;
-    type = newType;
-    return this;
-  }
-
-  /** Shorthand for calling {@link #clear},
-   *  {@link #append(CharSequence, int, int)},
-   *  {@link #setStartOffset},
-   *  {@link #setEndOffset}
-   *  {@link #setType}
-   *  @return this Token instance */
-  public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
-    clear();
-    append(newTerm, newTermOffset, newTermOffset + newTermLength);
-    startOffset = newStartOffset;
-    endOffset = newEndOffset;
-    type = newType;
-    return this;
-  }
-
-  /** Shorthand for calling {@link #clear},
-   *  {@link #append(CharSequence)},
-   *  {@link #setStartOffset},
-   *  {@link #setEndOffset}
-   *  {@link #setType} on Token.DEFAULT_TYPE
-   *  @return this Token instance */
-  public Token reinit(String newTerm, int newStartOffset, int newEndOffset) {
-    clear();
-    append(newTerm);
-    startOffset = newStartOffset;
-    endOffset = newEndOffset;
-    type = DEFAULT_TYPE;
-    return this;
-  }
-
-  /** Shorthand for calling {@link #clear},
-   *  {@link #append(CharSequence, int, int)},
-   *  {@link #setStartOffset},
-   *  {@link #setEndOffset}
-   *  {@link #setType} on Token.DEFAULT_TYPE
-   *  @return this Token instance */
-  public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
-    clear();
-    append(newTerm, newTermOffset, newTermOffset + newTermLength);
-    startOffset = newStartOffset;
-    endOffset = newEndOffset;
-    type = DEFAULT_TYPE;
-    return this;
-  }
-
-  /**
-   * Copy the prototype token's fields into this one. Note: Payloads are shared.
-   * @param prototype
-   */
-  public void reinit(Token prototype) {
-    copyBuffer(prototype.buffer(), 0, prototype.length());
-    positionIncrement = prototype.positionIncrement;
-    flags = prototype.flags;
-    startOffset = prototype.startOffset;
-    endOffset = prototype.endOffset;
-    type = prototype.type;
-    payload =  prototype.payload;
-  }
-
-  /**
-   * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
-   * @param prototype
-   * @param newTerm
-   */
-  public void reinit(Token prototype, String newTerm) {
-    setEmpty().append(newTerm);
-    positionIncrement = prototype.positionIncrement;
-    flags = prototype.flags;
-    startOffset = prototype.startOffset;
-    endOffset = prototype.endOffset;
-    type = prototype.type;
-    payload =  prototype.payload;
-  }
-
-  /**
-   * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
-   * @param prototype
-   * @param newTermBuffer
-   * @param offset
-   * @param length
-   */
-  public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
-    copyBuffer(newTermBuffer, offset, length);
-    positionIncrement = prototype.positionIncrement;
-    flags = prototype.flags;
-    startOffset = prototype.startOffset;
-    endOffset = prototype.endOffset;
-    type = prototype.type;
-    payload =  prototype.payload;
-  }
-
-  @Override
-  public void copyTo(AttributeImpl target) {
-    if (target instanceof Token) {
-      final Token to = (Token) target;
-      to.reinit(this);
-      // reinit shares the payload, so clone it:
-      if (payload !=null) {
-        to.payload = (Payload) payload.clone();
-      }
-    } else {
-      super.copyTo(target);
-      ((OffsetAttribute) target).setOffset(startOffset, endOffset);
-      ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
-      ((PayloadAttribute) target).setPayload((payload == null) ? null : (Payload) payload.clone());
-      ((FlagsAttribute) target).setFlags(flags);
-      ((TypeAttribute) target).setType(type);
-    }
-  }
-
-  @Override
-  public void reflectWith(AttributeReflector reflector) {
-    super.reflectWith(reflector);
-    reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
-    reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
-    reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
-    reflector.reflect(PayloadAttribute.class, "payload", payload);
-    reflector.reflect(FlagsAttribute.class, "flags", flags);
-    reflector.reflect(TypeAttribute.class, "type", type);
-  }
-
-  /** Convenience factory that returns <code>Token</code> as implementation for the basic
-   * attributes and return the default impl (with &quot;Impl&quot; appended) for all other
-   * attributes.
-   * @since 3.0
-   */
-  public static final AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
-    new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
-  
-  /** <b>Expert:</b> Creates a TokenAttributeFactory returning {@link Token} as instance for the basic attributes
-   * and for all other attributes calls the given delegate factory.
-   * @since 3.0
-   */
-  public static final class TokenAttributeFactory extends AttributeSource.AttributeFactory {
-    
-    private final AttributeSource.AttributeFactory delegate;
-    
-    /** <b>Expert</b>: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes
-     * and for all other attributes calls the given delegate factory. */
-    public TokenAttributeFactory(AttributeSource.AttributeFactory delegate) {
-      this.delegate = delegate;
-    }
-  
-    @Override
-    public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
-      return attClass.isAssignableFrom(Token.class)
-        ? new Token() : delegate.createAttributeInstance(attClass);
-    }
-    
-    @Override
-    public boolean equals(Object other) {
-      if (this == other) return true;
-      if (other instanceof TokenAttributeFactory) {
-        final TokenAttributeFactory af = (TokenAttributeFactory) other;
-        return this.delegate.equals(af.delegate);
-      }
-      return false;
-    }
-    
-    @Override
-    public int hashCode() {
-      return delegate.hashCode() ^ 0x0a45aa31;
-    }
-  }
-
-}
Index: lucene/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/CachingTokenFilter.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/CachingTokenFilter.java	(working copy)
@@ -1,86 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-
-import org.apache.lucene.util.AttributeSource;
-
-/**
- * This class can be used if the token attributes of a TokenStream
- * are intended to be consumed more than once. It caches
- * all token attribute states locally in a List.
- * 
- * <P>CachingTokenFilter implements the optional method
- * {@link TokenStream#reset()}, which repositions the
- * stream to the first Token. 
- */
-public final class CachingTokenFilter extends TokenFilter {
-  private List<AttributeSource.State> cache = null;
-  private Iterator<AttributeSource.State> iterator = null; 
-  private AttributeSource.State finalState;
-  
-  public CachingTokenFilter(TokenStream input) {
-    super(input);
-  }
-  
-  @Override
-  public final boolean incrementToken() throws IOException {
-    if (cache == null) {
-      // fill cache lazily
-      cache = new LinkedList<AttributeSource.State>();
-      fillCache();
-      iterator = cache.iterator();
-    }
-    
-    if (!iterator.hasNext()) {
-      // the cache is exhausted, return false
-      return false;
-    }
-    // Since the TokenFilter can be reset, the tokens need to be preserved as immutable.
-    restoreState(iterator.next());
-    return true;
-  }
-  
-  @Override
-  public final void end() throws IOException {
-    if (finalState != null) {
-      restoreState(finalState);
-    }
-  }
-
-  @Override
-  public void reset() throws IOException {
-    if(cache != null) {
-      iterator = cache.iterator();
-    }
-  }
-  
-  private void fillCache() throws IOException {
-    while(input.incrementToken()) {
-      cache.add(captureState());
-    }
-    // capture final state
-    input.end();
-    finalState = captureState();
-  }
-
-}
Index: lucene/src/java/org/apache/lucene/analysis/Analyzer.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/Analyzer.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/Analyzer.java	(working copy)
@@ -1,153 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-import java.io.IOException;
-import java.io.Closeable;
-import java.lang.reflect.Modifier;
-
-import org.apache.lucene.util.CloseableThreadLocal;
-import org.apache.lucene.store.AlreadyClosedException;
-
-import org.apache.lucene.document.Fieldable;
-
-/** An Analyzer builds TokenStreams, which analyze text.  It thus represents a
- *  policy for extracting index terms from text.
- *  <p>
- *  Typical implementations first build a Tokenizer, which breaks the stream of
- *  characters from the Reader into raw Tokens.  One or more TokenFilters may
- *  then be applied to the output of the Tokenizer.
- * <p>The {@code Analyzer}-API in Lucene is based on the decorator pattern.
- * Therefore all non-abstract subclasses must be final or their {@link #tokenStream}
- * and {@link #reusableTokenStream} implementations must be final! This is checked
- * when Java assertions are enabled.
- */
-public abstract class Analyzer implements Closeable {
-
-  protected Analyzer() {
-    super();
-    assert assertFinal();
-  }
-  
-  private boolean assertFinal() {
-    try {
-      final Class<?> clazz = getClass();
-      assert clazz.isAnonymousClass() ||
-        (clazz.getModifiers() & (Modifier.FINAL | Modifier.PRIVATE)) != 0 ||
-        (
-          Modifier.isFinal(clazz.getMethod("tokenStream", String.class, Reader.class).getModifiers()) &&
-          Modifier.isFinal(clazz.getMethod("reusableTokenStream", String.class, Reader.class).getModifiers())
-        ) :
-        "Analyzer implementation classes or at least their tokenStream() and reusableTokenStream() implementations must be final";
-      return true;
-    } catch (NoSuchMethodException nsme) {
-      return false;
-    }
-  }
-
-  /** Creates a TokenStream which tokenizes all the text in the provided
-   * Reader.  Must be able to handle null field name for
-   * backward compatibility.
-   */
-  public abstract TokenStream tokenStream(String fieldName, Reader reader);
-
-  /** Creates a TokenStream that is allowed to be re-used
-   *  from the previous time that the same thread called
-   *  this method.  Callers that do not need to use more
-   *  than one TokenStream at the same time from this
-   *  analyzer should use this method for better
-   *  performance.
-   */
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    return tokenStream(fieldName, reader);
-  }
-
-  private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
-
-  /** Used by Analyzers that implement reusableTokenStream
-   *  to retrieve previously saved TokenStreams for re-use
-   *  by the same thread. */
-  protected Object getPreviousTokenStream() {
-    try {
-      return tokenStreams.get();
-    } catch (NullPointerException npe) {
-      if (tokenStreams == null) {
-        throw new AlreadyClosedException("this Analyzer is closed");
-      } else {
-        throw npe;
-      }
-    }
-  }
-
-  /** Used by Analyzers that implement reusableTokenStream
-   *  to save a TokenStream for later re-use by the same
-   *  thread. */
-  protected void setPreviousTokenStream(Object obj) {
-    try {
-      tokenStreams.set(obj);
-    } catch (NullPointerException npe) {
-      if (tokenStreams == null) {
-        throw new AlreadyClosedException("this Analyzer is closed");
-      } else {
-        throw npe;
-      }
-    }
-  }
-
-  /**
-   * Invoked before indexing a Fieldable instance if
-   * terms have already been added to that field.  This allows custom
-   * analyzers to place an automatic position increment gap between
-   * Fieldable instances using the same field name.  The default value
-   * position increment gap is 0.  With a 0 position increment gap and
-   * the typical default token position increment of 1, all terms in a field,
-   * including across Fieldable instances, are in successive positions, allowing
-   * exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
-   *
-   * @param fieldName Fieldable name being indexed.
-   * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
-   */
-  public int getPositionIncrementGap(String fieldName) {
-    return 0;
-  }
-
-  /**
-   * Just like {@link #getPositionIncrementGap}, except for
-   * Token offsets instead.  By default this returns 1 for
-   * tokenized fields and, as if the fields were joined
-   * with an extra space character, and 0 for un-tokenized
-   * fields.  This method is only called if the field
-   * produced at least one token for indexing.
-   *
-   * @param field the field just indexed
-   * @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
-   */
-  public int getOffsetGap(Fieldable field) {
-    if (field.isTokenized())
-      return 1;
-    else
-      return 0;
-  }
-
-  /** Frees persistent resources used by this Analyzer */
-  public void close() {
-    tokenStreams.close();
-    tokenStreams = null;
-  }
-}
Index: lucene/src/java/org/apache/lucene/analysis/TokenFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/TokenFilter.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/TokenFilter.java	(working copy)
@@ -1,56 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-/** A TokenFilter is a TokenStream whose input is another TokenStream.
-  <p>
-  This is an abstract class; subclasses must override {@link #incrementToken()}.
-  @see TokenStream
-  */
-public abstract class TokenFilter extends TokenStream {
-  /** The source of tokens for this filter. */
-  protected final TokenStream input;
-
-  /** Construct a token stream filtering the given input. */
-  protected TokenFilter(TokenStream input) {
-    super(input);
-    this.input = input;
-  }
-  
-  /** Performs end-of-stream operations, if any, and calls then <code>end()</code> on the
-   * input TokenStream.<p/> 
-   * <b>NOTE:</b> Be sure to call <code>super.end()</code> first when overriding this method.*/
-  @Override
-  public void end() throws IOException {
-    input.end();
-  }
-  
-  /** Close the input TokenStream. */
-  @Override
-  public void close() throws IOException {
-    input.close();
-  }
-
-  /** Reset the filter as well as the input TokenStream. */
-  @Override
-  public void reset() throws IOException {
-    input.reset();
-  }
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java	(working copy)
@@ -1,99 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.Payload;
-import org.apache.lucene.util.AttributeImpl;
-
-/**
- * The payload of a Token. See also {@link Payload}.
- */
-public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable {
-  private Payload payload;  
-  
-  /**
-   * Initialize this attribute with no payload.
-   */
-  public PayloadAttributeImpl() {}
-  
-  /**
-   * Initialize this attribute with the given payload. 
-   */
-  public PayloadAttributeImpl(Payload payload) {
-    this.payload = payload;
-  }
-  
-  /**
-   * Returns this Token's payload.
-   */ 
-  public Payload getPayload() {
-    return this.payload;
-  }
-
-  /** 
-   * Sets this Token's payload.
-   */
-  public void setPayload(Payload payload) {
-    this.payload = payload;
-  }
-  
-  @Override
-  public void clear() {
-    payload = null;
-  }
-
-  @Override
-  public Object clone()  {
-    PayloadAttributeImpl clone = (PayloadAttributeImpl) super.clone();
-    if (payload != null) {
-      clone.payload = (Payload) payload.clone();
-    }
-    return clone;
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == this) {
-      return true;
-    }
-    
-    if (other instanceof PayloadAttribute) {
-      PayloadAttributeImpl o = (PayloadAttributeImpl) other;
-      if (o.payload == null || payload == null) {
-        return o.payload == null && payload == null;
-      }
-      
-      return o.payload.equals(payload);
-    }
-    
-    return false;
-  }
-
-  @Override
-  public int hashCode() {
-    return (payload == null) ? 0 : payload.hashCode();
-  }
-
-  @Override
-  public void copyTo(AttributeImpl target) {
-    PayloadAttribute t = (PayloadAttribute) target;
-    t.setPayload((payload == null) ? null : (Payload) payload.clone());
-  }  
-
-  
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermToBytesRefAttribute.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermToBytesRefAttribute.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermToBytesRefAttribute.java	(working copy)
@@ -1,77 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.Attribute;
-import org.apache.lucene.util.BytesRef;
-
-/**
- * This attribute is requested by TermsHashPerField to index the contents.
- * This attribute can be used to customize the final byte[] encoding of terms.
- * <p>
- * Consumers of this attribute call {@link #getBytesRef()} up-front, and then
- * invoke {@link #fillBytesRef()} for each term. Example:
- * <pre class="prettyprint">
- *   final TermToBytesRefAttribute termAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
- *   final BytesRef bytes = termAtt.getBytesRef();
- *
- *   while (termAtt.incrementToken() {
- *
- *     // you must call termAtt.fillBytesRef() before doing something with the bytes.
- *     // this encodes the term value (internally it might be a char[], etc) into the bytes.
- *     int hashCode = termAtt.fillBytesRef();
- *
- *     if (isInteresting(bytes)) {
- *     
- *       // because the bytes are reused by the attribute (like CharTermAttribute's char[] buffer),
- *       // you should make a copy if you need persistent access to the bytes, otherwise they will
- *       // be rewritten across calls to incrementToken()
- *
- *       doSomethingWith(new BytesRef(bytes));
- *     }
- *   }
- *   ...
- * </pre>
- * @lucene.experimental This is a very expert API, please use
- * {@link CharTermAttributeImpl} and its implementation of this method
- * for UTF-8 terms.
- */
-public interface TermToBytesRefAttribute extends Attribute {
-  /** 
-   * Updates the bytes {@link #getBytesRef()} to contain this term's
-   * final encoding, and returns its hashcode.
-   * @return the hashcode as defined by {@link BytesRef#hashCode}:
-   * <pre>
-   *  int hash = 0;
-   *  for (int i = termBytes.offset; i &lt; termBytes.offset+termBytes.length; i++) {
-   *    hash = 31*hash + termBytes.bytes[i];
-   *  }
-   * </pre>
-   * Implement this for performance reasons, if your code can calculate
-   * the hash on-the-fly. If this is not the case, just return
-   * {@code termBytes.hashCode()}.
-   */
-  public int fillBytesRef();
-  
-  /**
-   * Retrieve this attribute's BytesRef. The bytes are updated 
-   * from the current term when the consumer calls {@link #fillBytesRef()}.
-   * @return this Attributes internal BytesRef.
-   */
-  public BytesRef getBytesRef();
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java	(working copy)
@@ -1,78 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.AttributeImpl;
-
-/**
- * This attribute can be used to pass different flags down the tokenizer chain,
- * eg from one TokenFilter to another one. 
- * @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
- */
-public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable {
-  private int flags = 0;
-  
-  /**
-   * <p/>
-   *
-   * Get the bitset for any bits that have been set.  This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
-   * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
-   *
-   *
-   * @return The bits
-   */
-  public int getFlags() {
-    return flags;
-  }
-
-  /**
-   * @see #getFlags()
-   */
-  public void setFlags(int flags) {
-    this.flags = flags;
-  }
-  
-  @Override
-  public void clear() {
-    flags = 0;
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (this == other) {
-      return true;
-    }
-    
-    if (other instanceof FlagsAttributeImpl) {
-      return ((FlagsAttributeImpl) other).flags == flags;
-    }
-    
-    return false;
-  }
-
-  @Override
-  public int hashCode() {
-    return flags;
-  }
-  
-  @Override
-  public void copyTo(AttributeImpl target) {
-    FlagsAttribute t = (FlagsAttribute) target;
-    t.setFlags(flags);
-  }
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/TypeAttribute.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/TypeAttribute.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/TypeAttribute.java	(working copy)
@@ -1,36 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.Attribute;
-
-/**
- * A Token's lexical type. The Default value is "word". 
- */
-public interface TypeAttribute extends Attribute {
-
-  /** the default type */
-  public static final String DEFAULT_TYPE = "word";
-
-  /** Returns this Token's lexical type.  Defaults to "word". */
-  public String type();
-
-  /** Set the lexical type.
-      @see #type() */
-  public void setType(String type);
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java	(working copy)
@@ -1,49 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.util.Attribute;
-
-/**
- * This attribute can be used to mark a token as a keyword. Keyword aware
- * {@link TokenStream}s can decide to modify a token based on the return value
- * of {@link #isKeyword()} if the token is modified. Stemming filters for
- * instance can use this attribute to conditionally skip a term if
- * {@link #isKeyword()} returns <code>true</code>.
- */
-public interface KeywordAttribute extends Attribute {
-
-  /**
-   * Returns <code>true</code> iff the current token is a keyword, otherwise
-   * <code>false</code>/
-   * 
-   * @return <code>true</code> iff the current token is a keyword, otherwise
-   *         <code>false</code>/
-   */
-  public boolean isKeyword();
-
-  /**
-   * Marks the current token as keyword iff set to <code>true</code>.
-   * 
-   * @param isKeyword
-   *          <code>true</code> iff the current token is a keyword, otherwise
-   *          <code>false</code>.
-   */
-  public void setKeyword(boolean isKeyword);
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java	(working copy)
@@ -1,44 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.Attribute;
-
-/**
- * The start and end character offset of a Token. 
- */
-public interface OffsetAttribute extends Attribute {
-  /** Returns this Token's starting offset, the position of the first character
-  corresponding to this token in the source text.
-
-  Note that the difference between endOffset() and startOffset() may not be
-  equal to termText.length(), as the term text may have been altered by a
-  stemmer or some other filter. */
-  public int startOffset();
-
-  
-  /** Set the starting and ending offset.
-    @see #startOffset() and #endOffset()*/
-  public void setOffset(int startOffset, int endOffset);
-  
-
-  /** Returns this Token's ending offset, one greater than the position of the
-  last character corresponding to this token in the source text. The length
-  of the token in the source text is (endOffset - startOffset). */
-  public int endOffset();
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java	(working copy)
@@ -1,268 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.nio.CharBuffer;
-
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.AttributeReflector;
-import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.UnicodeUtil;
-
-/**
- * The term text of a Token.
- */
-public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttribute, TermToBytesRefAttribute, Cloneable {
-  private static int MIN_BUFFER_SIZE = 10;
-  
-  private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
-  private int termLength = 0;
-
-  public final void copyBuffer(char[] buffer, int offset, int length) {
-    growTermBuffer(length);
-    System.arraycopy(buffer, offset, termBuffer, 0, length);
-    termLength = length;
-  }
-
-  public final char[] buffer() {
-    return termBuffer;
-  }
-  
-  public final char[] resizeBuffer(int newSize) {
-    if(termBuffer.length < newSize){
-      // Not big enough; create a new array with slight
-      // over allocation and preserve content
-      final char[] newCharBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
-      System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
-      termBuffer = newCharBuffer;
-    }
-    return termBuffer;   
-  }
-  
-  private void growTermBuffer(int newSize) {
-    if(termBuffer.length < newSize){
-      // Not big enough; create a new array with slight
-      // over allocation:
-      termBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
-    }
-  }
-
-  public final CharTermAttribute setLength(int length) {
-    if (length > termBuffer.length)
-      throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
-    termLength = length;
-    return this;
-  }
-  
-  public final CharTermAttribute setEmpty() {
-    termLength = 0;
-    return this;
-  }
-  
-  // *** TermToBytesRefAttribute interface ***
-  private BytesRef bytes = new BytesRef(MIN_BUFFER_SIZE);
-
-  // not until java 6 @Override
-  public int fillBytesRef() {
-    return UnicodeUtil.UTF16toUTF8WithHash(termBuffer, 0, termLength, bytes);
-  }
-
-  // not until java 6 @Override
-  public BytesRef getBytesRef() {
-    return bytes;
-  }
-  
-  // *** CharSequence interface ***
-  public final int length() {
-    return termLength;
-  }
-  
-  public final char charAt(int index) {
-    if (index >= termLength)
-      throw new IndexOutOfBoundsException();
-    return termBuffer[index];
-  }
-  
-  public final CharSequence subSequence(final int start, final int end) {
-    if (start > termLength || end > termLength)
-      throw new IndexOutOfBoundsException();
-    return new String(termBuffer, start, end - start);
-  }
-  
-  // *** Appendable interface ***
-
-  public final CharTermAttribute append(CharSequence csq) {
-    if (csq == null) // needed for Appendable compliance
-      return appendNull();
-    return append(csq, 0, csq.length());
-  }
-  
-  public final CharTermAttribute append(CharSequence csq, int start, int end) {
-    if (csq == null) // needed for Appendable compliance
-      csq = "null";
-    final int len = end - start, csqlen = csq.length();
-    if (len < 0 || start > csqlen || end > csqlen)
-      throw new IndexOutOfBoundsException();
-    if (len == 0)
-      return this;
-    resizeBuffer(termLength + len);
-    if (len > 4) { // only use instanceof check series for longer CSQs, else simply iterate
-      if (csq instanceof String) {
-        ((String) csq).getChars(start, end, termBuffer, termLength);
-      } else if (csq instanceof StringBuilder) {
-        ((StringBuilder) csq).getChars(start, end, termBuffer, termLength);
-      } else if (csq instanceof CharTermAttribute) {
-        System.arraycopy(((CharTermAttribute) csq).buffer(), start, termBuffer, termLength, len);
-      } else if (csq instanceof CharBuffer && ((CharBuffer) csq).hasArray()) {
-        final CharBuffer cb = (CharBuffer) csq;
-        System.arraycopy(cb.array(), cb.arrayOffset() + cb.position() + start, termBuffer, termLength, len);
-      } else if (csq instanceof StringBuffer) {
-        ((StringBuffer) csq).getChars(start, end, termBuffer, termLength);
-      } else {
-        while (start < end)
-          termBuffer[termLength++] = csq.charAt(start++);
-        // no fall-through here, as termLength is updated!
-        return this;
-      }
-      termLength += len;
-      return this;
-    } else {
-      while (start < end)
-        termBuffer[termLength++] = csq.charAt(start++);
-      return this;
-    }
-  }
-  
-  public final CharTermAttribute append(char c) {
-    resizeBuffer(termLength + 1)[termLength++] = c;
-    return this;
-  }
-  
-  // *** For performance some convenience methods in addition to CSQ's ***
-  
-  public final CharTermAttribute append(String s) {
-    if (s == null) // needed for Appendable compliance
-      return appendNull();
-    final int len = s.length();
-    s.getChars(0, len, resizeBuffer(termLength + len), termLength);
-    termLength += len;
-    return this;
-  }
-  
-  public final CharTermAttribute append(StringBuilder s) {
-    if (s == null) // needed for Appendable compliance
-      return appendNull();
-    final int len = s.length();
-    s.getChars(0, len, resizeBuffer(termLength + len), termLength);
-    termLength += len;
-    return this;
-  }
-  
-  public final CharTermAttribute append(CharTermAttribute ta) {
-    if (ta == null) // needed for Appendable compliance
-      return appendNull();
-    final int len = ta.length();
-    System.arraycopy(ta.buffer(), 0, resizeBuffer(termLength + len), termLength, len);
-    termLength += len;
-    return this;
-  }
-
-  private CharTermAttribute appendNull() {
-    resizeBuffer(termLength + 4);
-    termBuffer[termLength++] = 'n';
-    termBuffer[termLength++] = 'u';
-    termBuffer[termLength++] = 'l';
-    termBuffer[termLength++] = 'l';
-    return this;
-  }
-  
-  // *** AttributeImpl ***
-
-  @Override
-  public int hashCode() {
-    int code = termLength;
-    code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
-    return code;
-  }
-
-  @Override
-  public void clear() {
-    termLength = 0;    
-  }
-
-  @Override
-  public Object clone() {
-    CharTermAttributeImpl t = (CharTermAttributeImpl)super.clone();
-    // Do a deep clone
-    t.termBuffer = new char[this.termLength];
-    System.arraycopy(this.termBuffer, 0, t.termBuffer, 0, this.termLength);
-    t.bytes = new BytesRef(bytes);
-    return t;
-  }
-  
-  @Override
-  public boolean equals(Object other) {
-    if (other == this) {
-      return true;
-    }
-    
-    if (other instanceof CharTermAttributeImpl) {
-      final CharTermAttributeImpl o = ((CharTermAttributeImpl) other);
-      if (termLength != o.termLength)
-        return false;
-      for(int i=0;i<termLength;i++) {
-        if (termBuffer[i] != o.termBuffer[i]) {
-          return false;
-        }
-      }
-      return true;
-    }
-    
-    return false;
-  }
-
-  /** 
-   * Returns solely the term text as specified by the
-   * {@link CharSequence} interface.
-   * <p>This method changed the behavior with Lucene 3.1,
-   * before it returned a String representation of the whole
-   * term with all attributes.
-   * This affects especially the
-   * {@link org.apache.lucene.analysis.Token} subclass.
-   */
-  @Override
-  public String toString() {
-    return new String(termBuffer, 0, termLength);
-  }
-  
-  @Override
-  public void reflectWith(AttributeReflector reflector) {
-    reflector.reflect(CharTermAttribute.class, "term", toString());
-    fillBytesRef();
-    reflector.reflect(TermToBytesRefAttribute.class, "bytes", new BytesRef(bytes));
-  }
-  
-  @Override
-  public void copyTo(AttributeImpl target) {
-    CharTermAttribute t = (CharTermAttribute) target;
-    t.copyBuffer(termBuffer, 0, termLength);
-  }
-
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java	(working copy)
@@ -1,36 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.Payload;
-import org.apache.lucene.util.Attribute;
-
-/**
- * The payload of a Token. See also {@link Payload}.
- */
-public interface PayloadAttribute extends Attribute {
-  /**
-   * Returns this Token's payload.
-   */ 
-  public Payload getPayload();
-
-  /** 
-   * Sets this Token's payload.
-   */
-  public void setPayload(Payload payload);
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java	(working copy)
@@ -1,97 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.util.AttributeImpl;
-
-/** The positionIncrement determines the position of this token
- * relative to the previous Token in a {@link TokenStream}, used in phrase
- * searching.
- *
- * <p>The default value is one.
- *
- * <p>Some common uses for this are:<ul>
- *
- * <li>Set it to zero to put multiple terms in the same position.  This is
- * useful if, e.g., a word has multiple stems.  Searches for phrases
- * including either stem will match.  In this case, all but the first stem's
- * increment should be set to zero: the increment of the first instance
- * should be one.  Repeating a token with an increment of zero can also be
- * used to boost the scores of matches on that token.
- *
- * <li>Set it to values greater than one to inhibit exact phrase matches.
- * If, for example, one does not want phrases to match across removed stop
- * words, then one could build a stop word filter that removes stop words and
- * also sets the increment to the number of stop words removed before each
- * non-stop word.  Then exact phrase queries will only match when the terms
- * occur with no intervening stop words.
- *
- * </ul>
- */
-public class PositionIncrementAttributeImpl extends AttributeImpl implements PositionIncrementAttribute, Cloneable {
-  private int positionIncrement = 1;
-  
-  /** Set the position increment. The default value is one.
-   *
-   * @param positionIncrement the distance from the prior term
-   */
-  public void setPositionIncrement(int positionIncrement) {
-    if (positionIncrement < 0)
-      throw new IllegalArgumentException
-        ("Increment must be zero or greater: " + positionIncrement);
-    this.positionIncrement = positionIncrement;
-  }
-
-  /** Returns the position increment of this Token.
-   * @see #setPositionIncrement
-   */
-  public int getPositionIncrement() {
-    return positionIncrement;
-  }
-
-  @Override
-  public void clear() {
-    this.positionIncrement = 1;
-  }
-  
-  @Override
-  public boolean equals(Object other) {
-    if (other == this) {
-      return true;
-    }
-    
-    if (other instanceof PositionIncrementAttributeImpl) {
-      return positionIncrement == ((PositionIncrementAttributeImpl) other).positionIncrement;
-    }
- 
-    return false;
-  }
-
-  @Override
-  public int hashCode() {
-    return positionIncrement;
-  }
-  
-  @Override
-  public void copyTo(AttributeImpl target) {
-    PositionIncrementAttribute t = (PositionIncrementAttribute) target;
-    t.setPositionIncrement(positionIncrement);
-  }  
-
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java	(working copy)
@@ -1,44 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.util.Attribute;
-
-/**
- * This attribute can be used to pass different flags down the {@link Tokenizer} chain,
- * eg from one TokenFilter to another one. 
- * @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
- */
-public interface FlagsAttribute extends Attribute {
-  /**
-   * <p/>
-   *
-   * Get the bitset for any bits that have been set.  This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
-   * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
-   *
-   *
-   * @return The bits
-   */
-  public int getFlags();
-
-  /**
-   * @see #getFlags()
-   */
-  public void setFlags(int flags);  
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java	(working copy)
@@ -1,76 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.AttributeImpl;
-
-/**
- * A Token's lexical type. The Default value is "word". 
- */
-public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute, Cloneable {
-  private String type;
-  
-  public TypeAttributeImpl() {
-    this(DEFAULT_TYPE); 
-  }
-  
-  public TypeAttributeImpl(String type) {
-    this.type = type;
-  }
-  
-  /** Returns this Token's lexical type.  Defaults to "word". */
-  public String type() {
-    return type;
-  }
-
-  /** Set the lexical type.
-      @see #type() */
-  public void setType(String type) {
-    this.type = type;
-  }
-
-  @Override
-  public void clear() {
-    type = DEFAULT_TYPE;    
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == this) {
-      return true;
-    }
-    
-    if (other instanceof TypeAttributeImpl) {
-      final TypeAttributeImpl o = (TypeAttributeImpl) other;
-      return (this.type == null ? o.type == null : this.type.equals(o.type));
-    }
-    
-    return false;
-  }
-
-  @Override
-  public int hashCode() {
-    return (type == null) ? 0 : type.hashCode();
-  }
-  
-  @Override
-  public void copyTo(AttributeImpl target) {
-    TypeAttribute t = (TypeAttribute) target;
-    t.setType(type);
-  }
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java	(working copy)
@@ -1,91 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.Attribute;
-
-/**
- * The term text of a Token.
- */
-public interface CharTermAttribute extends Attribute, CharSequence, Appendable {
-  
-  /** Copies the contents of buffer, starting at offset for
-   *  length characters, into the termBuffer array.
-   *  @param buffer the buffer to copy
-   *  @param offset the index in the buffer of the first character to copy
-   *  @param length the number of characters to copy
-   */
-  public void copyBuffer(char[] buffer, int offset, int length);
-  
-  /** Returns the internal termBuffer character array which
-   *  you can then directly alter.  If the array is too
-   *  small for your token, use {@link
-   *  #resizeBuffer(int)} to increase it.  After
-   *  altering the buffer be sure to call {@link
-   *  #setLength} to record the number of valid
-   *  characters that were placed into the termBuffer. */
-  public char[] buffer();
-
-  /** Grows the termBuffer to at least size newSize, preserving the
-   *  existing content.
-   *  @param newSize minimum size of the new termBuffer
-   *  @return newly created termBuffer with length >= newSize
-   */
-  public char[] resizeBuffer(int newSize);
-
-  /** Set number of valid characters (length of the term) in
-   *  the termBuffer array. Use this to truncate the termBuffer
-   *  or to synchronize with external manipulation of the termBuffer.
-   *  Note: to grow the size of the array,
-   *  use {@link #resizeBuffer(int)} first.
-   *  @param length the truncated length
-   */
-  public CharTermAttribute setLength(int length);
-  
-  /** Sets the length of the termBuffer to zero.
-   * Use this method before appending contents
-   * using the {@link Appendable} interface.
-   */
-  public CharTermAttribute setEmpty();
-  
-  // the following methods are redefined to get rid of IOException declaration:
-  public CharTermAttribute append(CharSequence csq);
-  public CharTermAttribute append(CharSequence csq, int start, int end);
-  public CharTermAttribute append(char c);
-
-  /** Appends the specified {@code String} to this character sequence. 
-   * <p>The characters of the {@code String} argument are appended, in order, increasing the length of
-   * this sequence by the length of the argument. If argument is {@code null}, then the four
-   * characters {@code "null"} are appended. 
-   */
-  public CharTermAttribute append(String s);
-
-  /** Appends the specified {@code StringBuilder} to this character sequence. 
-   * <p>The characters of the {@code StringBuilder} argument are appended, in order, increasing the length of
-   * this sequence by the length of the argument. If argument is {@code null}, then the four
-   * characters {@code "null"} are appended. 
-   */
-  public CharTermAttribute append(StringBuilder sb);
-
-  /** Appends the contents of the other {@code CharTermAttribute} to this character sequence. 
-   * <p>The characters of the {@code CharTermAttribute} argument are appended, in order, increasing the length of
-   * this sequence by the length of the argument. If argument is {@code null}, then the four
-   * characters {@code "null"} are appended. 
-   */
-  public CharTermAttribute append(CharTermAttribute termAtt);
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java	(working copy)
@@ -1,82 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.util.AttributeImpl;
-
-/**
- *This attribute can be used to mark a token as a keyword. Keyword aware
- * {@link TokenStream}s can decide to modify a token based on the return value
- * of {@link #isKeyword()} if the token is modified. Stemming filters for
- * instance can use this attribute to conditionally skip a term if
- * {@link #isKeyword()} returns <code>true</code>.
- */
-public final class KeywordAttributeImpl extends AttributeImpl implements
-    KeywordAttribute {
-  private boolean keyword;
-
-  @Override
-  public void clear() {
-    keyword = false;
-  }
-
-  @Override
-  public void copyTo(AttributeImpl target) {
-    KeywordAttribute attr = (KeywordAttribute) target;
-    attr.setKeyword(keyword);
-  }
-
-  @Override
-  public int hashCode() {
-    return keyword ? 31 : 37;
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (getClass() != obj.getClass())
-      return false;
-    final KeywordAttributeImpl other = (KeywordAttributeImpl) obj;
-    return keyword == other.keyword;
-  }
-
-  /**
-   * Returns <code>true</code> iff the current token is a keyword, otherwise
-   * <code>false</code>/
-   * 
-   * @return <code>true</code> iff the current token is a keyword, otherwise
-   *         <code>false</code>/
-   */
-  public boolean isKeyword() {
-    return keyword;
-  }
-
-  /**
-   * Marks the current token as keyword iff set to <code>true</code>.
-   * 
-   * @param isKeyword
-   *          <code>true</code> iff the current token is a keyword, otherwise
-   *          <code>false</code>.
-   */
-  public void setKeyword(boolean isKeyword) {
-    keyword = isKeyword;
-  }
-
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java	(working copy)
@@ -1,88 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.AttributeImpl;
-
-/**
- * The start and end character offset of a Token. 
- */
-public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable {
-  private int startOffset;
-  private int endOffset;
-
-  /** Returns this Token's starting offset, the position of the first character
-  corresponding to this token in the source text.
-
-  Note that the difference between endOffset() and startOffset() may not be
-  equal to termText.length(), as the term text may have been altered by a
-  stemmer or some other filter. */
-  public int startOffset() {
-    return startOffset;
-  }
-
-  
-  /** Set the starting and ending offset.
-    @see #startOffset() and #endOffset()*/
-  public void setOffset(int startOffset, int endOffset) {
-    this.startOffset = startOffset;
-    this.endOffset = endOffset;
-  }
-  
-
-  /** Returns this Token's ending offset, one greater than the position of the
-  last character corresponding to this token in the source text. The length
-  of the token in the source text is (endOffset - startOffset). */
-  public int endOffset() {
-    return endOffset;
-  }
-
-
-  @Override
-  public void clear() {
-    startOffset = 0;
-    endOffset = 0;
-  }
-  
-  @Override
-  public boolean equals(Object other) {
-    if (other == this) {
-      return true;
-    }
-    
-    if (other instanceof OffsetAttributeImpl) {
-      OffsetAttributeImpl o = (OffsetAttributeImpl) other;
-      return o.startOffset == startOffset && o.endOffset == endOffset;
-    }
-    
-    return false;
-  }
-
-  @Override
-  public int hashCode() {
-    int code = startOffset;
-    code = code * 31 + endOffset;
-    return code;
-  } 
-  
-  @Override
-  public void copyTo(AttributeImpl target) {
-    OffsetAttribute t = (OffsetAttribute) target;
-    t.setOffset(startOffset, endOffset);
-  }  
-}
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java	(working copy)
@@ -1,59 +0,0 @@
-package org.apache.lucene.analysis.tokenattributes;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.Attribute;
-
-/** The positionIncrement determines the position of this token
- * relative to the previous Token in a TokenStream, used in phrase
- * searching.
- *
- * <p>The default value is one.
- *
- * <p>Some common uses for this are:<ul>
- *
- * <li>Set it to zero to put multiple terms in the same position.  This is
- * useful if, e.g., a word has multiple stems.  Searches for phrases
- * including either stem will match.  In this case, all but the first stem's
- * increment should be set to zero: the increment of the first instance
- * should be one.  Repeating a token with an increment of zero can also be
- * used to boost the scores of matches on that token.
- *
- * <li>Set it to values greater than one to inhibit exact phrase matches.
- * If, for example, one does not want phrases to match across removed stop
- * words, then one could build a stop word filter that removes stop words and
- * also sets the increment to the number of stop words removed before each
- * non-stop word.  Then exact phrase queries will only match when the terms
- * occur with no intervening stop words.
- *
- * </ul>
- * 
- * @see org.apache.lucene.index.DocsAndPositionsEnum
- */
-public interface PositionIncrementAttribute extends Attribute {
-  /** Set the position increment. The default value is one.
-   *
-   * @param positionIncrement the distance from the prior term
-   */
-  public void setPositionIncrement(int positionIncrement);
-
-  /** Returns the position increment of this Token.
-   * @see #setPositionIncrement
-   */
-  public int getPositionIncrement();
-}
Index: lucene/src/java/org/apache/lucene/analysis/CharStream.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/CharStream.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/CharStream.java	(working copy)
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis;
-
-import java.io.Reader;
-
-/**
- * CharStream adds {@link #correctOffset}
- * functionality over {@link Reader}.  All Tokenizers accept a
- * CharStream instead of {@link Reader} as input, which enables
- * arbitrary character based filtering before tokenization. 
- * The {@link #correctOffset} method fixed offsets to account for
- * removal or insertion of characters, so that the offsets
- * reported in the tokens match the character offsets of the
- * original Reader.
- */
-public abstract class CharStream extends Reader {
-
-  /**
-   * Called by CharFilter(s) and Tokenizer to correct token offset.
-   *
-   * @param currentOff offset as seen in the output
-   * @return corrected offset based on the input
-   */
-  public abstract int correctOffset(int currentOff);
-}
Index: lucene/src/java/org/apache/lucene/analysis/Tokenizer.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/Tokenizer.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/Tokenizer.java	(working copy)
@@ -1,95 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.AttributeSource;
-
-import java.io.Reader;
-import java.io.IOException;
-
-/** A Tokenizer is a TokenStream whose input is a Reader.
-  <p>
-  This is an abstract class; subclasses must override {@link #incrementToken()}
-  <p>
-  NOTE: Subclasses overriding {@link #incrementToken()} must
-  call {@link AttributeSource#clearAttributes()} before
-  setting attributes.
- */
-public abstract class Tokenizer extends TokenStream {
-  /** The text source for this Tokenizer. */
-  protected Reader input;
-
-  /** Construct a tokenizer with null input. */
-  protected Tokenizer() {}
-  
-  /** Construct a token stream processing the given input. */
-  protected Tokenizer(Reader input) {
-    this.input = CharReader.get(input);
-  }
-  
-  /** Construct a tokenizer with null input using the given AttributeFactory. */
-  protected Tokenizer(AttributeFactory factory) {
-    super(factory);
-  }
-
-  /** Construct a token stream processing the given input using the given AttributeFactory. */
-  protected Tokenizer(AttributeFactory factory, Reader input) {
-    super(factory);
-    this.input = CharReader.get(input);
-  }
-
-  /** Construct a token stream processing the given input using the given AttributeSource. */
-  protected Tokenizer(AttributeSource source) {
-    super(source);
-  }
-
-  /** Construct a token stream processing the given input using the given AttributeSource. */
-  protected Tokenizer(AttributeSource source, Reader input) {
-    super(source);
-    this.input = CharReader.get(input);
-  }
-  
-  /** By default, closes the input Reader. */
-  @Override
-  public void close() throws IOException {
-    if (input != null) {
-      input.close();
-      // LUCENE-2387: don't hold onto Reader after close, so
-      // GC can reclaim
-      input = null;
-    }
-  }
-  
-  /** Return the corrected offset. If {@link #input} is a {@link CharStream} subclass
-   * this method calls {@link CharStream#correctOffset}, else returns <code>currentOff</code>.
-   * @param currentOff offset as seen in the output
-   * @return corrected offset based on the input
-   * @see CharStream#correctOffset
-   */
-  protected final int correctOffset(int currentOff) {
-    return (input instanceof CharStream) ? ((CharStream) input).correctOffset(currentOff) : currentOff;
-  }
-
-  /** Expert: Reset the tokenizer to a new reader.  Typically, an
-   *  analyzer (in its reusableTokenStream method) will use
-   *  this to re-use a previously created tokenizer. */
-  public void reset(Reader input) throws IOException {
-    this.input = input;
-  }
-}
-
Index: lucene/src/java/org/apache/lucene/analysis/TokenStream.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/TokenStream.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/TokenStream.java	(working copy)
@@ -1,182 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.Closeable;
-import java.lang.reflect.Modifier;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.util.Attribute;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.AttributeSource;
-
-/**
- * A <code>TokenStream</code> enumerates the sequence of tokens, either from
- * {@link Field}s of a {@link Document} or from query text.
- * <p>
- * This is an abstract class; concrete subclasses are:
- * <ul>
- * <li>{@link Tokenizer}, a <code>TokenStream</code> whose input is a Reader; and
- * <li>{@link TokenFilter}, a <code>TokenStream</code> whose input is another
- * <code>TokenStream</code>.
- * </ul>
- * A new <code>TokenStream</code> API has been introduced with Lucene 2.9. This API
- * has moved from being {@link Token}-based to {@link Attribute}-based. While
- * {@link Token} still exists in 2.9 as a convenience class, the preferred way
- * to store the information of a {@link Token} is to use {@link AttributeImpl}s.
- * <p>
- * <code>TokenStream</code> now extends {@link AttributeSource}, which provides
- * access to all of the token {@link Attribute}s for the <code>TokenStream</code>.
- * Note that only one instance per {@link AttributeImpl} is created and reused
- * for every token. This approach reduces object creation and allows local
- * caching of references to the {@link AttributeImpl}s. See
- * {@link #incrementToken()} for further details.
- * <p>
- * <b>The workflow of the new <code>TokenStream</code> API is as follows:</b>
- * <ol>
- * <li>Instantiation of <code>TokenStream</code>/{@link TokenFilter}s which add/get
- * attributes to/from the {@link AttributeSource}.
- * <li>The consumer calls {@link TokenStream#reset()}.
- * <li>The consumer retrieves attributes from the stream and stores local
- * references to all attributes it wants to access.
- * <li>The consumer calls {@link #incrementToken()} until it returns false
- * consuming the attributes after each call.
- * <li>The consumer calls {@link #end()} so that any end-of-stream operations
- * can be performed.
- * <li>The consumer calls {@link #close()} to release any resource when finished
- * using the <code>TokenStream</code>.
- * </ol>
- * To make sure that filters and consumers know which attributes are available,
- * the attributes must be added during instantiation. Filters and consumers are
- * not required to check for availability of attributes in
- * {@link #incrementToken()}.
- * <p>
- * You can find some example code for the new API in the analysis package level
- * Javadoc.
- * <p>
- * Sometimes it is desirable to capture a current state of a <code>TokenStream</code>,
- * e.g., for buffering purposes (see {@link CachingTokenFilter},
- * TeeSinkTokenFilter). For this usecase
- * {@link AttributeSource#captureState} and {@link AttributeSource#restoreState}
- * can be used.
- * <p>The {@code TokenStream}-API in Lucene is based on the decorator pattern.
- * Therefore all non-abstract subclasses must be final or have at least a final
- * implementation of {@link #incrementToken}! This is checked when Java
- * assertions are enabled.
- */
-public abstract class TokenStream extends AttributeSource implements Closeable {
-
-  /**
-   * A TokenStream using the default attribute factory.
-   */
-  protected TokenStream() {
-    super();
-    assert assertFinal();
-  }
-  
-  /**
-   * A TokenStream that uses the same attributes as the supplied one.
-   */
-  protected TokenStream(AttributeSource input) {
-    super(input);
-    assert assertFinal();
-  }
-  
-  /**
-   * A TokenStream using the supplied AttributeFactory for creating new {@link Attribute} instances.
-   */
-  protected TokenStream(AttributeFactory factory) {
-    super(factory);
-    assert assertFinal();
-  }
-  
-  private boolean assertFinal() {
-    try {
-      final Class<?> clazz = getClass();
-      assert clazz.isAnonymousClass() ||
-        (clazz.getModifiers() & (Modifier.FINAL | Modifier.PRIVATE)) != 0 ||
-        Modifier.isFinal(clazz.getMethod("incrementToken").getModifiers()) :
-        "TokenStream implementation classes or at least their incrementToken() implementation must be final";
-      return true;
-    } catch (NoSuchMethodException nsme) {
-      return false;
-    }
-  }
-  
-  /**
-   * Consumers (i.e., {@link IndexWriter}) use this method to advance the stream to
-   * the next token. Implementing classes must implement this method and update
-   * the appropriate {@link AttributeImpl}s with the attributes of the next
-   * token.
-   * <P>
-   * The producer must make no assumptions about the attributes after the method
-   * has been returned: the caller may arbitrarily change it. If the producer
-   * needs to preserve the state for subsequent calls, it can use
-   * {@link #captureState} to create a copy of the current attribute state.
-   * <p>
-   * This method is called for every token of a document, so an efficient
-   * implementation is crucial for good performance. To avoid calls to
-   * {@link #addAttribute(Class)} and {@link #getAttribute(Class)},
-   * references to all {@link AttributeImpl}s that this stream uses should be
-   * retrieved during instantiation.
-   * <p>
-   * To ensure that filters and consumers know which attributes are available,
-   * the attributes must be added during instantiation. Filters and consumers
-   * are not required to check for availability of attributes in
-   * {@link #incrementToken()}.
-   * 
-   * @return false for end of stream; true otherwise
-   */
-  public abstract boolean incrementToken() throws IOException;
-  
-  /**
-   * This method is called by the consumer after the last token has been
-   * consumed, after {@link #incrementToken()} returned <code>false</code>
-   * (using the new <code>TokenStream</code> API). Streams implementing the old API
-   * should upgrade to use this feature.
-   * <p/>
-   * This method can be used to perform any end-of-stream operations, such as
-   * setting the final offset of a stream. The final offset of a stream might
-   * differ from the offset of the last token eg in case one or more whitespaces
-   * followed after the last token, but a WhitespaceTokenizer was used.
-   * 
-   * @throws IOException
-   */
-  public void end() throws IOException {
-    // do nothing by default
-  }
-
-  /**
-   * Resets this stream to the beginning. This is an optional operation, so
-   * subclasses may or may not implement this method. {@link #reset()} is not needed for
-   * the standard indexing process. However, if the tokens of a
-   * <code>TokenStream</code> are intended to be consumed more than once, it is
-   * necessary to implement {@link #reset()}. Note that if your TokenStream
-   * caches tokens and feeds them back again after a reset, it is imperative
-   * that you clone the tokens when you store them away (on the first pass) as
-   * well as when you return them (on future passes after {@link #reset()}).
-   */
-  public void reset() throws IOException {}
-  
-  /** Releases resources associated with this stream. */
-  public void close() throws IOException {}
-  
-}
Index: lucene/src/java/org/apache/lucene/analysis/package.html
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/package.html	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/analysis/package.html	(working copy)
@@ -1,630 +0,0 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<head>
-   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
-<p>API and code to convert text into indexable/searchable tokens.  Covers {@link org.apache.lucene.analysis.Analyzer} and related classes.</p>
-<h2>Parsing? Tokenization? Analysis!</h2>
-<p>
-Lucene, indexing and search library, accepts only plain text input.
-<p>
-<h2>Parsing</h2>
-<p>
-Applications that build their search capabilities upon Lucene may support documents in various formats &ndash; HTML, XML, PDF, Word &ndash; just to name a few.
-Lucene does not care about the <i>Parsing</i> of these and other document formats, and it is the responsibility of the 
-application using Lucene to use an appropriate <i>Parser</i> to convert the original format into plain text before passing that plain text to Lucene.
-<p>
-<h2>Tokenization</h2>
-<p>
-Plain text passed to Lucene for indexing goes through a process generally called tokenization. Tokenization is the process
-of breaking input text into small indexing elements &ndash; tokens.
-The way input text is broken into tokens heavily influences how people will then be able to search for that text. 
-For instance, sentences beginnings and endings can be identified to provide for more accurate phrase 
-and proximity searches (though sentence identification is not provided by Lucene).
-<p>
-In some cases simply breaking the input text into tokens is not enough &ndash; a deeper <i>Analysis</i> may be needed.
-There are many post tokenization steps that can be done, including (but not limited to):
-<ul>
-  <li><a href="http://en.wikipedia.org/wiki/Stemming">Stemming</a> &ndash; 
-      Replacing of words by their stems. 
-      For instance with English stemming "bikes" is replaced by "bike"; 
-      now query "bike" can find both documents containing "bike" and those containing "bikes".
-  </li>
-  <li><a href="http://en.wikipedia.org/wiki/Stop_words">Stop Words Filtering</a> &ndash; 
-      Common words like "the", "and" and "a" rarely add any value to a search.
-      Removing them shrinks the index size and increases performance.
-      It may also reduce some "noise" and actually improve search quality.
-  </li>
-  <li><a href="http://en.wikipedia.org/wiki/Text_normalization">Text Normalization</a> &ndash; 
-      Stripping accents and other character markings can make for better searching.
-  </li>
-  <li><a href="http://en.wikipedia.org/wiki/Synonym">Synonym Expansion</a> &ndash; 
-      Adding in synonyms at the same token position as the current word can mean better 
-      matching when users search with words in the synonym set.
-  </li>
-</ul> 
-<p>
-<h2>Core Analysis</h2>
-<p>
-  The analysis package provides the mechanism to convert Strings and Readers into tokens that can be indexed by Lucene.  There
-  are three main classes in the package from which all analysis processes are derived.  These are:
-  <ul>
-    <li>{@link org.apache.lucene.analysis.Analyzer} &ndash; An Analyzer is responsible for building a {@link org.apache.lucene.analysis.TokenStream} which can be consumed
-    by the indexing and searching processes.  See below for more information on implementing your own Analyzer.</li>
-    <li>{@link org.apache.lucene.analysis.Tokenizer} &ndash; A Tokenizer is a {@link org.apache.lucene.analysis.TokenStream} and is responsible for breaking
-    up incoming text into tokens. In most cases, an Analyzer will use a Tokenizer as the first step in
-    the analysis process.</li>
-    <li>{@link org.apache.lucene.analysis.TokenFilter} &ndash; A TokenFilter is also a {@link org.apache.lucene.analysis.TokenStream} and is responsible
-    for modifying tokens that have been created by the Tokenizer.  Common modifications performed by a
-    TokenFilter are: deletion, stemming, synonym injection, and down casing.  Not all Analyzers require TokenFilters</li>
-  </ul>
-  <b>Lucene 2.9 introduces a new TokenStream API. Please see the section "New TokenStream API" below for more details.</b>
-</p>
-<h2>Hints, Tips and Traps</h2>
-<p>
-   The synergy between {@link org.apache.lucene.analysis.Analyzer} and {@link org.apache.lucene.analysis.Tokenizer}
-   is sometimes confusing. To ease on this confusion, some clarifications:
-   <ul>
-      <li>The {@link org.apache.lucene.analysis.Analyzer} is responsible for the entire task of 
-          <u>creating</u> tokens out of the input text, while the {@link org.apache.lucene.analysis.Tokenizer}
-          is only responsible for <u>breaking</u> the input text into tokens. Very likely, tokens created 
-          by the {@link org.apache.lucene.analysis.Tokenizer} would be modified or even omitted 
-          by the {@link org.apache.lucene.analysis.Analyzer} (via one or more
-          {@link org.apache.lucene.analysis.TokenFilter}s) before being returned.
-       </li>
-       <li>{@link org.apache.lucene.analysis.Tokenizer} is a {@link org.apache.lucene.analysis.TokenStream}, 
-           but {@link org.apache.lucene.analysis.Analyzer} is not.
-       </li>
-       <li>{@link org.apache.lucene.analysis.Analyzer} is "field aware", but 
-           {@link org.apache.lucene.analysis.Tokenizer} is not.
-       </li>
-   </ul>
-</p>
-<p>
-  Lucene Java provides a number of analysis capabilities, the most commonly used one being the StandardAnalyzer.  
-  Many applications will have a long and industrious life with nothing more
-  than the StandardAnalyzer.  However, there are a few other classes/packages that are worth mentioning:
-  <ol>
-    <li>PerFieldAnalyzerWrapper &ndash; Most Analyzers perform the same operation on all
-      {@link org.apache.lucene.document.Field}s.  The PerFieldAnalyzerWrapper can be used to associate a different Analyzer with different
-      {@link org.apache.lucene.document.Field}s.</li>
-    <li>The modules/analysis library located at the root of the Lucene distribution has a number of different Analyzer implementations to solve a variety
-    of different problems related to searching.  Many of the Analyzers are designed to analyze non-English languages.</li>
-    <li>There are a variety of Tokenizer and TokenFilter implementations in this package.  Take a look around, chances are someone has implemented what you need.</li>
-  </ol>
-</p>
-<p>
-  Analysis is one of the main causes of performance degradation during indexing.  Simply put, the more you analyze the slower the indexing (in most cases).
-  Perhaps your application would be just fine using the simple WhitespaceTokenizer combined with a StopFilter. The contrib/benchmark library can be useful 
-  for testing out the speed of the analysis process.
-</p>
-<h2>Invoking the Analyzer</h2>
-<p>
-  Applications usually do not invoke analysis &ndash; Lucene does it for them:
-  <ul>
-    <li>At indexing, as a consequence of 
-        {@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document) addDocument(doc)},
-        the Analyzer in effect for indexing is invoked for each indexed field of the added document.
-    </li>
-    <li>At search, as a consequence of
-        {@link org.apache.lucene.queryParser.QueryParser#parse(java.lang.String) QueryParser.parse(queryText)},
-        the QueryParser may invoke the Analyzer in effect.
-        Note that for some queries analysis does not take place, e.g. wildcard queries.
-    </li>
-  </ul>
-  However an application might invoke Analysis of any text for testing or for any other purpose, something like:
-  <PRE class="prettyprint">
-      Analyzer analyzer = new StandardAnalyzer(); // or any other analyzer
-      TokenStream ts = analyzer.tokenStream("myfield",new StringReader("some text goes here"));
-      while (ts.incrementToken()) {
-        System.out.println("token: "+ts));
-      }
-  </PRE>
-</p>
-<h2>Indexing Analysis vs. Search Analysis</h2>
-<p>
-  Selecting the "correct" analyzer is crucial
-  for search quality, and can also affect indexing and search performance.
-  The "correct" analyzer differs between applications.
-  Lucene java's wiki page 
-  <a href="http://wiki.apache.org/lucene-java/AnalysisParalysis">AnalysisParalysis</a> 
-  provides some data on "analyzing your analyzer".
-  Here are some rules of thumb:
-  <ol>
-    <li>Test test test... (did we say test?)</li>
-    <li>Beware of over analysis &ndash; might hurt indexing performance.</li>
-    <li>Start with same analyzer for indexing and search, otherwise searches would not find what they are supposed to...</li>
-    <li>In some cases a different analyzer is required for indexing and search, for instance:
-        <ul>
-           <li>Certain searches require more stop words to be filtered. (I.e. more than those that were filtered at indexing.)</li>
-           <li>Query expansion by synonyms, acronyms, auto spell correction, etc.</li>
-        </ul>
-        This might sometimes require a modified analyzer &ndash; see the next section on how to do that.
-    </li>
-  </ol>
-</p>
-<h2>Implementing your own Analyzer</h2>
-<p>Creating your own Analyzer is straightforward. It usually involves either wrapping an existing Tokenizer and  set of TokenFilters to create a new Analyzer
-or creating both the Analyzer and a Tokenizer or TokenFilter.  Before pursuing this approach, you may find it worthwhile
-to explore the modules/analysis library and/or ask on the java-user@lucene.apache.org mailing list first to see if what you need already exists.
-If you are still committed to creating your own Analyzer or TokenStream derivation (Tokenizer or TokenFilter) have a look at
-the source code of any one of the many samples located in this package.
-</p>
-<p>
-  The following sections discuss some aspects of implementing your own analyzer.
-</p>
-<h3>Field Section Boundaries</h3>
-<p>
-  When {@link org.apache.lucene.document.Document#add(org.apache.lucene.document.Fieldable) document.add(field)}
-  is called multiple times for the same field name, we could say that each such call creates a new 
-  section for that field in that document. 
-  In fact, a separate call to 
-  {@link org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader) tokenStream(field,reader)}
-  would take place for each of these so called "sections".
-  However, the default Analyzer behavior is to treat all these sections as one large section. 
-  This allows phrase search and proximity search to seamlessly cross 
-  boundaries between these "sections".
-  In other words, if a certain field "f" is added like this:
-  <PRE class="prettyprint">
-      document.add(new Field("f","first ends",...);
-      document.add(new Field("f","starts two",...);
-      indexWriter.addDocument(document);
-  </PRE>
-  Then, a phrase search for "ends starts" would find that document.
-  Where desired, this behavior can be modified by introducing a "position gap" between consecutive field "sections", 
-  simply by overriding 
-  {@link org.apache.lucene.analysis.Analyzer#getPositionIncrementGap(java.lang.String) Analyzer.getPositionIncrementGap(fieldName)}:
-  <PRE class="prettyprint">
-      Analyzer myAnalyzer = new StandardAnalyzer() {
-         public int getPositionIncrementGap(String fieldName) {
-           return 10;
-         }
-      };
-  </PRE>
-</p>
-<h3>Token Position Increments</h3>
-<p>
-   By default, all tokens created by Analyzers and Tokenizers have a 
-   {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute#getPositionIncrement() position increment} of one.
-   This means that the position stored for that token in the index would be one more than
-   that of the previous token.
-   Recall that phrase and proximity searches rely on position info.
-</p>
-<p>
-   If the selected analyzer filters the stop words "is" and "the", then for a document 
-   containing the string "blue is the sky", only the tokens "blue", "sky" are indexed, 
-   with position("sky") = 1 + position("blue"). Now, a phrase query "blue is the sky"
-   would find that document, because the same analyzer filters the same stop words from
-   that query. But also the phrase query "blue sky" would find that document.
-</p>
-<p>   
-   If this behavior does not fit the application needs,
-   a modified analyzer can be used, that would increment further the positions of
-   tokens following a removed stop word, using
-   {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute#setPositionIncrement(int)}.
-   This can be done with something like:
-   <PRE class="prettyprint">
-      public TokenStream tokenStream(final String fieldName, Reader reader) {
-        final TokenStream ts = someAnalyzer.tokenStream(fieldName, reader);
-        TokenStream res = new TokenStream() {
-          CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-          PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-        
-          public boolean incrementToken() throws IOException {
-            int extraIncrement = 0;
-            while (true) {
-              boolean hasNext = ts.incrementToken();
-              if (hasNext) {
-                if (stopWords.contains(termAtt.toString())) {
-                  extraIncrement++; // filter this word
-                  continue;
-                } 
-                if (extraIncrement>0) {
-                  posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+extraIncrement);
-                }
-              }
-              return hasNext;
-            }
-          }
-        };
-        return res;
-      }
-   </PRE>
-   Now, with this modified analyzer, the phrase query "blue sky" would find that document.
-   But note that this is yet not a perfect solution, because any phrase query "blue w1 w2 sky"
-   where both w1 and w2 are stop words would match that document.
-</p>
-<p>
-   Few more use cases for modifying position increments are:
-   <ol>
-     <li>Inhibiting phrase and proximity matches in sentence boundaries &ndash; for this, a tokenizer that 
-         identifies a new sentence can add 1 to the position increment of the first token of the new sentence.</li>
-     <li>Injecting synonyms &ndash; here, synonyms of a token should be added after that token, 
-         and their position increment should be set to 0.
-         As result, all synonyms of a token would be considered to appear in exactly the 
-         same position as that token, and so would they be seen by phrase and proximity searches.</li>
-   </ol>
-</p>
-<h2>New TokenStream API</h2>
-<p>
-	With Lucene 2.9 we introduce a new TokenStream API. The old API used to produce Tokens. A Token
-	has getter and setter methods for different properties like positionIncrement and termText.
-	While this approach was sufficient for the default indexing format, it is not versatile enough for
-	Flexible Indexing, a term which summarizes the effort of making the Lucene indexer pluggable and extensible for custom
-	index formats.
-</p>
-<p>
-A fully customizable indexer means that users will be able to store custom data structures on disk. Therefore an API
-is necessary that can transport custom types of data from the documents to the indexer.
-</p>
-<h3>Attribute and AttributeSource</h3> 
-Lucene 2.9 therefore introduces a new pair of classes called {@link org.apache.lucene.util.Attribute} and
-{@link org.apache.lucene.util.AttributeSource}. An Attribute serves as a
-particular piece of information about a text token. For example, {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}
- contains the term text of a token, and {@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute} contains the start and end character offsets of a token.
-An AttributeSource is a collection of Attributes with a restriction: there may be only one instance of each attribute type. TokenStream now extends AttributeSource, which
-means that one can add Attributes to a TokenStream. Since TokenFilter extends TokenStream, all filters are also
-AttributeSources.
-<p>
-	Lucene now provides six Attributes out of the box, which replace the variables the Token class has:
-	<ul>
-	  <li>{@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}<p>The term text of a token.</p></li>
-  	  <li>{@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute}<p>The start and end offset of token in characters.</p></li>
-	  <li>{@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute}<p>See above for detailed information about position increment.</p></li>
-	  <li>{@link org.apache.lucene.analysis.tokenattributes.PayloadAttribute}<p>The payload that a Token can optionally have.</p></li>
-	  <li>{@link org.apache.lucene.analysis.tokenattributes.TypeAttribute}<p>The type of the token. Default is 'word'.</p></li>
-	  <li>{@link org.apache.lucene.analysis.tokenattributes.FlagsAttribute}<p>Optional flags a token can have.</p></li>
-	</ul>
-</p>
-<h3>Using the new TokenStream API</h3>
-There are a few important things to know in order to use the new API efficiently which are summarized here. You may want
-to walk through the example below first and come back to this section afterwards.
-<ol><li>
-Please keep in mind that an AttributeSource can only have one instance of a particular Attribute. Furthermore, if 
-a chain of a TokenStream and multiple TokenFilters is used, then all TokenFilters in that chain share the Attributes
-with the TokenStream.
-</li>
-<br>
-<li>
-Attribute instances are reused for all tokens of a document. Thus, a TokenStream/-Filter needs to update
-the appropriate Attribute(s) in incrementToken(). The consumer, commonly the Lucene indexer, consumes the data in the
-Attributes and then calls incrementToken() again until it returns false, which indicates that the end of the stream
-was reached. This means that in each call of incrementToken() a TokenStream/-Filter can safely overwrite the data in
-the Attribute instances.
-</li>
-<br>
-<li>
-For performance reasons a TokenStream/-Filter should add/get Attributes during instantiation; i.e., create an attribute in the
-constructor and store references to it in an instance variable.  Using an instance variable instead of calling addAttribute()/getAttribute() 
-in incrementToken() will avoid attribute lookups for every token in the document.
-</li>
-<br>
-<li>
-All methods in AttributeSource are idempotent, which means calling them multiple times always yields the same
-result. This is especially important to know for addAttribute(). The method takes the <b>type</b> (<code>Class</code>)
-of an Attribute as an argument and returns an <b>instance</b>. If an Attribute of the same type was previously added, then
-the already existing instance is returned, otherwise a new instance is created and returned. Therefore TokenStreams/-Filters
-can safely call addAttribute() with the same Attribute type multiple times. Even consumers of TokenStreams should
-normally call addAttribute() instead of getAttribute(), because it would not fail if the TokenStream does not have this
-Attribute (getAttribute() would throw an IllegalArgumentException, if the Attribute is missing). More advanced code
-could simply check with hasAttribute(), if a TokenStream has it, and may conditionally leave out processing for
-extra performance.
-</li></ol>
-<h3>Example</h3>
-In this example we will create a WhiteSpaceTokenizer and use a LengthFilter to suppress all words that only
-have two or less characters. The LengthFilter is part of the Lucene core and its implementation will be explained
-here to illustrate the usage of the new TokenStream API.<br>
-Then we will develop a custom Attribute, a PartOfSpeechAttribute, and add another filter to the chain which
-utilizes the new custom attribute, and call it PartOfSpeechTaggingFilter.
-<h4>Whitespace tokenization</h4>
-<pre class="prettyprint">
-public class MyAnalyzer extends Analyzer {
-
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream stream = new WhitespaceTokenizer(reader);
-    return stream;
-  }
-  
-  public static void main(String[] args) throws IOException {
-    // text to tokenize
-    final String text = "This is a demo of the new TokenStream API";
-    
-    MyAnalyzer analyzer = new MyAnalyzer();
-    TokenStream stream = analyzer.tokenStream("field", new StringReader(text));
-    
-    // get the CharTermAttribute from the TokenStream
-    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
-
-    stream.reset();
-    
-    // print all tokens until stream is exhausted
-    while (stream.incrementToken()) {
-      System.out.println(termAtt.toString());
-    }
-    
-    stream.end()
-    stream.close();
-  }
-}
-</pre>
-In this easy example a simple white space tokenization is performed. In main() a loop consumes the stream and
-prints the term text of the tokens by accessing the CharTermAttribute that the WhitespaceTokenizer provides. 
-Here is the output:
-<pre>
-This
-is
-a
-demo
-of
-the
-new
-TokenStream
-API
-</pre>
-<h4>Adding a LengthFilter</h4>
-We want to suppress all tokens that have 2 or less characters. We can do that easily by adding a LengthFilter 
-to the chain. Only the tokenStream() method in our analyzer needs to be changed:
-<pre class="prettyprint">
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream stream = new WhitespaceTokenizer(reader);
-    stream = new LengthFilter(stream, 3, Integer.MAX_VALUE);
-    return stream;
-  }
-</pre>
-Note how now only words with 3 or more characters are contained in the output:
-<pre>
-This
-demo
-the
-new
-TokenStream
-API
-</pre>
-Now let's take a look how the LengthFilter is implemented (it is part of Lucene's core):
-<pre class="prettyprint">
-public final class LengthFilter extends TokenFilter {
-
-  final int min;
-  final int max;
-  
-  private CharTermAttribute termAtt;
-
-  /**
-   * Build a filter that removes words that are too long or too
-   * short from the text.
-   */
-  public LengthFilter(TokenStream in, int min, int max)
-  {
-    super(in);
-    this.min = min;
-    this.max = max;
-    termAtt = addAttribute(CharTermAttribute.class);
-  }
-  
-  /**
-   * Returns the next input Token whose term() is the right len
-   */
-  public final boolean incrementToken() throws IOException
-  {
-    assert termAtt != null;
-    // return the first non-stop word found
-    while (input.incrementToken()) {
-      int len = termAtt.length();
-      if (len >= min && len <= max) {
-          return true;
-      }
-      // note: else we ignore it but should we index each part of it?
-    }
-    // reached EOS -- return null
-    return false;
-  }
-}
-</pre>
-The CharTermAttribute is added in the constructor and stored in the instance variable <code>termAtt</code>.
-Remember that there can only be a single instance of CharTermAttribute in the chain, so in our example the 
-<code>addAttribute()</code> call in LengthFilter returns the TermAttribute that the WhitespaceTokenizer already added. The tokens
-are retrieved from the input stream in the <code>incrementToken()</code> method. By looking at the term text
-in the CharTermAttribute the length of the term can be determined and too short or too long tokens are skipped. 
-Note how <code>incrementToken()</code> can efficiently access the instance variable; no attribute lookup
-is neccessary. The same is true for the consumer, which can simply use local references to the Attributes.
-
-<h4>Adding a custom Attribute</h4>
-Now we're going to implement our own custom Attribute for part-of-speech tagging and call it consequently 
-<code>PartOfSpeechAttribute</code>. First we need to define the interface of the new Attribute:
-<pre class="prettyprint">
-  public interface PartOfSpeechAttribute extends Attribute {
-    public static enum PartOfSpeech {
-      Noun, Verb, Adjective, Adverb, Pronoun, Preposition, Conjunction, Article, Unknown
-    }
-  
-    public void setPartOfSpeech(PartOfSpeech pos);
-  
-    public PartOfSpeech getPartOfSpeech();
-  }
-</pre>
-
-Now we also need to write the implementing class. The name of that class is important here: By default, Lucene
-checks if there is a class with the name of the Attribute with the postfix 'Impl'. In this example, we would
-consequently call the implementing class <code>PartOfSpeechAttributeImpl</code>. <br/>
-This should be the usual behavior. However, there is also an expert-API that allows changing these naming conventions:
-{@link org.apache.lucene.util.AttributeSource.AttributeFactory}. The factory accepts an Attribute interface as argument
-and returns an actual instance. You can implement your own factory if you need to change the default behavior. <br/><br/>
-
-Now here is the actual class that implements our new Attribute. Notice that the class has to extend
-{@link org.apache.lucene.util.AttributeImpl}:
-
-<pre class="prettyprint">
-public final class PartOfSpeechAttributeImpl extends AttributeImpl 
-                            implements PartOfSpeechAttribute{
-  
-  private PartOfSpeech pos = PartOfSpeech.Unknown;
-  
-  public void setPartOfSpeech(PartOfSpeech pos) {
-    this.pos = pos;
-  }
-  
-  public PartOfSpeech getPartOfSpeech() {
-    return pos;
-  }
-
-  public void clear() {
-    pos = PartOfSpeech.Unknown;
-  }
-
-  public void copyTo(AttributeImpl target) {
-    ((PartOfSpeechAttributeImpl) target).pos = pos;
-  }
-
-  public boolean equals(Object other) {
-    if (other == this) {
-      return true;
-    }
-    
-    if (other instanceof PartOfSpeechAttributeImpl) {
-      return pos == ((PartOfSpeechAttributeImpl) other).pos;
-    }
- 
-    return false;
-  }
-
-  public int hashCode() {
-    return pos.ordinal();
-  }
-}
-</pre>
-This is a simple Attribute implementation has only a single variable that stores the part-of-speech of a token. It extends the
-new <code>AttributeImpl</code> class and therefore implements its abstract methods <code>clear(), copyTo(), equals(), hashCode()</code>.
-Now we need a TokenFilter that can set this new PartOfSpeechAttribute for each token. In this example we show a very naive filter
-that tags every word with a leading upper-case letter as a 'Noun' and all other words as 'Unknown'.
-<pre class="prettyprint">
-  public static class PartOfSpeechTaggingFilter extends TokenFilter {
-    PartOfSpeechAttribute posAtt;
-    CharTermAttribute termAtt;
-    
-    protected PartOfSpeechTaggingFilter(TokenStream input) {
-      super(input);
-      posAtt = addAttribute(PartOfSpeechAttribute.class);
-      termAtt = addAttribute(CharTermAttribute.class);
-    }
-    
-    public boolean incrementToken() throws IOException {
-      if (!input.incrementToken()) {return false;}
-      posAtt.setPartOfSpeech(determinePOS(termAtt.buffer(), 0, termAtt.length()));
-      return true;
-    }
-    
-    // determine the part of speech for the given term
-    protected PartOfSpeech determinePOS(char[] term, int offset, int length) {
-      // naive implementation that tags every uppercased word as noun
-      if (length > 0 && Character.isUpperCase(term[0])) {
-        return PartOfSpeech.Noun;
-      }
-      return PartOfSpeech.Unknown;
-    }
-  }
-</pre>
-Just like the LengthFilter, this new filter accesses the attributes it needs in the constructor and
-stores references in instance variables. Notice how you only need to pass in the interface of the new
-Attribute and instantiating the correct class is automatically been taken care of.
-Now we need to add the filter to the chain:
-<pre class="prettyprint">
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream stream = new WhitespaceTokenizer(reader);
-    stream = new LengthFilter(stream, 3, Integer.MAX_VALUE);
-    stream = new PartOfSpeechTaggingFilter(stream);
-    return stream;
-  }
-</pre>
-Now let's look at the output:
-<pre>
-This
-demo
-the
-new
-TokenStream
-API
-</pre>
-Apparently it hasn't changed, which shows that adding a custom attribute to a TokenStream/Filter chain does not
-affect any existing consumers, simply because they don't know the new Attribute. Now let's change the consumer
-to make use of the new PartOfSpeechAttribute and print it out:
-<pre class="prettyprint">
-  public static void main(String[] args) throws IOException {
-    // text to tokenize
-    final String text = "This is a demo of the new TokenStream API";
-    
-    MyAnalyzer analyzer = new MyAnalyzer();
-    TokenStream stream = analyzer.tokenStream("field", new StringReader(text));
-    
-    // get the CharTermAttribute from the TokenStream
-    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
-    
-    // get the PartOfSpeechAttribute from the TokenStream
-    PartOfSpeechAttribute posAtt = stream.addAttribute(PartOfSpeechAttribute.class);
-    
-    stream.reset();
-
-    // print all tokens until stream is exhausted
-    while (stream.incrementToken()) {
-      System.out.println(termAtt.toString() + ": " + posAtt.getPartOfSpeech());
-    }
-    
-    stream.end();
-    stream.close();
-  }
-</pre>
-The change that was made is to get the PartOfSpeechAttribute from the TokenStream and print out its contents in
-the while loop that consumes the stream. Here is the new output:
-<pre>
-This: Noun
-demo: Unknown
-the: Unknown
-new: Unknown
-TokenStream: Noun
-API: Noun
-</pre>
-Each word is now followed by its assigned PartOfSpeech tag. Of course this is a naive 
-part-of-speech tagging. The word 'This' should not even be tagged as noun; it is only spelled capitalized because it
-is the first word of a sentence. Actually this is a good opportunity for an excerise. To practice the usage of the new
-API the reader could now write an Attribute and TokenFilter that can specify for each word if it was the first token
-of a sentence or not. Then the PartOfSpeechTaggingFilter can make use of this knowledge and only tag capitalized words
-as nouns if not the first word of a sentence (we know, this is still not a correct behavior, but hey, it's a good exercise). 
-As a small hint, this is how the new Attribute class could begin:
-<pre class="prettyprint">
-  public class FirstTokenOfSentenceAttributeImpl extends Attribute
-                   implements FirstTokenOfSentenceAttribute {
-    
-    private boolean firstToken;
-    
-    public void setFirstToken(boolean firstToken) {
-      this.firstToken = firstToken;
-    }
-    
-    public boolean getFirstToken() {
-      return firstToken;
-    }
-
-    public void clear() {
-      firstToken = false;
-    }
-
-  ...
-</pre>
-</body>
-</html>
Index: lucene/src/java/org/apache/lucene/LucenePackage.java
===================================================================
--- lucene/src/java/org/apache/lucene/LucenePackage.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/LucenePackage.java	(working copy)
@@ -1,29 +0,0 @@
-package org.apache.lucene;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/** Lucene's package information, including version. **/
-public final class LucenePackage {
-
-  private LucenePackage() {}                      // can't construct
-
-  /** Return Lucene's package, including version information. */
-  public static Package get() {
-    return LucenePackage.class.getPackage();
-  }
-}
Index: lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/DocInverterPerField.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/index/DocInverterPerField.java	(working copy)
@@ -185,7 +185,7 @@
           }
         }
 
-        fieldState.offset += docState.analyzer.getOffsetGap(field);
+        fieldState.offset += docState.analyzer.getOffsetGap(field.name(), field.isTokenized());
         fieldState.boost *= field.getBoost();
       }
 
Index: lucene/src/java/org/apache/lucene/index/Payload.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/Payload.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/index/Payload.java	(working copy)
@@ -1,199 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.util.ArrayUtil;
-
-/**
- *  A Payload is metadata that can be stored together with each occurrence 
- *  of a term. This metadata is stored inline in the posting list of the
- *  specific term.  
- *  <p>
- *  To store payloads in the index a {@link TokenStream} has to be used that
- *  produces payload data.
- *  <p>
- *  Use {@link DocsAndPositionsEnum#getPayload()}
- *  to retrieve the payloads from the index.<br>
- *
- */
-public class Payload implements Cloneable {
-  /** the byte array containing the payload data */
-  protected byte[] data;
-    
-  /** the offset within the byte array */
-  protected int offset;
-    
-  /** the length of the payload data */
-  protected int length;
-    
-  /** Creates an empty payload and does not allocate a byte array. */
-  public Payload() {
-    // nothing to do
-  }
-    
-  /**
-   * Creates a new payload with the the given array as data.
-   * A reference to the passed-in array is held, i. e. no 
-   * copy is made.
-   * 
-   * @param data the data of this payload
-   */
-  public Payload(byte[] data) {
-    this(data, 0, data.length);
-  }
-
-  /**
-   * Creates a new payload with the the given array as data. 
-   * A reference to the passed-in array is held, i. e. no 
-   * copy is made.
-   * 
-   * @param data the data of this payload
-   * @param offset the offset in the data byte array
-   * @param length the length of the data
-   */
-  public Payload(byte[] data, int offset, int length) {
-    if (offset < 0 || offset + length > data.length) {
-      throw new IllegalArgumentException();
-    }
-    this.data = data;
-    this.offset = offset;
-    this.length = length;
-  }
-    
-  /**
-   * Sets this payloads data. 
-   * A reference to the passed-in array is held, i. e. no 
-   * copy is made.
-   */
-  public void setData(byte[] data) {
-    setData(data, 0, data.length);
-  }
-
-  /**
-   * Sets this payloads data. 
-   * A reference to the passed-in array is held, i. e. no 
-   * copy is made.
-   */
-  public void setData(byte[] data, int offset, int length) {
-    this.data = data;
-    this.offset = offset;
-    this.length = length;
-  }
-    
-  /**
-   * Returns a reference to the underlying byte array
-   * that holds this payloads data.
-   */
-  public byte[] getData() {
-    return this.data;
-  }
-    
-  /**
-   * Returns the offset in the underlying byte array 
-   */
-  public int getOffset() {
-    return this.offset;
-  }
-    
-  /**
-   * Returns the length of the payload data. 
-   */
-  public int length() {
-    return this.length;
-  }
-    
-  /**
-   * Returns the byte at the given index.
-   */
-  public byte byteAt(int index) {
-    if (0 <= index && index < this.length) {
-      return this.data[this.offset + index];    
-    }
-    throw new ArrayIndexOutOfBoundsException(index);
-  }
-    
-  /**
-   * Allocates a new byte array, copies the payload data into it and returns it. 
-   */
-  public byte[] toByteArray() {
-    byte[] retArray = new byte[this.length];
-    System.arraycopy(this.data, this.offset, retArray, 0, this.length);
-    return retArray;
-  }
-    
-  /**
-   * Copies the payload data to a byte array.
-   * 
-   * @param target the target byte array
-   * @param targetOffset the offset in the target byte array
-   */
-  public void copyTo(byte[] target, int targetOffset) {
-    if (this.length > target.length + targetOffset) {
-      throw new ArrayIndexOutOfBoundsException();
-    }
-    System.arraycopy(this.data, this.offset, target, targetOffset, this.length);
-  }
-
-  /**
-   * Clones this payload by creating a copy of the underlying
-   * byte array.
-   */
-  @Override
-  public Object clone() {
-    try {
-      // Start with a shallow copy of data
-      Payload clone = (Payload) super.clone();
-      // Only copy the part of data that belongs to this Payload
-      if (offset == 0 && length == data.length) {
-        // It is the whole thing, so just clone it.
-        clone.data = data.clone();
-      }
-      else {
-        // Just get the part
-        clone.data = this.toByteArray();
-        clone.offset = 0;
-      }
-      return clone;
-    } catch (CloneNotSupportedException e) {
-      throw new RuntimeException(e);  // shouldn't happen
-    }
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (obj == this)
-      return true;
-    if (obj instanceof Payload) {
-      Payload other = (Payload) obj;
-      if (length == other.length) {
-        for(int i=0;i<length;i++)
-          if (data[offset+i] != other.data[other.offset+i])
-            return false;
-        return true;
-      } else
-        return false;
-    } else
-      return false;
-  }
-
-  @Override
-  public int hashCode() {
-    return ArrayUtil.hashCode(data, offset, offset+length);
-  }
-}
Index: lucene/src/java/org/apache/lucene/index/FieldsReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/FieldsReader.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/index/FieldsReader.java	(working copy)
@@ -25,7 +25,7 @@
 import org.apache.lucene.document.FieldSelectorResult;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.document.NumericField;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
Index: lucene/src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/IndexReader.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/index/IndexReader.java	(working copy)
@@ -24,10 +24,7 @@
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.store.*;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.ReaderUtil;         // for javadocs
+import org.apache.lucene.util.*;
 
 import java.io.File;
 import java.io.FileOutputStream;
@@ -618,7 +615,7 @@
    * <p><b>NOTE</b>: Once the writer is closed, any
    * outstanding readers may continue to be used.  However,
    * if you attempt to reopen any of those readers, you'll
-   * hit an {@link AlreadyClosedException}.</p>
+   * hit an {@link org.apache.lucene.util.AlreadyClosedException}.</p>
    *
    * @return IndexReader that covers entire index plus all
    * changes made so far by this IndexWriter instance
Index: lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/DocumentsWriter.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/index/DocumentsWriter.java	(working copy)
@@ -34,7 +34,7 @@
 import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SimilarityProvider;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 
 /**
Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/IndexWriter.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/index/IndexWriter.java	(working copy)
@@ -44,17 +44,12 @@
 import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.Lock;
 import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.util.BitVector;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.Constants;
-import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.ThreadInterruptedException;
-import org.apache.lucene.util.MapBackedSet;
+import org.apache.lucene.util.*;
 
 /**
   An <code>IndexWriter</code> creates and maintains an index.
Index: lucene/src/java/org/apache/lucene/store/MMapDirectory.java
===================================================================
--- lucene/src/java/org/apache/lucene/store/MMapDirectory.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/store/MMapDirectory.java	(working copy)
@@ -31,6 +31,7 @@
 import java.security.PrivilegedActionException;
 import java.lang.reflect.Method;
 
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.util.Constants;
 
 /** File-based {@link Directory} implementation that uses
Index: lucene/src/java/org/apache/lucene/store/Directory.java
===================================================================
--- lucene/src/java/org/apache/lucene/store/Directory.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/store/Directory.java	(working copy)
@@ -22,6 +22,7 @@
 import java.io.Closeable;
 import java.util.Collection; // for javadocs
 
+import org.apache.lucene.util.AlreadyClosedException;
 import org.apache.lucene.util.IOUtils;
 
 /** A Directory is a flat list of files.  Files may be written once, when they
Index: lucene/src/java/org/apache/lucene/store/AlreadyClosedException.java
===================================================================
--- lucene/src/java/org/apache/lucene/store/AlreadyClosedException.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/store/AlreadyClosedException.java	(working copy)
@@ -1,28 +0,0 @@
-package org.apache.lucene.store;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * This exception is thrown when there is an attempt to
- * access something that has already been closed.
- */
-public class AlreadyClosedException extends IllegalStateException {
-  public AlreadyClosedException(String message) {
-    super(message);
-  }
-}
Index: lucene/src/java/org/apache/lucene/util/Attribute.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/Attribute.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/Attribute.java	(working copy)
@@ -1,24 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Base interface for attributes.
- */
-public interface Attribute {
-}
Index: lucene/src/java/org/apache/lucene/util/AttributeReflector.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/AttributeReflector.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/AttributeReflector.java	(working copy)
@@ -1,34 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * This interface is used to reflect contents of {@link AttributeSource} or {@link AttributeImpl}.
- */
-public interface AttributeReflector {
-
-  /**
-   * This method gets called for every property in an {@link AttributeImpl}/{@link AttributeSource}
-   * passing the class name of the {@link Attribute}, a key and the actual value.
-   * E.g., an invocation of {@link org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl#reflectWith}
-   * would call this method once using {@code org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class}
-   * as attribute class, {@code "term"} as key and the actual value as a String.
-   */
-  public void reflect(Class<? extends Attribute> attClass, String key, Object value);
-  
-}
Index: lucene/src/java/org/apache/lucene/util/SorterTemplate.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/SorterTemplate.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/SorterTemplate.java	(working copy)
@@ -1,212 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * This class was inspired by CGLIB, but provides a better
- * QuickSort algorithm without additional InsertionSort
- * at the end.
- * To use, subclass and override the four abstract methods
- * which compare and modify your data.
- * Allows custom swap so that two arrays can be sorted
- * at the same time.
- * @lucene.internal
- */
-public abstract class SorterTemplate {
-
-  private static final int MERGESORT_THRESHOLD = 12;
-  private static final int QUICKSORT_THRESHOLD = 7;
-
-  /** Implement this method, that swaps slots {@code i} and {@code j} in your data */
-  protected abstract void swap(int i, int j);
-  
-  /** Compares slots {@code i} and {@code j} of you data.
-   * Should be implemented like <code><em>valueOf(i)</em>.compareTo(<em>valueOf(j)</em>)</code> */
-  protected abstract int compare(int i, int j);
-
-  /** Implement this method, that stores the value of slot {@code i} as pivot value */
-  protected abstract void setPivot(int i);
-  
-  /** Implements the compare function for the previously stored pivot value.
-   * Should be implemented like <code>pivot.compareTo(<em>valueOf(j)</em>)</code> */
-  protected abstract int comparePivot(int j);
-  
-  /** Sorts via stable in-place InsertionSort algorithm
-   *(ideal for small collections which are mostly presorted). */
-  public final void insertionSort(int lo, int hi) {
-    for (int i = lo + 1 ; i <= hi; i++) {
-      for (int j = i; j > lo; j--) {
-        if (compare(j - 1, j) > 0) {
-          swap(j - 1, j);
-        } else {
-          break;
-        }
-      }
-    }
-  }
-
-  /** Sorts via in-place, but unstable, QuickSort algorithm.
-   * For small collections falls back to {@link #insertionSort(int,int)}. */
-  public final void quickSort(final int lo, final int hi) {
-    if (hi <= lo) return;
-    // from Integer's Javadocs: ceil(log2(x)) = 32 - numberOfLeadingZeros(x - 1)
-    quickSort(lo, hi, (Integer.SIZE - Integer.numberOfLeadingZeros(hi - lo)) << 1);
-  }
-  
-  private void quickSort(int lo, int hi, int maxDepth) {
-    // fall back to insertion when array has short length
-    final int diff = hi - lo;
-    if (diff <= QUICKSORT_THRESHOLD) {
-      insertionSort(lo, hi);
-      return;
-    }
-    
-    // fall back to merge sort when recursion depth gets too big
-    if (--maxDepth == 0) {
-      mergeSort(lo, hi);
-      return;
-    }
-    
-    final int mid = lo + (diff >>> 1);
-    
-    if (compare(lo, mid) > 0) {
-      swap(lo, mid);
-    }
-
-    if (compare(mid, hi) > 0) {
-      swap(mid, hi);
-      if (compare(lo, mid) > 0) {
-        swap(lo, mid);
-      }
-    }
-    
-    int left = lo + 1;
-    int right = hi - 1;
-
-    setPivot(mid);
-    for (;;) {
-      while (comparePivot(right) < 0)
-        --right;
-
-      while (left < right && comparePivot(left) >= 0)
-        ++left;
-
-      if (left < right) {
-        swap(left, right);
-        --right;
-      } else {
-        break;
-      }
-    }
-
-    quickSort(lo, left, maxDepth);
-    quickSort(left + 1, hi, maxDepth);
-  }
-  
-  /** Sorts via stable in-place MergeSort algorithm
-   * For small collections falls back to {@link #insertionSort(int,int)}. */
-  public final void mergeSort(int lo, int hi) {
-    final int diff = hi - lo;
-    if (diff <= MERGESORT_THRESHOLD) {
-      insertionSort(lo, hi);
-      return;
-    }
-    
-    final int mid = lo + (diff >>> 1);
-    
-    mergeSort(lo, mid);
-    mergeSort(mid, hi);
-    merge(lo, mid, hi, mid - lo, hi - mid);
-  }
-
-  private void merge(int lo, int pivot, int hi, int len1, int len2) {
-    if (len1 == 0 || len2 == 0) {
-      return;
-    }
-    if (len1 + len2 == 2) {
-      if (compare(pivot, lo) < 0) {
-          swap(pivot, lo);
-      }
-      return;
-    }
-    int first_cut, second_cut;
-    int len11, len22;
-    if (len1 > len2) {
-      len11 = len1 >>> 1;
-      first_cut = lo + len11;
-      second_cut = lower(pivot, hi, first_cut);
-      len22 = second_cut - pivot;
-    } else {
-      len22 = len2 >>> 1;
-      second_cut = pivot + len22;
-      first_cut = upper(lo, pivot, second_cut);
-      len11 = first_cut - lo;
-    }
-    rotate(first_cut, pivot, second_cut);
-    final int new_mid = first_cut + len22;
-    merge(lo, first_cut, new_mid, len11, len22);
-    merge(new_mid, second_cut, hi, len1 - len11, len2 - len22);
-  }
-
-  private void rotate(int lo, int mid, int hi) {
-    int lot = lo;
-    int hit = mid - 1;
-    while (lot < hit) {
-      swap(lot++, hit--);
-    }
-    lot = mid; hit = hi - 1;
-    while (lot < hit) {
-      swap(lot++, hit--);
-    }
-    lot = lo; hit = hi - 1;
-    while (lot < hit) {
-      swap(lot++, hit--);
-    }
-  }
-
-  private int lower(int lo, int hi, int val) {
-    int len = hi - lo;
-    while (len > 0) {
-      final int half = len >>> 1,
-        mid = lo + half;
-      if (compare(mid, val) < 0) {
-        lo = mid + 1;
-        len = len - half -1;
-      } else {
-        len = half;
-      }
-    }
-    return lo;
-  }
-
-  private int upper(int lo, int hi, int val) {
-    int len = hi - lo;
-    while (len > 0) {
-      final int half = len >>> 1,
-        mid = lo + half;
-      if (compare(val, mid) < 0) {
-        len = half;
-      } else {
-        lo = mid + 1;
-        len = len - half -1;
-      }
-    }
-    return lo;
-  }
-
-}
Index: lucene/src/java/org/apache/lucene/util/Constants.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/Constants.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/Constants.java	(working copy)
@@ -1,89 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.LucenePackage;
-
-/**
- * Some useful constants.
- **/
-
-public final class Constants {
-  private Constants() {}			  // can't construct
-
-  /** The value of <tt>System.getProperty("java.version")<tt>. **/
-  public static final String JAVA_VERSION = System.getProperty("java.version");
-  /** True iff this is Java version 1.1. */
-  public static final boolean JAVA_1_1 = JAVA_VERSION.startsWith("1.1.");
-  /** True iff this is Java version 1.2. */
-  public static final boolean JAVA_1_2 = JAVA_VERSION.startsWith("1.2.");
-  /** True iff this is Java version 1.3. */
-  public static final boolean JAVA_1_3 = JAVA_VERSION.startsWith("1.3.");
- 
-  /** The value of <tt>System.getProperty("os.name")<tt>. **/
-  public static final String OS_NAME = System.getProperty("os.name");
-  /** True iff running on Linux. */
-  public static final boolean LINUX = OS_NAME.startsWith("Linux");
-  /** True iff running on Windows. */
-  public static final boolean WINDOWS = OS_NAME.startsWith("Windows");
-  /** True iff running on SunOS. */
-  public static final boolean SUN_OS = OS_NAME.startsWith("SunOS");
-
-  public static final String OS_ARCH = System.getProperty("os.arch");
-  public static final String OS_VERSION = System.getProperty("os.version");
-  public static final String JAVA_VENDOR = System.getProperty("java.vendor");
-
-  // NOTE: this logic may not be correct; if you know of a
-  // more reliable approach please raise it on java-dev!
-  public static final boolean JRE_IS_64BIT;
-  static {
-    String x = System.getProperty("sun.arch.data.model");
-    if (x != null) {
-      JRE_IS_64BIT = x.indexOf("64") != -1;
-    } else {
-      if (OS_ARCH != null && OS_ARCH.indexOf("64") != -1) {
-        JRE_IS_64BIT = true;
-      } else {
-        JRE_IS_64BIT = false;
-      }
-    }
-  }
-
-  // this method prevents inlining the final version constant in compiled classes,
-  // see: http://www.javaworld.com/community/node/3400
-  private static String ident(final String s) {
-    return s.toString();
-  }
-  
-  // NOTE: we track per-segment version as a String with the "X.Y" format, e.g.
-  // "4.0", "3.1", "3.0". Therefore when we change this constant, we should keep
-  // the format.
-  public static final String LUCENE_MAIN_VERSION = ident("4.0");
-
-  public static final String LUCENE_VERSION;
-  static {
-    Package pkg = LucenePackage.get();
-    String v = (pkg == null) ? null : pkg.getImplementationVersion();
-    if (v == null) {
-      v = LUCENE_MAIN_VERSION + "-SNAPSHOT";
-    } else if (!v.startsWith(LUCENE_MAIN_VERSION)) {
-      v = LUCENE_MAIN_VERSION + "-SNAPSHOT " + v;
-    }
-    LUCENE_VERSION = ident(v);
-  }
-}
Index: lucene/src/java/org/apache/lucene/util/MemoryModel.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/MemoryModel.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/MemoryModel.java	(working copy)
@@ -1,48 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to You under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Returns primitive memory sizes for estimating RAM usage.
- * 
- */
-public abstract class MemoryModel {
-
-  /**
-   * @return size of array beyond contents
-   */
-  public abstract int getArraySize();
-
-  /**
-   * @return Class size overhead
-   */
-  public abstract int getClassSize();
-
-  /**
-   * @param clazz a primitive Class - bool, byte, char, short, long, float,
-   *        short, double, int
-   * @return the size in bytes of given primitive Class
-   */
-  public abstract int getPrimitiveSize(Class<?> clazz);
-
-  /**
-   * @return size of reference
-   */
-  public abstract int getReferenceSize();
-
-}
Index: lucene/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java	(working copy)
@@ -1,78 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.IdentityHashMap;
-import java.util.Map;
-
-/**
- * An average, best guess, MemoryModel that should work okay on most systems.
- * 
- */
-public class AverageGuessMemoryModel extends MemoryModel {
-  // best guess primitive sizes
-  private final Map<Class<?>,Integer> sizes = new IdentityHashMap<Class<?>,Integer>() {
-    {
-      put(boolean.class, Integer.valueOf(1));
-      put(byte.class, Integer.valueOf(1));
-      put(char.class, Integer.valueOf(2));
-      put(short.class, Integer.valueOf(2));
-      put(int.class, Integer.valueOf(4));
-      put(float.class, Integer.valueOf(4));
-      put(double.class, Integer.valueOf(8));
-      put(long.class, Integer.valueOf(8));
-    }
-  };
-
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.lucene.util.MemoryModel#getArraySize()
-   */
-  @Override
-  public int getArraySize() {
-    return 16;
-  }
-
-  /*
-   * (non-Javadoc)
-   * 
-   * @see org.apache.lucene.util.MemoryModel#getClassSize()
-   */
-  @Override
-  public int getClassSize() {
-    return 8;
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.lucene.util.MemoryModel#getPrimitiveSize(java.lang.Class)
-   */
-  @Override
-  public int getPrimitiveSize(Class<?> clazz) {
-    return sizes.get(clazz).intValue();
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.lucene.util.MemoryModel#getReferenceSize()
-   */
-  @Override
-  public int getReferenceSize() {
-    return 4;
-  }
-
-}
Index: lucene/src/java/org/apache/lucene/util/CloseableThreadLocal.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/CloseableThreadLocal.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/CloseableThreadLocal.java	(working copy)
@@ -1,107 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Closeable;
-import java.lang.ref.WeakReference;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-
-/** Java's builtin ThreadLocal has a serious flaw:
- *  it can take an arbitrarily long amount of time to
- *  dereference the things you had stored in it, even once the
- *  ThreadLocal instance itself is no longer referenced.
- *  This is because there is single, master map stored for
- *  each thread, which all ThreadLocals share, and that
- *  master map only periodically purges "stale" entries.
- *
- *  While not technically a memory leak, because eventually
- *  the memory will be reclaimed, it can take a long time
- *  and you can easily hit OutOfMemoryError because from the
- *  GC's standpoint the stale entries are not reclaimable.
- * 
- *  This class works around that, by only enrolling
- *  WeakReference values into the ThreadLocal, and
- *  separately holding a hard reference to each stored
- *  value.  When you call {@link #close}, these hard
- *  references are cleared and then GC is freely able to
- *  reclaim space by objects stored in it.
- *
- *  We can not rely on {@link ThreadLocal#remove()} as it
- *  only removes the value for the caller thread, whereas
- *  {@link #close} takes care of all
- *  threads.  You should not call {@link #close} until all
- *  threads are done using the instance.
- *
- * @lucene.internal
- */
-
-public class CloseableThreadLocal<T> implements Closeable {
-
-  private ThreadLocal<WeakReference<T>> t = new ThreadLocal<WeakReference<T>>();
-
-  private Map<Thread,T> hardRefs = new HashMap<Thread,T>();
-  
-  protected T initialValue() {
-    return null;
-  }
-  
-  public T get() {
-    WeakReference<T> weakRef = t.get();
-    if (weakRef == null) {
-      T iv = initialValue();
-      if (iv != null) {
-        set(iv);
-        return iv;
-      } else
-        return null;
-    } else {
-      return weakRef.get();
-    }
-  }
-
-  public void set(T object) {
-
-    t.set(new WeakReference<T>(object));
-
-    synchronized(hardRefs) {
-      hardRefs.put(Thread.currentThread(), object);
-
-      // Purge dead threads
-      for (Iterator<Thread> it = hardRefs.keySet().iterator(); it.hasNext();) {
-        final Thread t = it.next();
-        if (!t.isAlive())
-          it.remove();
-      }
-    }
-  }
-
-  public void close() {
-    // Clear the hard refs; then, the only remaining refs to
-    // all values we were storing are weak (unless somewhere
-    // else is still using them) and so GC may reclaim them:
-    hardRefs = null;
-    // Take care of the current thread right now; others will be
-    // taken care of via the WeakReferences.
-    if (t != null) {
-      t.remove();
-    }
-    t = null;
-  }
-}
Index: lucene/src/java/org/apache/lucene/util/CharsRef.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/CharsRef.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/CharsRef.java	(working copy)
@@ -1,215 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Represents char[], as a slice (offset + length) into an existing char[].
- * 
- * @lucene.internal
- */
-public final class CharsRef implements Comparable<CharsRef>, CharSequence {
-  private static final char[] EMPTY_ARRAY = new char[0];
-  public char[] chars;
-  public int offset;
-  public int length;
-
-  /**
-   * Creates a new {@link CharsRef} initialized an empty array zero-length
-   */
-  public CharsRef() {
-    this(EMPTY_ARRAY, 0, 0);
-  }
-
-  /**
-   * Creates a new {@link CharsRef} initialized with an array of the given
-   * capacity
-   */
-  public CharsRef(int capacity) {
-    chars = new char[capacity];
-  }
-
-  /**
-   * Creates a new {@link CharsRef} initialized with the given array, offset and
-   * length
-   */
-  public CharsRef(char[] chars, int offset, int length) {
-    assert chars != null;
-    assert chars.length >= offset + length;
-    this.chars = chars;
-    this.offset = offset;
-    this.length = length;
-  }
-
-  /**
-   * Creates a new {@link CharsRef} initialized with the given Strings character
-   * array
-   */
-  public CharsRef(String string) {
-    this.chars = string.toCharArray();
-    this.offset = 0;
-    this.length = chars.length;
-  }
-
-  /**
-   * Creates a new {@link CharsRef} and copies the contents of the source into
-   * the new instance.
-   * @see #copy(CharsRef)
-   */
-  public CharsRef(CharsRef other) {
-    copy(other);
-  }
-
-  @Override
-  public Object clone() {
-    return new CharsRef(this);
-  }
-
-  @Override
-  public int hashCode() {
-    final int prime = 31;
-    int result = 0;
-    final int end = offset + length;
-    for (int i = offset; i < end; i++) {
-      result = prime * result + chars[i];
-    }
-    return result;
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (this == other) {
-      return true;
-    }
-
-    if (other instanceof CharsRef) {
-      return charsEquals((CharsRef) other);
-    }
-
-    if (other instanceof CharSequence) {
-      final CharSequence seq = (CharSequence) other;
-      if (length == seq.length()) {
-        int n = length;
-        int i = offset;
-        int j = 0;
-        while (n-- != 0) {
-          if (chars[i++] != seq.charAt(j++))
-            return false;
-        }
-        return true;
-      }
-    }
-    return false;
-  }
-
-  public boolean charsEquals(CharsRef other) {
-    if (length == other.length) {
-      int otherUpto = other.offset;
-      final char[] otherChars = other.chars;
-      final int end = offset + length;
-      for (int upto = offset; upto < end; upto++, otherUpto++) {
-        if (chars[upto] != otherChars[otherUpto]) {
-          return false;
-        }
-      }
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  /** Signed int order comparison */
-  public int compareTo(CharsRef other) {
-    if (this == other)
-      return 0;
-
-    final char[] aChars = this.chars;
-    int aUpto = this.offset;
-    final char[] bChars = other.chars;
-    int bUpto = other.offset;
-
-    final int aStop = aUpto + Math.min(this.length, other.length);
-
-    while (aUpto < aStop) {
-      int aInt = aChars[aUpto++];
-      int bInt = bChars[bUpto++];
-      if (aInt > bInt) {
-        return 1;
-      } else if (aInt < bInt) {
-        return -1;
-      }
-    }
-
-    // One is a prefix of the other, or, they are equal:
-    return this.length - other.length;
-  }
-  
-  /**
-   * Copies the given {@link CharsRef} referenced content into this instance
-   * starting at offset 0.
-   * 
-   * @param other
-   *          the {@link CharsRef} to copy
-   */
-  public void copy(CharsRef other) {
-    chars = ArrayUtil.grow(chars, other.length);
-    System.arraycopy(other.chars, other.offset, chars, 0, other.length);
-    length = other.length;
-    offset = 0;
-  }
-
-  public void grow(int newLength) {
-    if (chars.length < newLength) {
-      chars = ArrayUtil.grow(chars, newLength);
-    }
-  }
-
-  /**
-   * Copies the given array into this CharsRef starting at offset 0
-   */
-  public void copy(char[] otherChars, int otherOffset, int otherLength) {
-    this.offset = 0;
-    append(otherChars, otherOffset, otherLength);
-  }
-
-  /**
-   * Appends the given array to this CharsRef starting at the current offset
-   */
-  public void append(char[] otherChars, int otherOffset, int otherLength) {
-    grow(this.offset + otherLength);
-    System.arraycopy(otherChars, otherOffset, this.chars, this.offset,
-        otherLength);
-    this.length = otherLength;
-  }
-
-  @Override
-  public String toString() {
-    return new String(chars, offset, length);
-  }
-
-  public int length() {
-    return length;
-  }
-
-  public char charAt(int index) {
-    return chars[offset + index];
-  }
-
-  public CharSequence subSequence(int start, int end) {
-    return new CharsRef(chars, offset + start, offset + end - 1);
-  }
-}
\ No newline at end of file
Index: lucene/src/java/org/apache/lucene/util/IntsRef.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/IntsRef.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/IntsRef.java	(working copy)
@@ -1,140 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/** Represents int[], as a slice (offset + length) into an
- *  existing int[].
- *
- *  @lucene.internal */
-public final class IntsRef implements Comparable<IntsRef> {
-
-  public int[] ints;
-  public int offset;
-  public int length;
-
-  public IntsRef() {
-  }
-
-  public IntsRef(int capacity) {
-    ints = new int[capacity];
-  }
-
-  public IntsRef(int[] ints, int offset, int length) {
-    this.ints = ints;
-    this.offset = offset;
-    this.length = length;
-  }
-
-  public IntsRef(IntsRef other) {
-    copy(other);
-  }
-
-  @Override
-  public Object clone() {
-    return new IntsRef(this);
-  }
-
-  @Override
-  public int hashCode() {
-    final int prime = 31;
-    int result = 0;
-    final int end = offset + length;
-    for(int i = offset; i < end; i++) {
-      result = prime * result + ints[i];
-    }
-    return result;
-  }
-  
-  @Override
-  public boolean equals(Object other) {
-    return this.intsEquals((IntsRef) other);
-  }
-
-  public boolean intsEquals(IntsRef other) {
-    if (length == other.length) {
-      int otherUpto = other.offset;
-      final int[] otherInts = other.ints;
-      final int end = offset + length;
-      for(int upto=offset;upto<end;upto++,otherUpto++) {
-        if (ints[upto] != otherInts[otherUpto]) {
-          return false;
-        }
-      }
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  /** Signed int order comparison */
-  public int compareTo(IntsRef other) {
-    if (this == other) return 0;
-
-    final int[] aInts = this.ints;
-    int aUpto = this.offset;
-    final int[] bInts = other.ints;
-    int bUpto = other.offset;
-
-    final int aStop = aUpto + Math.min(this.length, other.length);
-
-    while(aUpto < aStop) {
-      int aInt = aInts[aUpto++];
-      int bInt = bInts[bUpto++];
-      if (aInt > bInt) {
-        return 1;
-      } else if (aInt < bInt) {
-        return -1;
-      }
-    }
-
-    // One is a prefix of the other, or, they are equal:
-    return this.length - other.length;
-  }
-
-  public void copy(IntsRef other) {
-    if (ints == null) {
-      ints = new int[other.length];
-    } else {
-      ints = ArrayUtil.grow(ints, other.length);
-    }
-    System.arraycopy(other.ints, other.offset, ints, 0, other.length);
-    length = other.length;
-    offset = 0;
-  }
-
-  public void grow(int newLength) {
-    if (ints.length < newLength) {
-      ints = ArrayUtil.grow(ints, newLength);
-    }
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder();
-    sb.append('[');
-    final int end = offset + length;
-    for(int i=offset;i<end;i++) {
-      if (i > offset) {
-        sb.append(' ');
-      }
-      sb.append(Integer.toHexString(ints[i]));
-    }
-    sb.append(']');
-    return sb.toString();
-  }
-}
Index: lucene/src/java/org/apache/lucene/util/ArrayUtil.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/ArrayUtil.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/ArrayUtil.java	(working copy)
@@ -1,677 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Collection;
-import java.util.Comparator;
-
-/**
- * Methods for manipulating arrays.
- *
- * @lucene.internal
- */
-
-public final class ArrayUtil {
-
-  private ArrayUtil() {} // no instance
-
-  /*
-     Begin Apache Harmony code
-
-     Revision taken on Friday, June 12. https://svn.apache.org/repos/asf/harmony/enhanced/classlib/archive/java6/modules/luni/src/main/java/java/lang/Integer.java
-
-   */
-
-  /**
-   * Parses the string argument as if it was an int value and returns the
-   * result. Throws NumberFormatException if the string does not represent an
-   * int quantity.
-   *
-   * @param chars a string representation of an int quantity.
-   * @return int the value represented by the argument
-   * @throws NumberFormatException if the argument could not be parsed as an int quantity.
-   */
-  public static int parseInt(char[] chars) throws NumberFormatException {
-    return parseInt(chars, 0, chars.length, 10);
-  }
-
-  /**
-   * Parses a char array into an int.
-   * @param chars the character array
-   * @param offset The offset into the array
-   * @param len The length
-   * @return the int
-   * @throws NumberFormatException if it can't parse
-   */
-  public static int parseInt(char[] chars, int offset, int len) throws NumberFormatException {
-    return parseInt(chars, offset, len, 10);
-  }
-
-  /**
-   * Parses the string argument as if it was an int value and returns the
-   * result. Throws NumberFormatException if the string does not represent an
-   * int quantity. The second argument specifies the radix to use when parsing
-   * the value.
-   *
-   * @param chars a string representation of an int quantity.
-   * @param radix the base to use for conversion.
-   * @return int the value represented by the argument
-   * @throws NumberFormatException if the argument could not be parsed as an int quantity.
-   */
-  public static int parseInt(char[] chars, int offset, int len, int radix)
-          throws NumberFormatException {
-    if (chars == null || radix < Character.MIN_RADIX
-            || radix > Character.MAX_RADIX) {
-      throw new NumberFormatException();
-    }
-    int  i = 0;
-    if (len == 0) {
-      throw new NumberFormatException("chars length is 0");
-    }
-    boolean negative = chars[offset + i] == '-';
-    if (negative && ++i == len) {
-      throw new NumberFormatException("can't convert to an int");
-    }
-    if (negative == true){
-      offset++;
-      len--;
-    }
-    return parse(chars, offset, len, radix, negative);
-  }
-
-
-  private static int parse(char[] chars, int offset, int len, int radix,
-                           boolean negative) throws NumberFormatException {
-    int max = Integer.MIN_VALUE / radix;
-    int result = 0;
-    for (int i = 0; i < len; i++){
-      int digit = Character.digit(chars[i + offset], radix);
-      if (digit == -1) {
-        throw new NumberFormatException("Unable to parse");
-      }
-      if (max > result) {
-        throw new NumberFormatException("Unable to parse");
-      }
-      int next = result * radix - digit;
-      if (next > result) {
-        throw new NumberFormatException("Unable to parse");
-      }
-      result = next;
-    }
-    /*while (offset < len) {
-
-    }*/
-    if (!negative) {
-      result = -result;
-      if (result < 0) {
-        throw new NumberFormatException("Unable to parse");
-      }
-    }
-    return result;
-  }
-
-
-  /*
-
- END APACHE HARMONY CODE
-  */
-
-  /** Returns an array size >= minTargetSize, generally
-   *  over-allocating exponentially to achieve amortized
-   *  linear-time cost as the array grows.
-   *
-   *  NOTE: this was originally borrowed from Python 2.4.2
-   *  listobject.c sources (attribution in LICENSE.txt), but
-   *  has now been substantially changed based on
-   *  discussions from java-dev thread with subject "Dynamic
-   *  array reallocation algorithms", started on Jan 12
-   *  2010.
-   *
-   * @param minTargetSize Minimum required value to be returned.
-   * @param bytesPerElement Bytes used by each element of
-   * the array.  See constants in {@link RamUsageEstimator}.
-   *
-   * @lucene.internal
-   */
-
-  public static int oversize(int minTargetSize, int bytesPerElement) {
-
-    if (minTargetSize < 0) {
-      // catch usage that accidentally overflows int
-      throw new IllegalArgumentException("invalid array size " + minTargetSize);
-    }
-
-    if (minTargetSize == 0) {
-      // wait until at least one element is requested
-      return 0;
-    }
-
-    // asymptotic exponential growth by 1/8th, favors
-    // spending a bit more CPU to not tie up too much wasted
-    // RAM:
-    int extra = minTargetSize >> 3;
-
-    if (extra < 3) {
-      // for very small arrays, where constant overhead of
-      // realloc is presumably relatively high, we grow
-      // faster
-      extra = 3;
-    }
-
-    int newSize = minTargetSize + extra;
-
-    // add 7 to allow for worst case byte alignment addition below:
-    if (newSize+7 < 0) {
-      // int overflowed -- return max allowed array size
-      return Integer.MAX_VALUE;
-    }
-
-    if (Constants.JRE_IS_64BIT) {
-      // round up to 8 byte alignment in 64bit env
-      switch(bytesPerElement) {
-      case 4:
-        // round up to multiple of 2
-        return (newSize + 1) & 0x7ffffffe;
-      case 2:
-        // round up to multiple of 4
-        return (newSize + 3) & 0x7ffffffc;
-      case 1:
-        // round up to multiple of 8
-        return (newSize + 7) & 0x7ffffff8;
-      case 8:
-        // no rounding
-      default:
-        // odd (invalid?) size
-        return newSize;
-      }
-    } else {
-      // round up to 4 byte alignment in 64bit env
-      switch(bytesPerElement) {
-      case 2:
-        // round up to multiple of 2
-        return (newSize + 1) & 0x7ffffffe;
-      case 1:
-        // round up to multiple of 4
-        return (newSize + 3) & 0x7ffffffc;
-      case 4:
-      case 8:
-        // no rounding
-      default:
-        // odd (invalid?) size
-        return newSize;
-      }
-    }
-  }
-
-  public static int getShrinkSize(int currentSize, int targetSize, int bytesPerElement) {
-    final int newSize = oversize(targetSize, bytesPerElement);
-    // Only reallocate if we are "substantially" smaller.
-    // This saves us from "running hot" (constantly making a
-    // bit bigger then a bit smaller, over and over):
-    if (newSize < currentSize / 2)
-      return newSize;
-    else
-      return currentSize;
-  }
-
-  public static short[] grow(short[] array, int minSize) {
-    if (array.length < minSize) {
-      short[] newArray = new short[oversize(minSize, RamUsageEstimator.NUM_BYTES_SHORT)];
-      System.arraycopy(array, 0, newArray, 0, array.length);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static short[] grow(short[] array) {
-    return grow(array, 1 + array.length);
-  }
-  
-  public static float[] grow(float[] array, int minSize) {
-    if (array.length < minSize) {
-      float[] newArray = new float[oversize(minSize, RamUsageEstimator.NUM_BYTES_FLOAT)];
-      System.arraycopy(array, 0, newArray, 0, array.length);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static float[] grow(float[] array) {
-    return grow(array, 1 + array.length);
-  }
-
-  public static short[] shrink(short[] array, int targetSize) {
-    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_SHORT);
-    if (newSize != array.length) {
-      short[] newArray = new short[newSize];
-      System.arraycopy(array, 0, newArray, 0, newSize);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static int[] grow(int[] array, int minSize) {
-    if (array.length < minSize) {
-      int[] newArray = new int[oversize(minSize, RamUsageEstimator.NUM_BYTES_INT)];
-      System.arraycopy(array, 0, newArray, 0, array.length);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static int[] grow(int[] array) {
-    return grow(array, 1 + array.length);
-  }
-
-  public static int[] shrink(int[] array, int targetSize) {
-    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_INT);
-    if (newSize != array.length) {
-      int[] newArray = new int[newSize];
-      System.arraycopy(array, 0, newArray, 0, newSize);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static long[] grow(long[] array, int minSize) {
-    if (array.length < minSize) {
-      long[] newArray = new long[oversize(minSize, RamUsageEstimator.NUM_BYTES_LONG)];
-      System.arraycopy(array, 0, newArray, 0, array.length);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static long[] grow(long[] array) {
-    return grow(array, 1 + array.length);
-  }
-
-  public static long[] shrink(long[] array, int targetSize) {
-    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_LONG);
-    if (newSize != array.length) {
-      long[] newArray = new long[newSize];
-      System.arraycopy(array, 0, newArray, 0, newSize);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static byte[] grow(byte[] array, int minSize) {
-    if (array.length < minSize) {
-      byte[] newArray = new byte[oversize(minSize, 1)];
-      System.arraycopy(array, 0, newArray, 0, array.length);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static byte[] grow(byte[] array) {
-    return grow(array, 1 + array.length);
-  }
-
-  public static byte[] shrink(byte[] array, int targetSize) {
-    final int newSize = getShrinkSize(array.length, targetSize, 1);
-    if (newSize != array.length) {
-      byte[] newArray = new byte[newSize];
-      System.arraycopy(array, 0, newArray, 0, newSize);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static boolean[] grow(boolean[] array, int minSize) {
-    if (array.length < minSize) {
-      boolean[] newArray = new boolean[oversize(minSize, 1)];
-      System.arraycopy(array, 0, newArray, 0, array.length);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static boolean[] grow(boolean[] array) {
-    return grow(array, 1 + array.length);
-  }
-
-  public static boolean[] shrink(boolean[] array, int targetSize) {
-    final int newSize = getShrinkSize(array.length, targetSize, 1);
-    if (newSize != array.length) {
-      boolean[] newArray = new boolean[newSize];
-      System.arraycopy(array, 0, newArray, 0, newSize);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static char[] grow(char[] array, int minSize) {
-    if (array.length < minSize) {
-      char[] newArray = new char[oversize(minSize, RamUsageEstimator.NUM_BYTES_CHAR)];
-      System.arraycopy(array, 0, newArray, 0, array.length);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static char[] grow(char[] array) {
-    return grow(array, 1 + array.length);
-  }
-
-  public static char[] shrink(char[] array, int targetSize) {
-    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_CHAR);
-    if (newSize != array.length) {
-      char[] newArray = new char[newSize];
-      System.arraycopy(array, 0, newArray, 0, newSize);
-      return newArray;
-    } else
-      return array;
-  }
-
-  /**
-   * Returns hash of chars in range start (inclusive) to
-   * end (inclusive)
-   */
-  public static int hashCode(char[] array, int start, int end) {
-    int code = 0;
-    for (int i = end - 1; i >= start; i--)
-      code = code * 31 + array[i];
-    return code;
-  }
-
-  /**
-   * Returns hash of bytes in range start (inclusive) to
-   * end (inclusive)
-   */
-  public static int hashCode(byte[] array, int start, int end) {
-    int code = 0;
-    for (int i = end - 1; i >= start; i--)
-      code = code * 31 + array[i];
-    return code;
-  }
-
-
-  // Since Arrays.equals doesn't implement offsets for equals
-  /**
-   * See if two array slices are the same.
-   *
-   * @param left        The left array to compare
-   * @param offsetLeft  The offset into the array.  Must be positive
-   * @param right       The right array to compare
-   * @param offsetRight the offset into the right array.  Must be positive
-   * @param length      The length of the section of the array to compare
-   * @return true if the two arrays, starting at their respective offsets, are equal
-   * 
-   * @see java.util.Arrays#equals(char[], char[])
-   */
-  public static boolean equals(char[] left, int offsetLeft, char[] right, int offsetRight, int length) {
-    if ((offsetLeft + length <= left.length) && (offsetRight + length <= right.length)) {
-      for (int i = 0; i < length; i++) {
-        if (left[offsetLeft + i] != right[offsetRight + i]) {
-          return false;
-        }
-
-      }
-      return true;
-    }
-    return false;
-  }
-
-  /* DISABLE THIS FOR NOW: This has performance problems until Java creates intrinsics for Class#getComponentType() and Array.newInstance()
-  public static <T> T[] grow(T[] array, int minSize) {
-    if (array.length < minSize) {
-      @SuppressWarnings("unchecked") final T[] newArray =
-        (T[]) Array.newInstance(array.getClass().getComponentType(), oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
-      System.arraycopy(array, 0, newArray, 0, array.length);
-      return newArray;
-    } else
-      return array;
-  }
-
-  public static <T> T[] grow(T[] array) {
-    return grow(array, 1 + array.length);
-  }
-
-  public static <T> T[] shrink(T[] array, int targetSize) {
-    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
-    if (newSize != array.length) {
-      @SuppressWarnings("unchecked") final T[] newArray =
-        (T[]) Array.newInstance(array.getClass().getComponentType(), newSize);
-      System.arraycopy(array, 0, newArray, 0, newSize);
-      return newArray;
-    } else
-      return array;
-  }
-  */
-
-  // Since Arrays.equals doesn't implement offsets for equals
-  /**
-   * See if two array slices are the same.
-   *
-   * @param left        The left array to compare
-   * @param offsetLeft  The offset into the array.  Must be positive
-   * @param right       The right array to compare
-   * @param offsetRight the offset into the right array.  Must be positive
-   * @param length      The length of the section of the array to compare
-   * @return true if the two arrays, starting at their respective offsets, are equal
-   * 
-   * @see java.util.Arrays#equals(char[], char[])
-   */
-  public static boolean equals(int[] left, int offsetLeft, int[] right, int offsetRight, int length) {
-    if ((offsetLeft + length <= left.length) && (offsetRight + length <= right.length)) {
-      for (int i = 0; i < length; i++) {
-        if (left[offsetLeft + i] != right[offsetRight + i]) {
-          return false;
-        }
-
-      }
-      return true;
-    }
-    return false;
-  }
-
-  public static int[] toIntArray(Collection<Integer> ints) {
-
-    final int[] result = new int[ints.size()];
-    int upto = 0;
-    for(int v : ints) {
-      result[upto++] = v;
-    }
-
-    // paranoia:
-    assert upto == result.length;
-
-    return result;
-  }
-  
-  /** SorterTemplate with custom {@link Comparator} */
-  private static <T> SorterTemplate getSorter(final T[] a, final Comparator<? super T> comp) {
-    return new SorterTemplate() {
-      @Override
-      protected void swap(int i, int j) {
-        final T o = a[i];
-        a[i] = a[j];
-        a[j] = o;
-      }
-      
-      @Override
-      protected int compare(int i, int j) {
-        return comp.compare(a[i], a[j]);
-      }
-
-      @Override
-      protected void setPivot(int i) {
-        pivot = a[i];
-      }
-  
-      @Override
-      protected int comparePivot(int j) {
-        return comp.compare(pivot, a[j]);
-      }
-      
-      private T pivot;
-    };
-  }
-  
-  /** Natural SorterTemplate */
-  private static <T extends Comparable<? super T>> SorterTemplate getSorter(final T[] a) {
-    return new SorterTemplate() {
-      @Override
-      protected void swap(int i, int j) {
-        final T o = a[i];
-        a[i] = a[j];
-        a[j] = o;
-      }
-      
-      @Override
-      protected int compare(int i, int j) {
-        return a[i].compareTo(a[j]);
-      }
-
-      @Override
-      protected void setPivot(int i) {
-        pivot = a[i];
-      }
-  
-      @Override
-      protected int comparePivot(int j) {
-        return pivot.compareTo(a[j]);
-      }
-      
-      private T pivot;
-    };
-  }
-
-  // quickSorts (endindex is exclusive!):
-  
-  /**
-   * Sorts the given array slice using the {@link Comparator}. This method uses the quick sort
-   * algorithm, but falls back to insertion sort for small arrays.
-   * @param fromIndex start index (inclusive)
-   * @param toIndex end index (exclusive)
-   */
-  public static <T> void quickSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
-    if (toIndex-fromIndex <= 1) return;
-    getSorter(a, comp).quickSort(fromIndex, toIndex-1);
-  }
-  
-  /**
-   * Sorts the given array using the {@link Comparator}. This method uses the quick sort
-   * algorithm, but falls back to insertion sort for small arrays.
-   */
-  public static <T> void quickSort(T[] a, Comparator<? super T> comp) {
-    quickSort(a, 0, a.length, comp);
-  }
-  
-  /**
-   * Sorts the given array slice in natural order. This method uses the quick sort
-   * algorithm, but falls back to insertion sort for small arrays.
-   * @param fromIndex start index (inclusive)
-   * @param toIndex end index (exclusive)
-   */
-  public static <T extends Comparable<? super T>> void quickSort(T[] a, int fromIndex, int toIndex) {
-    if (toIndex-fromIndex <= 1) return;
-    getSorter(a).quickSort(fromIndex, toIndex-1);
-  }
-  
-  /**
-   * Sorts the given array in natural order. This method uses the quick sort
-   * algorithm, but falls back to insertion sort for small arrays.
-   */
-  public static <T extends Comparable<? super T>> void quickSort(T[] a) {
-    quickSort(a, 0, a.length);
-  }
-
-  // mergeSorts:
-  
-  /**
-   * Sorts the given array slice using the {@link Comparator}. This method uses the merge sort
-   * algorithm, but falls back to insertion sort for small arrays.
-   * @param fromIndex start index (inclusive)
-   * @param toIndex end index (exclusive)
-   */
-  public static <T> void mergeSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
-    if (toIndex-fromIndex <= 1) return;
-    getSorter(a, comp).mergeSort(fromIndex, toIndex-1);
-  }
-  
-  /**
-   * Sorts the given array using the {@link Comparator}. This method uses the merge sort
-   * algorithm, but falls back to insertion sort for small arrays.
-   */
-  public static <T> void mergeSort(T[] a, Comparator<? super T> comp) {
-    mergeSort(a, 0, a.length, comp);
-  }
-  
-  /**
-   * Sorts the given array slice in natural order. This method uses the merge sort
-   * algorithm, but falls back to insertion sort for small arrays.
-   * @param fromIndex start index (inclusive)
-   * @param toIndex end index (exclusive)
-   */
-  public static <T extends Comparable<? super T>> void mergeSort(T[] a, int fromIndex, int toIndex) {
-    if (toIndex-fromIndex <= 1) return;
-    getSorter(a).mergeSort(fromIndex, toIndex-1);
-  }
-  
-  /**
-   * Sorts the given array in natural order. This method uses the merge sort
-   * algorithm, but falls back to insertion sort for small arrays.
-   */
-  public static <T extends Comparable<? super T>> void mergeSort(T[] a) {
-    mergeSort(a, 0, a.length);
-  }
-
-  // insertionSorts:
-  
-  /**
-   * Sorts the given array slice using the {@link Comparator}. This method uses the insertion sort
-   * algorithm. It is only recommended to use this algorithm for partially sorted small arrays!
-   * @param fromIndex start index (inclusive)
-   * @param toIndex end index (exclusive)
-   */
-  public static <T> void insertionSort(T[] a, int fromIndex, int toIndex, Comparator<? super T> comp) {
-    if (toIndex-fromIndex <= 1) return;
-    getSorter(a, comp).insertionSort(fromIndex, toIndex-1);
-  }
-  
-  /**
-   * Sorts the given array using the {@link Comparator}. This method uses the insertion sort
-   * algorithm. It is only recommended to use this algorithm for partially sorted small arrays!
-   */
-  public static <T> void insertionSort(T[] a, Comparator<? super T> comp) {
-    insertionSort(a, 0, a.length, comp);
-  }
-  
-  /**
-   * Sorts the given array slice in natural order. This method uses the insertion sort
-   * algorithm. It is only recommended to use this algorithm for partially sorted small arrays!
-   * @param fromIndex start index (inclusive)
-   * @param toIndex end index (exclusive)
-   */
-  public static <T extends Comparable<? super T>> void insertionSort(T[] a, int fromIndex, int toIndex) {
-    if (toIndex-fromIndex <= 1) return;
-    getSorter(a).insertionSort(fromIndex, toIndex-1);
-  }
-  
-  /**
-   * Sorts the given array in natural order. This method uses the insertion sort
-   * algorithm. It is only recommended to use this algorithm for partially sorted small arrays!
-   */
-  public static <T extends Comparable<? super T>> void insertionSort(T[] a) {
-    insertionSort(a, 0, a.length);
-  }
-
-}
\ No newline at end of file
Index: lucene/src/java/org/apache/lucene/util/BytesRef.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/BytesRef.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/BytesRef.java	(working copy)
@@ -1,369 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Comparator;
-
-/** Represents byte[], as a slice (offset + length) into an
- *  existing byte[].
- *
- *  @lucene.experimental */
-public final class BytesRef implements Comparable<BytesRef> {
-
-  static final int HASH_PRIME = 31;
-  public static final byte[] EMPTY_BYTES = new byte[0]; 
-
-  /** The contents of the BytesRef. Should never be {@code null}. */
-  public byte[] bytes;
-
-  /** Offset of first valid byte. */
-  public int offset;
-
-  /** Length of used bytes. */
-  public int length;
-
-  public BytesRef() {
-    bytes = EMPTY_BYTES;
-  }
-
-  /** This instance will directly reference bytes w/o making a copy.
-   * bytes should not be null.
-   */
-  public BytesRef(byte[] bytes, int offset, int length) {
-    assert bytes != null;
-    this.bytes = bytes;
-    this.offset = offset;
-    this.length = length;
-  }
-
-  /** This instance will directly reference bytes w/o making a copy.
-   * bytes should not be null */
-  public BytesRef(byte[] bytes) {
-    assert bytes != null;
-    this.bytes = bytes;
-    this.offset = 0;
-    this.length = bytes.length;
-  }
-
-  public BytesRef(int capacity) {
-    this.bytes = new byte[capacity];
-  }
-
-  /**
-   * @param text Initialize the byte[] from the UTF8 bytes
-   * for the provided Sring.  This must be well-formed
-   * unicode text, with no unpaired surrogates or U+FFFF.
-   */
-  public BytesRef(CharSequence text) {
-    this();
-    copy(text);
-  }
-  
-  /**
-   * @param text Initialize the byte[] from the UTF8 bytes
-   * for the provided array.  This must be well-formed
-   * unicode text, with no unpaired surrogates or U+FFFF.
-   */
-  public BytesRef(char text[], int offset, int length) {
-    this(length * 4);
-    copy(text, offset, length);
-  }
-
-  public BytesRef(BytesRef other) {
-    this();
-    copy(other);
-  }
-
-  /* // maybe?
-  public BytesRef(BytesRef other, boolean shallow) {
-    this();
-    if (shallow) {
-      offset = other.offset;
-      length = other.length;
-      bytes = other.bytes;
-    } else {
-      copy(other);
-    }
-  }
-  */
-
-  /**
-   * Copies the UTF8 bytes for this string.
-   * 
-   * @param text Must be well-formed unicode text, with no
-   * unpaired surrogates or invalid UTF16 code units.
-   */
-  public void copy(CharSequence text) {
-    UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this);
-  }
-
-  /**
-   * Copies the UTF8 bytes for this string.
-   * 
-   * @param text Must be well-formed unicode text, with no
-   * unpaired surrogates or invalid UTF16 code units.
-   */
-  public void copy(char text[], int offset, int length) {
-    UnicodeUtil.UTF16toUTF8(text, offset, length, this);
-  }
-  
-  public boolean bytesEquals(BytesRef other) {
-    if (length == other.length) {
-      int otherUpto = other.offset;
-      final byte[] otherBytes = other.bytes;
-      final int end = offset + length;
-      for(int upto=offset;upto<end;upto++,otherUpto++) {
-        if (bytes[upto] != otherBytes[otherUpto]) {
-          return false;
-        }
-      }
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public Object clone() {
-    return new BytesRef(this);
-  }
-
-  private boolean sliceEquals(BytesRef other, int pos) {
-    if (pos < 0 || length - pos < other.length) {
-      return false;
-    }
-    int i = offset + pos;
-    int j = other.offset;
-    final int k = other.offset + other.length;
-    
-    while (j < k) {
-      if (bytes[i++] != other.bytes[j++]) {
-        return false;
-      }
-    }
-    
-    return true;
-  }
-  
-  public boolean startsWith(BytesRef other) {
-    return sliceEquals(other, 0);
-  }
-
-  public boolean endsWith(BytesRef other) {
-    return sliceEquals(other, length - other.length);
-  }
-  
-  /** Calculates the hash code as required by TermsHash during indexing.
-   * <p>It is defined as:
-   * <pre>
-   *  int hash = 0;
-   *  for (int i = offset; i &lt; offset + length; i++) {
-   *    hash = 31*hash + bytes[i];
-   *  }
-   * </pre>
-   */
-  @Override
-  public int hashCode() {
-    int result = 0;
-    final int end = offset + length;
-    for(int i=offset;i<end;i++) {
-      result = HASH_PRIME * result + bytes[i];
-    }
-    return result;
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == null) {
-      return false;
-    }
-    return this.bytesEquals((BytesRef) other);
-  }
-
-  /** Interprets stored bytes as UTF8 bytes, returning the
-   *  resulting string */
-  public String utf8ToString() {
-    final CharsRef ref = new CharsRef(length);
-    UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
-    return ref.toString(); 
-  }
-  
-  /** Interprets stored bytes as UTF8 bytes into the given {@link CharsRef} */
-  public CharsRef utf8ToChars(CharsRef ref) {
-    UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
-    return ref;
-  }
-
-  /** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder();
-    sb.append('[');
-    final int end = offset + length;
-    for(int i=offset;i<end;i++) {
-      if (i > offset) {
-        sb.append(' ');
-      }
-      sb.append(Integer.toHexString(bytes[i]&0xff));
-    }
-    sb.append(']');
-    return sb.toString();
-  }
-
-  public void copy(BytesRef other) {
-    if (bytes.length < other.length) {
-      bytes = new byte[other.length];
-    }
-    System.arraycopy(other.bytes, other.offset, bytes, 0, other.length);
-    length = other.length;
-    offset = 0;
-  }
-
-  public void append(BytesRef other) {
-    int newLen = length + other.length;
-    if (bytes.length < newLen) {
-      byte[] newBytes = new byte[newLen];
-      System.arraycopy(bytes, offset, newBytes, 0, length);
-      offset = 0;
-      bytes = newBytes;
-    }
-    System.arraycopy(other.bytes, other.offset, bytes, length+offset, other.length);
-    length = newLen;
-  }
-
-  public void grow(int newLength) {
-    bytes = ArrayUtil.grow(bytes, newLength);
-  }
-
-  /** Unsigned byte order comparison */
-  public int compareTo(BytesRef other) {
-    if (this == other) return 0;
-
-    final byte[] aBytes = this.bytes;
-    int aUpto = this.offset;
-    final byte[] bBytes = other.bytes;
-    int bUpto = other.offset;
-
-    final int aStop = aUpto + Math.min(this.length, other.length);
-
-    while(aUpto < aStop) {
-      int aByte = aBytes[aUpto++] & 0xff;
-      int bByte = bBytes[bUpto++] & 0xff;
-      int diff = aByte - bByte;
-      if (diff != 0) return diff;
-    }
-
-    // One is a prefix of the other, or, they are equal:
-    return this.length - other.length;
-  }
-
-  private final static Comparator<BytesRef> utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator();
-
-  public static Comparator<BytesRef> getUTF8SortedAsUnicodeComparator() {
-    return utf8SortedAsUnicodeSortOrder;
-  }
-
-  private static class UTF8SortedAsUnicodeComparator implements Comparator<BytesRef> {
-    // Only singleton
-    private UTF8SortedAsUnicodeComparator() {};
-
-    public int compare(BytesRef a, BytesRef b) {
-      final byte[] aBytes = a.bytes;
-      int aUpto = a.offset;
-      final byte[] bBytes = b.bytes;
-      int bUpto = b.offset;
-      
-      final int aStop;
-      if (a.length < b.length) {
-        aStop = aUpto + a.length;
-      } else {
-        aStop = aUpto + b.length;
-      }
-
-      while(aUpto < aStop) {
-        int aByte = aBytes[aUpto++] & 0xff;
-        int bByte = bBytes[bUpto++] & 0xff;
-
-        int diff = aByte - bByte;
-        if (diff != 0) {
-          return diff;
-        }
-      }
-
-      // One is a prefix of the other, or, they are equal:
-      return a.length - b.length;
-    }    
-  }
-
-  private final static Comparator<BytesRef> utf8SortedAsUTF16SortOrder = new UTF8SortedAsUTF16Comparator();
-
-  public static Comparator<BytesRef> getUTF8SortedAsUTF16Comparator() {
-    return utf8SortedAsUTF16SortOrder;
-  }
-
-  private static class UTF8SortedAsUTF16Comparator implements Comparator<BytesRef> {
-    // Only singleton
-    private UTF8SortedAsUTF16Comparator() {};
-
-    public int compare(BytesRef a, BytesRef b) {
-
-      final byte[] aBytes = a.bytes;
-      int aUpto = a.offset;
-      final byte[] bBytes = b.bytes;
-      int bUpto = b.offset;
-      
-      final int aStop;
-      if (a.length < b.length) {
-        aStop = aUpto + a.length;
-      } else {
-        aStop = aUpto + b.length;
-      }
-
-      while(aUpto < aStop) {
-        int aByte = aBytes[aUpto++] & 0xff;
-        int bByte = bBytes[bUpto++] & 0xff;
-
-        if (aByte != bByte) {
-
-          // See http://icu-project.org/docs/papers/utf16_code_point_order.html#utf-8-in-utf-16-order
-
-          // We know the terms are not equal, but, we may
-          // have to carefully fixup the bytes at the
-          // difference to match UTF16's sort order:
-          
-          // NOTE: instead of moving supplementary code points (0xee and 0xef) to the unused 0xfe and 0xff, 
-          // we move them to the unused 0xfc and 0xfd [reserved for future 6-byte character sequences]
-          // this reserves 0xff for preflex's term reordering (surrogate dance), and if unicode grows such
-          // that 6-byte sequences are needed we have much bigger problems anyway.
-          if (aByte >= 0xee && bByte >= 0xee) {
-            if ((aByte & 0xfe) == 0xee) {
-              aByte += 0xe;
-            }
-            if ((bByte&0xfe) == 0xee) {
-              bByte += 0xe;
-            }
-          }
-          return aByte - bByte;
-        }
-      }
-
-      // One is a prefix of the other, or, they are equal:
-      return a.length - b.length;
-    }
-  }
-}
Index: lucene/src/java/org/apache/lucene/util/NumericUtils.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/NumericUtils.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/NumericUtils.java	(working copy)
@@ -1,432 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.NumericTokenStream;
-import org.apache.lucene.document.NumericField;
-import org.apache.lucene.search.NumericRangeFilter;
-import org.apache.lucene.search.NumericRangeQuery; // for javadocs
-
-/**
- * This is a helper class to generate prefix-encoded representations for numerical values
- * and supplies converters to represent float/double values as sortable integers/longs.
- *
- * <p>To quickly execute range queries in Apache Lucene, a range is divided recursively
- * into multiple intervals for searching: The center of the range is searched only with
- * the lowest possible precision in the trie, while the boundaries are matched
- * more exactly. This reduces the number of terms dramatically.
- *
- * <p>This class generates terms to achieve this: First the numerical integer values need to
- * be converted to bytes. For that integer values (32 bit or 64 bit) are made unsigned
- * and the bits are converted to ASCII chars with each 7 bit. The resulting byte[] is
- * sortable like the original integer value (even using UTF-8 sort order). Each value is also
- * prefixed (in the first char) by the <code>shift</code> value (number of bits removed) used
- * during encoding.
- *
- * <p>To also index floating point numbers, this class supplies two methods to convert them
- * to integer values by changing their bit layout: {@link #doubleToSortableLong},
- * {@link #floatToSortableInt}. You will have no precision loss by
- * converting floating point numbers to integers and back (only that the integer form
- * is not usable). Other data types like dates can easily converted to longs or ints (e.g.
- * date to long: {@link java.util.Date#getTime}).
- *
- * <p>For easy usage, the trie algorithm is implemented for indexing inside
- * {@link NumericTokenStream} that can index <code>int</code>, <code>long</code>,
- * <code>float</code>, and <code>double</code>. For querying,
- * {@link NumericRangeQuery} and {@link NumericRangeFilter} implement the query part
- * for the same data types.
- *
- * <p>This class can also be used, to generate lexicographically sortable (according to
- * {@link BytesRef#getUTF8SortedAsUTF16Comparator()}) representations of numeric data
- * types for other usages (e.g. sorting).
- *
- * @lucene.internal
- * @since 2.9, API changed non backwards-compliant in 4.0
- */
-public final class NumericUtils {
-
-  private NumericUtils() {} // no instance!
-  
-  /**
-   * The default precision step used by {@link NumericField}, {@link NumericTokenStream},
-   * {@link NumericRangeQuery}, and {@link NumericRangeFilter} as default
-   */
-  public static final int PRECISION_STEP_DEFAULT = 4;
-  
-  /**
-   * Longs are stored at lower precision by shifting off lower bits. The shift count is
-   * stored as <code>SHIFT_START_LONG+shift</code> in the first byte
-   */
-  public static final byte SHIFT_START_LONG = 0x20;
-
-  /**
-   * The maximum term length (used for <code>byte[]</code> buffer size)
-   * for encoding <code>long</code> values.
-   * @see #longToPrefixCoded(long,int,BytesRef)
-   */
-  public static final int BUF_SIZE_LONG = 63/7 + 2;
-
-  /**
-   * Integers are stored at lower precision by shifting off lower bits. The shift count is
-   * stored as <code>SHIFT_START_INT+shift</code> in the first byte
-   */
-  public static final byte SHIFT_START_INT  = 0x60;
-
-  /**
-   * The maximum term length (used for <code>byte[]</code> buffer size)
-   * for encoding <code>int</code> values.
-   * @see #intToPrefixCoded(int,int,BytesRef)
-   */
-  public static final int BUF_SIZE_INT = 31/7 + 2;
-
-  /**
-   * Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
-   * This is method is used by {@link NumericTokenStream}.
-   * After encoding, {@code bytes.offset} will always be 0. 
-   * @param val the numeric value
-   * @param shift how many bits to strip from the right
-   * @param bytes will contain the encoded value
-   * @return the hash code for indexing (TermsHash)
-   */
-  public static int longToPrefixCoded(final long val, final int shift, final BytesRef bytes) {
-    if (shift>63 || shift<0)
-      throw new IllegalArgumentException("Illegal shift value, must be 0..63");
-    int hash, nChars = (63-shift)/7 + 1;
-    bytes.offset = 0;
-    bytes.length = nChars+1;
-    if (bytes.bytes.length < bytes.length) {
-      bytes.grow(NumericUtils.BUF_SIZE_LONG);
-    }
-    bytes.bytes[0] = (byte) (hash = (SHIFT_START_LONG + shift));
-    long sortableBits = val ^ 0x8000000000000000L;
-    sortableBits >>>= shift;
-    while (nChars > 0) {
-      // Store 7 bits per byte for compatibility
-      // with UTF-8 encoding of terms
-      bytes.bytes[nChars--] = (byte)(sortableBits & 0x7f);
-      sortableBits >>>= 7;
-    }
-    // calculate hash
-    for (int i = 1; i < bytes.length; i++) {
-      hash = 31*hash + bytes.bytes[i];
-    }
-    return hash;
-  }
-
-  /**
-   * Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
-   * This is method is used by {@link NumericTokenStream}.
-   * After encoding, {@code bytes.offset} will always be 0. 
-   * @param val the numeric value
-   * @param shift how many bits to strip from the right
-   * @param bytes will contain the encoded value
-   * @return the hash code for indexing (TermsHash)
-   */
-  public static int intToPrefixCoded(final int val, final int shift, final BytesRef bytes) {
-    if (shift>31 || shift<0)
-      throw new IllegalArgumentException("Illegal shift value, must be 0..31");
-    int hash, nChars = (31-shift)/7 + 1;
-    bytes.offset = 0;
-    bytes.length = nChars+1;
-    if (bytes.bytes.length < bytes.length) {
-      bytes.grow(NumericUtils.BUF_SIZE_INT);
-    }
-    bytes.bytes[0] = (byte) (hash = (SHIFT_START_INT + shift));
-    int sortableBits = val ^ 0x80000000;
-    sortableBits >>>= shift;
-    while (nChars > 0) {
-      // Store 7 bits per byte for compatibility
-      // with UTF-8 encoding of terms
-      bytes.bytes[nChars--] = (byte)(sortableBits & 0x7f);
-      sortableBits >>>= 7;
-    }
-    // calculate hash
-    for (int i = 1; i < bytes.length; i++) {
-      hash = 31*hash + bytes.bytes[i];
-    }
-    return hash;
-  }
-
-  /**
-   * Returns the shift value from a prefix encoded {@code long}.
-   * @throws NumberFormatException if the supplied {@link BytesRef} is
-   * not correctly prefix encoded.
-   */
-  public static int getPrefixCodedLongShift(final BytesRef val) {
-    final int shift = val.bytes[val.offset] - SHIFT_START_LONG;
-    if (shift > 63 || shift < 0)
-      throw new NumberFormatException("Invalid shift value (" + shift + ") in prefixCoded bytes (is encoded value really an INT?)");
-    return shift;
-  }
-
-  /**
-   * Returns the shift value from a prefix encoded {@code int}.
-   * @throws NumberFormatException if the supplied {@link BytesRef} is
-   * not correctly prefix encoded.
-   */
-  public static int getPrefixCodedIntShift(final BytesRef val) {
-    final int shift = val.bytes[val.offset] - SHIFT_START_INT;
-    if (shift > 31 || shift < 0)
-      throw new NumberFormatException("Invalid shift value in prefixCoded bytes (is encoded value really an INT?)");
-    return shift;
-  }
-
-  /**
-   * Returns a long from prefixCoded bytes.
-   * Rightmost bits will be zero for lower precision codes.
-   * This method can be used to decode a term's value.
-   * @throws NumberFormatException if the supplied {@link BytesRef} is
-   * not correctly prefix encoded.
-   * @see #longToPrefixCoded(long,int,BytesRef)
-   */
-  public static long prefixCodedToLong(final BytesRef val) {
-    long sortableBits = 0L;
-    for (int i=val.offset+1, limit=val.offset+val.length; i<limit; i++) {
-      sortableBits <<= 7;
-      final byte b = val.bytes[i];
-      if (b < 0) {
-        throw new NumberFormatException(
-          "Invalid prefixCoded numerical value representation (byte "+
-          Integer.toHexString(b&0xff)+" at position "+(i-val.offset)+" is invalid)"
-        );
-      }
-      sortableBits |= b;
-    }
-    return (sortableBits << getPrefixCodedLongShift(val)) ^ 0x8000000000000000L;
-  }
-
-  /**
-   * Returns an int from prefixCoded bytes.
-   * Rightmost bits will be zero for lower precision codes.
-   * This method can be used to decode a term's value.
-   * @throws NumberFormatException if the supplied {@link BytesRef} is
-   * not correctly prefix encoded.
-   * @see #intToPrefixCoded(int,int,BytesRef)
-   */
-  public static int prefixCodedToInt(final BytesRef val) {
-    int sortableBits = 0;
-    for (int i=val.offset+1, limit=val.offset+val.length; i<limit; i++) {
-      sortableBits <<= 7;
-      final byte b = val.bytes[i];
-      if (b < 0) {
-        throw new NumberFormatException(
-          "Invalid prefixCoded numerical value representation (byte "+
-          Integer.toHexString(b&0xff)+" at position "+(i-val.offset)+" is invalid)"
-        );
-      }
-      sortableBits |= b;
-    }
-    return (sortableBits << getPrefixCodedIntShift(val)) ^ 0x80000000;
-  }
-
-  /**
-   * Converts a <code>double</code> value to a sortable signed <code>long</code>.
-   * The value is converted by getting their IEEE 754 floating-point &quot;double format&quot;
-   * bit layout and then some bits are swapped, to be able to compare the result as long.
-   * By this the precision is not reduced, but the value can easily used as a long.
-   * @see #sortableLongToDouble
-   */
-  public static long doubleToSortableLong(double val) {
-    long f = Double.doubleToRawLongBits(val);
-    if (f<0) f ^= 0x7fffffffffffffffL;
-    return f;
-  }
-
-  /**
-   * Converts a sortable <code>long</code> back to a <code>double</code>.
-   * @see #doubleToSortableLong
-   */
-  public static double sortableLongToDouble(long val) {
-    if (val<0) val ^= 0x7fffffffffffffffL;
-    return Double.longBitsToDouble(val);
-  }
-
-  /**
-   * Converts a <code>float</code> value to a sortable signed <code>int</code>.
-   * The value is converted by getting their IEEE 754 floating-point &quot;float format&quot;
-   * bit layout and then some bits are swapped, to be able to compare the result as int.
-   * By this the precision is not reduced, but the value can easily used as an int.
-   * @see #sortableIntToFloat
-   */
-  public static int floatToSortableInt(float val) {
-    int f = Float.floatToRawIntBits(val);
-    if (f<0) f ^= 0x7fffffff;
-    return f;
-  }
-
-  /**
-   * Converts a sortable <code>int</code> back to a <code>float</code>.
-   * @see #floatToSortableInt
-   */
-  public static float sortableIntToFloat(int val) {
-    if (val<0) val ^= 0x7fffffff;
-    return Float.intBitsToFloat(val);
-  }
-
-  /**
-   * Splits a long range recursively.
-   * You may implement a builder that adds clauses to a
-   * {@link org.apache.lucene.search.BooleanQuery} for each call to its
-   * {@link LongRangeBuilder#addRange(BytesRef,BytesRef)}
-   * method.
-   * <p>This method is used by {@link NumericRangeQuery}.
-   */
-  public static void splitLongRange(final LongRangeBuilder builder,
-    final int precisionStep,  final long minBound, final long maxBound
-  ) {
-    splitRange(builder, 64, precisionStep, minBound, maxBound);
-  }
-  
-  /**
-   * Splits an int range recursively.
-   * You may implement a builder that adds clauses to a
-   * {@link org.apache.lucene.search.BooleanQuery} for each call to its
-   * {@link IntRangeBuilder#addRange(BytesRef,BytesRef)}
-   * method.
-   * <p>This method is used by {@link NumericRangeQuery}.
-   */
-  public static void splitIntRange(final IntRangeBuilder builder,
-    final int precisionStep,  final int minBound, final int maxBound
-  ) {
-    splitRange(builder, 32, precisionStep, minBound, maxBound);
-  }
-  
-  /** This helper does the splitting for both 32 and 64 bit. */
-  private static void splitRange(
-    final Object builder, final int valSize,
-    final int precisionStep, long minBound, long maxBound
-  ) {
-    if (precisionStep < 1)
-      throw new IllegalArgumentException("precisionStep must be >=1");
-    if (minBound > maxBound) return;
-    for (int shift=0; ; shift += precisionStep) {
-      // calculate new bounds for inner precision
-      final long diff = 1L << (shift+precisionStep),
-        mask = ((1L<<precisionStep) - 1L) << shift;
-      final boolean
-        hasLower = (minBound & mask) != 0L,
-        hasUpper = (maxBound & mask) != mask;
-      final long
-        nextMinBound = (hasLower ? (minBound + diff) : minBound) & ~mask,
-        nextMaxBound = (hasUpper ? (maxBound - diff) : maxBound) & ~mask;
-      final boolean
-        lowerWrapped = nextMinBound < minBound,
-        upperWrapped = nextMaxBound > maxBound;
-      
-      if (shift+precisionStep>=valSize || nextMinBound>nextMaxBound || lowerWrapped || upperWrapped) {
-        // We are in the lowest precision or the next precision is not available.
-        addRange(builder, valSize, minBound, maxBound, shift);
-        // exit the split recursion loop
-        break;
-      }
-      
-      if (hasLower)
-        addRange(builder, valSize, minBound, minBound | mask, shift);
-      if (hasUpper)
-        addRange(builder, valSize, maxBound & ~mask, maxBound, shift);
-      
-      // recurse to next precision
-      minBound = nextMinBound;
-      maxBound = nextMaxBound;
-    }
-  }
-  
-  /** Helper that delegates to correct range builder */
-  private static void addRange(
-    final Object builder, final int valSize,
-    long minBound, long maxBound,
-    final int shift
-  ) {
-    // for the max bound set all lower bits (that were shifted away):
-    // this is important for testing or other usages of the splitted range
-    // (e.g. to reconstruct the full range). The prefixEncoding will remove
-    // the bits anyway, so they do not hurt!
-    maxBound |= (1L << shift) - 1L;
-    // delegate to correct range builder
-    switch(valSize) {
-      case 64:
-        ((LongRangeBuilder)builder).addRange(minBound, maxBound, shift);
-        break;
-      case 32:
-        ((IntRangeBuilder)builder).addRange((int)minBound, (int)maxBound, shift);
-        break;
-      default:
-        // Should not happen!
-        throw new IllegalArgumentException("valSize must be 32 or 64.");
-    }
-  }
-
-  /**
-   * Callback for {@link #splitLongRange}.
-   * You need to overwrite only one of the methods.
-   * @lucene.internal
-   * @since 2.9, API changed non backwards-compliant in 4.0
-   */
-  public static abstract class LongRangeBuilder {
-    
-    /**
-     * Overwrite this method, if you like to receive the already prefix encoded range bounds.
-     * You can directly build classical (inclusive) range queries from them.
-     */
-    public void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) {
-      throw new UnsupportedOperationException();
-    }
-    
-    /**
-     * Overwrite this method, if you like to receive the raw long range bounds.
-     * You can use this for e.g. debugging purposes (print out range bounds).
-     */
-    public void addRange(final long min, final long max, final int shift) {
-      final BytesRef minBytes = new BytesRef(BUF_SIZE_LONG), maxBytes = new BytesRef(BUF_SIZE_LONG);
-      longToPrefixCoded(min, shift, minBytes);
-      longToPrefixCoded(max, shift, maxBytes);
-      addRange(minBytes, maxBytes);
-    }
-  
-  }
-  
-  /**
-   * Callback for {@link #splitIntRange}.
-   * You need to overwrite only one of the methods.
-   * @lucene.internal
-   * @since 2.9, API changed non backwards-compliant in 4.0
-   */
-  public static abstract class IntRangeBuilder {
-    
-    /**
-     * Overwrite this method, if you like to receive the already prefix encoded range bounds.
-     * You can directly build classical range (inclusive) queries from them.
-     */
-    public void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) {
-      throw new UnsupportedOperationException();
-    }
-    
-    /**
-     * Overwrite this method, if you like to receive the raw int range bounds.
-     * You can use this for e.g. debugging purposes (print out range bounds).
-     */
-    public void addRange(final int min, final int max, final int shift) {
-      final BytesRef minBytes = new BytesRef(BUF_SIZE_INT), maxBytes = new BytesRef(BUF_SIZE_INT);
-      intToPrefixCoded(min, shift, minBytes);
-      intToPrefixCoded(max, shift, maxBytes);
-      addRange(minBytes, maxBytes);
-    }
-  
-  }
-  
-}
Index: lucene/src/java/org/apache/lucene/util/AttributeSource.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/AttributeSource.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/AttributeSource.java	(working copy)
@@ -1,504 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.lang.ref.WeakReference;
-import java.util.Collections;
-import java.util.NoSuchElementException;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.WeakHashMap;
-import java.util.LinkedList;
-import java.util.Map;
-import java.util.Map.Entry;
-
-import org.apache.lucene.analysis.TokenStream; // for javadocs
-
-/**
- * An AttributeSource contains a list of different {@link AttributeImpl}s,
- * and methods to add and get them. There can only be a single instance
- * of an attribute in the same AttributeSource instance. This is ensured
- * by passing in the actual type of the Attribute (Class&lt;Attribute&gt;) to 
- * the {@link #addAttribute(Class)}, which then checks if an instance of
- * that type is already present. If yes, it returns the instance, otherwise
- * it creates a new instance and returns it.
- */
-public class AttributeSource {
-  /**
-   * An AttributeFactory creates instances of {@link AttributeImpl}s.
-   */
-  public static abstract class AttributeFactory {
-    /**
-     * returns an {@link AttributeImpl} for the supplied {@link Attribute} interface class.
-     */
-    public abstract AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass);
-    
-    /**
-     * This is the default factory that creates {@link AttributeImpl}s using the
-     * class name of the supplied {@link Attribute} interface class by appending <code>Impl</code> to it.
-     */
-    public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = new DefaultAttributeFactory();
-    
-    private static final class DefaultAttributeFactory extends AttributeFactory {
-      private static final WeakHashMap<Class<? extends Attribute>, WeakReference<Class<? extends AttributeImpl>>> attClassImplMap =
-        new WeakHashMap<Class<? extends Attribute>, WeakReference<Class<? extends AttributeImpl>>>();
-      
-      private DefaultAttributeFactory() {}
-    
-      @Override
-      public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
-        try {
-          return getClassForInterface(attClass).newInstance();
-        } catch (InstantiationException e) {
-          throw new IllegalArgumentException("Could not instantiate implementing class for " + attClass.getName());
-        } catch (IllegalAccessException e) {
-          throw new IllegalArgumentException("Could not instantiate implementing class for " + attClass.getName());
-        }
-      }
-      
-      private static Class<? extends AttributeImpl> getClassForInterface(Class<? extends Attribute> attClass) {
-        synchronized(attClassImplMap) {
-          final WeakReference<Class<? extends AttributeImpl>> ref = attClassImplMap.get(attClass);
-          Class<? extends AttributeImpl> clazz = (ref == null) ? null : ref.get();
-          if (clazz == null) {
-            try {
-              attClassImplMap.put(attClass,
-                new WeakReference<Class<? extends AttributeImpl>>(
-                  clazz = Class.forName(attClass.getName() + "Impl", true, attClass.getClassLoader())
-                  .asSubclass(AttributeImpl.class)
-                )
-              );
-            } catch (ClassNotFoundException e) {
-              throw new IllegalArgumentException("Could not find implementing class for " + attClass.getName());
-            }
-          }
-          return clazz;
-        }
-      }
-    }
-  }
-      
-  /**
-   * This class holds the state of an AttributeSource.
-   * @see #captureState
-   * @see #restoreState
-   */
-  public static final class State implements Cloneable {
-    AttributeImpl attribute;
-    State next;
-    
-    @Override
-    public Object clone() {
-      State clone = new State();
-      clone.attribute = (AttributeImpl) attribute.clone();
-      
-      if (next != null) {
-        clone.next = (State) next.clone();
-      }
-      
-      return clone;
-    }
-  }
-    
-  // These two maps must always be in sync!!!
-  // So they are private, final and read-only from the outside (read-only iterators)
-  private final Map<Class<? extends Attribute>, AttributeImpl> attributes;
-  private final Map<Class<? extends AttributeImpl>, AttributeImpl> attributeImpls;
-  private final State[] currentState;
-
-  private AttributeFactory factory;
-  
-  /**
-   * An AttributeSource using the default attribute factory {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}.
-   */
-  public AttributeSource() {
-    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
-  }
-  
-  /**
-   * An AttributeSource that uses the same attributes as the supplied one.
-   */
-  public AttributeSource(AttributeSource input) {
-    if (input == null) {
-      throw new IllegalArgumentException("input AttributeSource must not be null");
-    }
-    this.attributes = input.attributes;
-    this.attributeImpls = input.attributeImpls;
-    this.currentState = input.currentState;
-    this.factory = input.factory;
-  }
-  
-  /**
-   * An AttributeSource using the supplied {@link AttributeFactory} for creating new {@link Attribute} instances.
-   */
-  public AttributeSource(AttributeFactory factory) {
-    this.attributes = new LinkedHashMap<Class<? extends Attribute>, AttributeImpl>();
-    this.attributeImpls = new LinkedHashMap<Class<? extends AttributeImpl>, AttributeImpl>();
-    this.currentState = new State[1];
-    this.factory = factory;
-  }
-  
-  /**
-   * returns the used AttributeFactory.
-   */
-  public final AttributeFactory getAttributeFactory() {
-    return this.factory;
-  }
-  
-  /** Returns a new iterator that iterates the attribute classes
-   * in the same order they were added in.
-   */
-  public final Iterator<Class<? extends Attribute>> getAttributeClassesIterator() {
-    return Collections.unmodifiableSet(attributes.keySet()).iterator();
-  }
-  
-  /** Returns a new iterator that iterates all unique Attribute implementations.
-   * This iterator may contain less entries that {@link #getAttributeClassesIterator},
-   * if one instance implements more than one Attribute interface.
-   */
-  public final Iterator<AttributeImpl> getAttributeImplsIterator() {
-    final State initState = getCurrentState();
-    if (initState != null) {
-      return new Iterator<AttributeImpl>() {
-        private State state = initState;
-      
-        public void remove() {
-          throw new UnsupportedOperationException();
-        }
-        
-        public AttributeImpl next() {
-          if (state == null)
-            throw new NoSuchElementException();
-          final AttributeImpl att = state.attribute;
-          state = state.next;
-          return att;
-        }
-        
-        public boolean hasNext() {
-          return state != null;
-        }
-      };
-    } else {
-      return Collections.<AttributeImpl>emptySet().iterator();
-    }
-  }
-  
-  /** a cache that stores all interfaces for known implementation classes for performance (slow reflection) */
-  private static final WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>> knownImplClasses =
-    new WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>>();
-  
-  static LinkedList<WeakReference<Class<? extends Attribute>>> getAttributeInterfaces(final Class<? extends AttributeImpl> clazz) {
-    synchronized(knownImplClasses) {
-      LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces = knownImplClasses.get(clazz);
-      if (foundInterfaces == null) {
-        // we have a strong reference to the class instance holding all interfaces in the list (parameter "att"),
-        // so all WeakReferences are never evicted by GC
-        knownImplClasses.put(clazz, foundInterfaces = new LinkedList<WeakReference<Class<? extends Attribute>>>());
-        // find all interfaces that this attribute instance implements
-        // and that extend the Attribute interface
-        Class<?> actClazz = clazz;
-        do {
-          for (Class<?> curInterface : actClazz.getInterfaces()) {
-            if (curInterface != Attribute.class && Attribute.class.isAssignableFrom(curInterface)) {
-              foundInterfaces.add(new WeakReference<Class<? extends Attribute>>(curInterface.asSubclass(Attribute.class)));
-            }
-          }
-          actClazz = actClazz.getSuperclass();
-        } while (actClazz != null);
-      }
-      return foundInterfaces;
-    }
-  }
-  
-  /** <b>Expert:</b> Adds a custom AttributeImpl instance with one or more Attribute interfaces.
-   * <p><font color="red"><b>Please note:</b> It is not guaranteed, that <code>att</code> is added to
-   * the <code>AttributeSource</code>, because the provided attributes may already exist.
-   * You should always retrieve the wanted attributes using {@link #getAttribute} after adding
-   * with this method and cast to your class.
-   * The recommended way to use custom implementations is using an {@link AttributeFactory}.
-   * </font></p>
-   */
-  public final void addAttributeImpl(final AttributeImpl att) {
-    final Class<? extends AttributeImpl> clazz = att.getClass();
-    if (attributeImpls.containsKey(clazz)) return;
-    final LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces =
-      getAttributeInterfaces(clazz);
-    
-    // add all interfaces of this AttributeImpl to the maps
-    for (WeakReference<Class<? extends Attribute>> curInterfaceRef : foundInterfaces) {
-      final Class<? extends Attribute> curInterface = curInterfaceRef.get();
-      assert (curInterface != null) :
-        "We have a strong reference on the class holding the interfaces, so they should never get evicted";
-      // Attribute is a superclass of this interface
-      if (!attributes.containsKey(curInterface)) {
-        // invalidate state to force recomputation in captureState()
-        this.currentState[0] = null;
-        attributes.put(curInterface, att);
-        attributeImpls.put(clazz, att);
-      }
-    }
-  }
-  
-  /**
-   * The caller must pass in a Class&lt;? extends Attribute&gt; value.
-   * This method first checks if an instance of that class is 
-   * already in this AttributeSource and returns it. Otherwise a
-   * new instance is created, added to this AttributeSource and returned. 
-   */
-  public final <A extends Attribute> A addAttribute(Class<A> attClass) {
-    AttributeImpl attImpl = attributes.get(attClass);
-    if (attImpl == null) {
-      if (!(attClass.isInterface() && Attribute.class.isAssignableFrom(attClass))) {
-        throw new IllegalArgumentException(
-          "addAttribute() only accepts an interface that extends Attribute, but " +
-          attClass.getName() + " does not fulfil this contract."
-        );
-      }
-      addAttributeImpl(attImpl = this.factory.createAttributeInstance(attClass));
-    }
-    return attClass.cast(attImpl);
-  }
-  
-  /** Returns true, iff this AttributeSource has any attributes */
-  public final boolean hasAttributes() {
-    return !this.attributes.isEmpty();
-  }
-
-  /**
-   * The caller must pass in a Class&lt;? extends Attribute&gt; value. 
-   * Returns true, iff this AttributeSource contains the passed-in Attribute.
-   */
-  public final boolean hasAttribute(Class<? extends Attribute> attClass) {
-    return this.attributes.containsKey(attClass);
-  }
-
-  /**
-   * The caller must pass in a Class&lt;? extends Attribute&gt; value. 
-   * Returns the instance of the passed in Attribute contained in this AttributeSource
-   * 
-   * @throws IllegalArgumentException if this AttributeSource does not contain the
-   *         Attribute. It is recommended to always use {@link #addAttribute} even in consumers
-   *         of TokenStreams, because you cannot know if a specific TokenStream really uses
-   *         a specific Attribute. {@link #addAttribute} will automatically make the attribute
-   *         available. If you want to only use the attribute, if it is available (to optimize
-   *         consuming), use {@link #hasAttribute}.
-   */
-  public final <A extends Attribute> A getAttribute(Class<A> attClass) {
-    AttributeImpl attImpl = attributes.get(attClass);
-    if (attImpl == null) {
-      throw new IllegalArgumentException("This AttributeSource does not have the attribute '" + attClass.getName() + "'.");
-    }
-    return attClass.cast(attImpl);
-  }
-    
-  private State getCurrentState() {
-    State s  = currentState[0];
-    if (s != null || !hasAttributes()) {
-      return s;
-    }
-    State c = s = currentState[0] = new State();
-    final Iterator<AttributeImpl> it = attributeImpls.values().iterator();
-    c.attribute = it.next();
-    while (it.hasNext()) {
-      c.next = new State();
-      c = c.next;
-      c.attribute = it.next();
-    }
-    return s;
-  }
-  
-  /**
-   * Resets all Attributes in this AttributeSource by calling
-   * {@link AttributeImpl#clear()} on each Attribute implementation.
-   */
-  public final void clearAttributes() {
-    for (State state = getCurrentState(); state != null; state = state.next) {
-      state.attribute.clear();
-    }
-  }
-  
-  /**
-   * Captures the state of all Attributes. The return value can be passed to
-   * {@link #restoreState} to restore the state of this or another AttributeSource.
-   */
-  public final State captureState() {
-    final State state = this.getCurrentState();
-    return (state == null) ? null : (State) state.clone();
-  }
-  
-  /**
-   * Restores this state by copying the values of all attribute implementations
-   * that this state contains into the attributes implementations of the targetStream.
-   * The targetStream must contain a corresponding instance for each argument
-   * contained in this state (e.g. it is not possible to restore the state of
-   * an AttributeSource containing a TermAttribute into a AttributeSource using
-   * a Token instance as implementation).
-   * <p>
-   * Note that this method does not affect attributes of the targetStream
-   * that are not contained in this state. In other words, if for example
-   * the targetStream contains an OffsetAttribute, but this state doesn't, then
-   * the value of the OffsetAttribute remains unchanged. It might be desirable to
-   * reset its value to the default, in which case the caller should first
-   * call {@link TokenStream#clearAttributes()} on the targetStream.   
-   */
-  public final void restoreState(State state) {
-    if (state == null)  return;
-    
-    do {
-      AttributeImpl targetImpl = attributeImpls.get(state.attribute.getClass());
-      if (targetImpl == null) {
-        throw new IllegalArgumentException("State contains AttributeImpl of type " +
-          state.attribute.getClass().getName() + " that is not in in this AttributeSource");
-      }
-      state.attribute.copyTo(targetImpl);
-      state = state.next;
-    } while (state != null);
-  }
-
-  @Override
-  public int hashCode() {
-    int code = 0;
-    for (State state = getCurrentState(); state != null; state = state.next) {
-      code = code * 31 + state.attribute.hashCode();
-    }
-    return code;
-  }
-  
-  @Override
-  public boolean equals(Object obj) {
-    if (obj == this) {
-      return true;
-    }
-
-    if (obj instanceof AttributeSource) {
-      AttributeSource other = (AttributeSource) obj;  
-    
-      if (hasAttributes()) {
-        if (!other.hasAttributes()) {
-          return false;
-        }
-        
-        if (this.attributeImpls.size() != other.attributeImpls.size()) {
-          return false;
-        }
-  
-        // it is only equal if all attribute impls are the same in the same order
-        State thisState = this.getCurrentState();
-        State otherState = other.getCurrentState();
-        while (thisState != null && otherState != null) {
-          if (otherState.attribute.getClass() != thisState.attribute.getClass() || !otherState.attribute.equals(thisState.attribute)) {
-            return false;
-          }
-          thisState = thisState.next;
-          otherState = otherState.next;
-        }
-        return true;
-      } else {
-        return !other.hasAttributes();
-      }
-    } else
-      return false;
-  }
-  
-  /**
-   * This method returns the current attribute values as a string in the following format
-   * by calling the {@link #reflectWith(AttributeReflector)} method:
-   * 
-   * <ul>
-   * <li><em>iff {@code prependAttClass=true}:</em> {@code "AttributeClass#key=value,AttributeClass#key=value"}
-   * <li><em>iff {@code prependAttClass=false}:</em> {@code "key=value,key=value"}
-   * </ul>
-   *
-   * @see #reflectWith(AttributeReflector)
-   */
-  public final String reflectAsString(final boolean prependAttClass) {
-    final StringBuilder buffer = new StringBuilder();
-    reflectWith(new AttributeReflector() {
-      public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
-        if (buffer.length() > 0) {
-          buffer.append(',');
-        }
-        if (prependAttClass) {
-          buffer.append(attClass.getName()).append('#');
-        }
-        buffer.append(key).append('=').append((value == null) ? "null" : value);
-      }
-    });
-    return buffer.toString();
-  }
-  
-  /**
-   * This method is for introspection of attributes, it should simply
-   * add the key/values this AttributeSource holds to the given {@link AttributeReflector}.
-   *
-   * <p>This method iterates over all Attribute implementations and calls the
-   * corresponding {@link AttributeImpl#reflectWith} method.</p>
-   *
-   * @see AttributeImpl#reflectWith
-   */
-  public final void reflectWith(AttributeReflector reflector) {
-    for (State state = getCurrentState(); state != null; state = state.next) {
-      state.attribute.reflectWith(reflector);
-    }
-  }
-
-  /**
-   * Performs a clone of all {@link AttributeImpl} instances returned in a new
-   * {@code AttributeSource} instance. This method can be used to e.g. create another TokenStream
-   * with exactly the same attributes (using {@link #AttributeSource(AttributeSource)}).
-   * You can also use it as a (non-performant) replacement for {@link #captureState}, if you need to look
-   * into / modify the captured state.
-   */
-  public final AttributeSource cloneAttributes() {
-    final AttributeSource clone = new AttributeSource(this.factory);
-    
-    if (hasAttributes()) {
-      // first clone the impls
-      for (State state = getCurrentState(); state != null; state = state.next) {
-        clone.attributeImpls.put(state.attribute.getClass(), (AttributeImpl) state.attribute.clone());
-      }
-      
-      // now the interfaces
-      for (Entry<Class<? extends Attribute>, AttributeImpl> entry : this.attributes.entrySet()) {
-        clone.attributes.put(entry.getKey(), clone.attributeImpls.get(entry.getValue().getClass()));
-      }
-    }
-    
-    return clone;
-  }
-  
-  /**
-   * Copies the contents of this {@code AttributeSource} to the given target {@code AttributeSource}.
-   * The given instance has to provide all {@link Attribute}s this instance contains. 
-   * The actual attribute implementations must be identical in both {@code AttributeSource} instances;
-   * ideally both AttributeSource instances should use the same {@link AttributeFactory}.
-   * You can use this method as a replacement for {@link #restoreState}, if you use
-   * {@link #cloneAttributes} instead of {@link #captureState}.
-   */
-  public final void copyTo(AttributeSource target) {
-    for (State state = getCurrentState(); state != null; state = state.next) {
-      final AttributeImpl targetImpl = target.attributeImpls.get(state.attribute.getClass());
-      if (targetImpl == null) {
-        throw new IllegalArgumentException("This AttributeSource contains AttributeImpl of type " +
-          state.attribute.getClass().getName() + " that is not in the target");
-      }
-      state.attribute.copyTo(targetImpl);
-    }
-  }
-
-}
Index: lucene/src/java/org/apache/lucene/util/UnicodeUtil.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/UnicodeUtil.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/UnicodeUtil.java	(working copy)
@@ -1,612 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-/*
- * Some of this code came from the excellent Unicode
- * conversion examples from:
- *
- *   http://www.unicode.org/Public/PROGRAMS/CVTUTF
- *
- * Full Copyright for that code follows:
-*/
-
-/*
- * Copyright 2001-2004 Unicode, Inc.
- * 
- * Disclaimer
- * 
- * This source code is provided as is by Unicode, Inc. No claims are
- * made as to fitness for any particular purpose. No warranties of any
- * kind are expressed or implied. The recipient agrees to determine
- * applicability of information provided. If this file has been
- * purchased on magnetic or optical media from Unicode, Inc., the
- * sole remedy for any claim will be exchange of defective media
- * within 90 days of receipt.
- * 
- * Limitations on Rights to Redistribute This Code
- * 
- * Unicode, Inc. hereby grants the right to freely use the information
- * supplied in this file in the creation of products supporting the
- * Unicode Standard, and to make copies of this file in any form
- * for internal or external distribution as long as this notice
- * remains attached.
- */
-
-/*
- * Additional code came from the IBM ICU library.
- *
- *  http://www.icu-project.org
- *
- * Full Copyright for that code follows.
- */
-
-/*
- * Copyright (C) 1999-2010, International Business Machines
- * Corporation and others.  All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, and/or sell copies of the
- * Software, and to permit persons to whom the Software is furnished to do so,
- * provided that the above copyright notice(s) and this permission notice appear
- * in all copies of the Software and that both the above copyright notice(s) and
- * this permission notice appear in supporting documentation.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
- * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
- * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR
- * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
- * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
- * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Except as contained in this notice, the name of a copyright holder shall not
- * be used in advertising or otherwise to promote the sale, use or other
- * dealings in this Software without prior written authorization of the
- * copyright holder.
- */
-
-/**
- * Class to encode java's UTF16 char[] into UTF8 byte[]
- * without always allocating a new byte[] as
- * String.getBytes("UTF-8") does.
- *
- * @lucene.internal
- */
-
-public final class UnicodeUtil {
-  
-  /** A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms
-   *  one would normally encounter, and definitely bigger than any UTF-8 terms.
-   *  <p>
-   *  WARNING: This is not a valid UTF8 Term  
-   **/
-  public static final BytesRef BIG_TERM = new BytesRef(
-      new byte[] {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}
-  ); // TODO this is unrelated here find a better place for it
-  
-  public static void main(String[] args) {
-    System.out.println(Character.toChars(0x10FFFF + 1));
-  }
-
-  private UnicodeUtil() {} // no instance
-
-  public static final int UNI_SUR_HIGH_START = 0xD800;
-  public static final int UNI_SUR_HIGH_END = 0xDBFF;
-  public static final int UNI_SUR_LOW_START = 0xDC00;
-  public static final int UNI_SUR_LOW_END = 0xDFFF;
-  public static final int UNI_REPLACEMENT_CHAR = 0xFFFD;
-
-  private static final long UNI_MAX_BMP = 0x0000FFFF;
-
-  private static final long HALF_SHIFT = 10;
-  private static final long HALF_MASK = 0x3FFL;
-  
-  private static final int SURROGATE_OFFSET = 
-    Character.MIN_SUPPLEMENTARY_CODE_POINT - 
-    (UNI_SUR_HIGH_START << HALF_SHIFT) - UNI_SUR_LOW_START;
-
-  /** Encode characters from a char[] source, starting at
-   *  offset for length chars.  Returns a hash of the resulting bytes.  After encoding, result.offset will always be 0. */
-  public static int UTF16toUTF8WithHash(final char[] source, final int offset, final int length, BytesRef result) {
-    int hash = 0;
-    int upto = 0;
-    int i = offset;
-    final int end = offset + length;
-    byte[] out = result.bytes;
-    // Pre-allocate for worst case 4-for-1
-    final int maxLen = length * 4;
-    if (out.length < maxLen)
-      out = result.bytes = new byte[ArrayUtil.oversize(maxLen, 1)];
-    result.offset = 0;
-
-    while(i < end) {
-      
-      final int code = (int) source[i++];
-
-      if (code < 0x80) {
-        hash = 31*hash + (out[upto++] = (byte) code);
-      } else if (code < 0x800) {
-        hash = 31*hash + (out[upto++] = (byte) (0xC0 | (code >> 6)));
-        hash = 31*hash + (out[upto++] = (byte)(0x80 | (code & 0x3F)));
-      } else if (code < 0xD800 || code > 0xDFFF) {
-        hash = 31*hash + (out[upto++] = (byte)(0xE0 | (code >> 12)));
-        hash = 31*hash + (out[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F)));
-        hash = 31*hash + (out[upto++] = (byte)(0x80 | (code & 0x3F)));
-      } else {
-        // surrogate pair
-        // confirm valid high surrogate
-        if (code < 0xDC00 && i < end) {
-          int utf32 = (int) source[i];
-          // confirm valid low surrogate and write pair
-          if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) { 
-            utf32 = (code << 10) + utf32 + SURROGATE_OFFSET;
-            i++;
-            hash = 31*hash + (out[upto++] = (byte)(0xF0 | (utf32 >> 18)));
-            hash = 31*hash + (out[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F)));
-            hash = 31*hash + (out[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F)));
-            hash = 31*hash + (out[upto++] = (byte)(0x80 | (utf32 & 0x3F)));
-            continue;
-          }
-        }
-        // replace unpaired surrogate or out-of-order low surrogate
-        // with substitution character
-        hash = 31*hash + (out[upto++] = (byte) 0xEF);
-        hash = 31*hash + (out[upto++] = (byte) 0xBF);
-        hash = 31*hash + (out[upto++] = (byte) 0xBD);
-      }
-    }
-    //assert matches(source, offset, length, out, upto);
-    result.length = upto;
-    return hash;
-  }
-
-  /** Encode characters from a char[] source, starting at
-   *  offset for length chars. After encoding, result.offset will always be 0.
-   */
-  public static void UTF16toUTF8(final char[] source, final int offset, final int length, BytesRef result) {
-
-    int upto = 0;
-    int i = offset;
-    final int end = offset + length;
-    byte[] out = result.bytes;
-    // Pre-allocate for worst case 4-for-1
-    final int maxLen = length * 4;
-    if (out.length < maxLen)
-      out = result.bytes = new byte[maxLen];
-    result.offset = 0;
-
-    while(i < end) {
-      
-      final int code = (int) source[i++];
-
-      if (code < 0x80)
-        out[upto++] = (byte) code;
-      else if (code < 0x800) {
-        out[upto++] = (byte) (0xC0 | (code >> 6));
-        out[upto++] = (byte)(0x80 | (code & 0x3F));
-      } else if (code < 0xD800 || code > 0xDFFF) {
-        out[upto++] = (byte)(0xE0 | (code >> 12));
-        out[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
-        out[upto++] = (byte)(0x80 | (code & 0x3F));
-      } else {
-        // surrogate pair
-        // confirm valid high surrogate
-        if (code < 0xDC00 && i < end) {
-          int utf32 = (int) source[i];
-          // confirm valid low surrogate and write pair
-          if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) { 
-            utf32 = (code << 10) + utf32 + SURROGATE_OFFSET;
-            i++;
-            out[upto++] = (byte)(0xF0 | (utf32 >> 18));
-            out[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F));
-            out[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
-            out[upto++] = (byte)(0x80 | (utf32 & 0x3F));
-            continue;
-          }
-        }
-        // replace unpaired surrogate or out-of-order low surrogate
-        // with substitution character
-        out[upto++] = (byte) 0xEF;
-        out[upto++] = (byte) 0xBF;
-        out[upto++] = (byte) 0xBD;
-      }
-    }
-    //assert matches(source, offset, length, out, upto);
-    result.length = upto;
-  }
-
-  /** Encode characters from this String, starting at offset
-   *  for length characters. After encoding, result.offset will always be 0.
-   */
-  public static void UTF16toUTF8(final CharSequence s, final int offset, final int length, BytesRef result) {
-    final int end = offset + length;
-
-    byte[] out = result.bytes;
-    result.offset = 0;
-    // Pre-allocate for worst case 4-for-1
-    final int maxLen = length * 4;
-    if (out.length < maxLen)
-      out = result.bytes = new byte[maxLen];
-
-    int upto = 0;
-    for(int i=offset;i<end;i++) {
-      final int code = (int) s.charAt(i);
-
-      if (code < 0x80)
-        out[upto++] = (byte) code;
-      else if (code < 0x800) {
-        out[upto++] = (byte) (0xC0 | (code >> 6));
-        out[upto++] = (byte)(0x80 | (code & 0x3F));
-      } else if (code < 0xD800 || code > 0xDFFF) {
-        out[upto++] = (byte)(0xE0 | (code >> 12));
-        out[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F));
-        out[upto++] = (byte)(0x80 | (code & 0x3F));
-      } else {
-        // surrogate pair
-        // confirm valid high surrogate
-        if (code < 0xDC00 && (i < end-1)) {
-          int utf32 = (int) s.charAt(i+1);
-          // confirm valid low surrogate and write pair
-          if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) { 
-            utf32 = (code << 10) + utf32 + SURROGATE_OFFSET;
-            i++;
-            out[upto++] = (byte)(0xF0 | (utf32 >> 18));
-            out[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F));
-            out[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
-            out[upto++] = (byte)(0x80 | (utf32 & 0x3F));
-            continue;
-          }
-        }
-        // replace unpaired surrogate or out-of-order low surrogate
-        // with substitution character
-        out[upto++] = (byte) 0xEF;
-        out[upto++] = (byte) 0xBF;
-        out[upto++] = (byte) 0xBD;
-      }
-    }
-    //assert matches(s, offset, length, out, upto);
-    result.length = upto;
-  }
-
-  // Only called from assert
-  /*
-  private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) {
-    try {
-      String s1 = new String(source, offset, length);
-      String s2 = new String(result, 0, upto, "UTF-8");
-      if (!s1.equals(s2)) {
-        //System.out.println("DIFF: s1 len=" + s1.length());
-        //for(int i=0;i<s1.length();i++)
-        //  System.out.println("    " + i + ": " + (int) s1.charAt(i));
-        //System.out.println("s2 len=" + s2.length());
-        //for(int i=0;i<s2.length();i++)
-        //  System.out.println("    " + i + ": " + (int) s2.charAt(i));
-
-        // If the input string was invalid, then the
-        // difference is OK
-        if (!validUTF16String(s1))
-          return true;
-
-        return false;
-      }
-      return s1.equals(s2);
-    } catch (UnsupportedEncodingException uee) {
-      return false;
-    }
-  }
-
-  // Only called from assert
-  private static boolean matches(String source, int offset, int length, byte[] result, int upto) {
-    try {
-      String s1 = source.substring(offset, offset+length);
-      String s2 = new String(result, 0, upto, "UTF-8");
-      if (!s1.equals(s2)) {
-        // Allow a difference if s1 is not valid UTF-16
-
-        //System.out.println("DIFF: s1 len=" + s1.length());
-        //for(int i=0;i<s1.length();i++)
-        //  System.out.println("    " + i + ": " + (int) s1.charAt(i));
-        //System.out.println("  s2 len=" + s2.length());
-        //for(int i=0;i<s2.length();i++)
-        //  System.out.println("    " + i + ": " + (int) s2.charAt(i));
-
-        // If the input string was invalid, then the
-        // difference is OK
-        if (!validUTF16String(s1))
-          return true;
-
-        return false;
-      }
-      return s1.equals(s2);
-    } catch (UnsupportedEncodingException uee) {
-      return false;
-    }
-  }
-  */
-  public static boolean validUTF16String(CharSequence s) {
-    final int size = s.length();
-    for(int i=0;i<size;i++) {
-      char ch = s.charAt(i);
-      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
-        if (i < size-1) {
-          i++;
-          char nextCH = s.charAt(i);
-          if (nextCH >= UNI_SUR_LOW_START && nextCH <= UNI_SUR_LOW_END) {
-            // Valid surrogate pair
-          } else
-            // Unmatched high surrogate
-            return false;
-        } else
-          // Unmatched high surrogate
-          return false;
-      } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
-        // Unmatched low surrogate
-        return false;
-    }
-
-    return true;
-  }
-
-  public static boolean validUTF16String(char[] s, int size) {
-    for(int i=0;i<size;i++) {
-      char ch = s[i];
-      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
-        if (i < size-1) {
-          i++;
-          char nextCH = s[i];
-          if (nextCH >= UNI_SUR_LOW_START && nextCH <= UNI_SUR_LOW_END) {
-            // Valid surrogate pair
-          } else
-            return false;
-        } else
-          return false;
-      } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
-        // Unmatched low surrogate
-        return false;
-    }
-
-    return true;
-  }
-
-  // Borrowed from Python's 3.1.2 sources,
-  // Objects/unicodeobject.c, and modified (see commented
-  // out section, and the -1s) to disallow the reserved for
-  // future (RFC 3629) 5/6 byte sequence characters, and
-  // invalid 0xFE and 0xFF bytes.
-
-  /* Map UTF-8 encoded prefix byte to sequence length.  -1 (0xFF)
-   * means illegal prefix.  see RFC 2279 for details */
-  static byte[] utf8CodeLength = new byte[] {
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    4, 4, 4, 4, 4, 4, 4, 4 //, 5, 5, 5, 5, 6, 6, 0, 0
-  };
-
-
-  /** Returns the number of code points in this utf8
-   *  sequence.  Behavior is undefined if the utf8 sequence
-   *  is invalid.*/
-  public static int codePointCount(BytesRef utf8) {
-    int upto = utf8.offset;
-    final int limit = utf8.offset + utf8.length;
-    final byte[] bytes = utf8.bytes;
-    int codePointCount = 0;
-    while (upto < limit) {
-      codePointCount++;
-      upto += utf8CodeLength[bytes[upto]&0xFF];
-    }
-    return codePointCount;
-  }
-
-  public static void UTF8toUTF32(final BytesRef utf8, final IntsRef utf32) {
-    // pre-alloc for worst case
-    if (utf32.ints == null || utf32.ints.length < utf8.length) {
-      utf32.ints = new int[utf8.length];
-    }
-    int utf32Count = 0;
-    int utf8Upto = utf8.offset;
-    final int[] ints = utf32.ints;
-    final byte[] bytes = utf8.bytes;
-    final int utf8Limit = utf8.offset + utf8.length;
-    while(utf8Upto < utf8Limit) {
-      final int numBytes = utf8CodeLength[bytes[utf8Upto]&0xFF];
-      int v = 0;
-      switch(numBytes) {
-      case 1:
-        ints[utf32Count++] = bytes[utf8Upto++];
-        continue;
-      case 2:
-        // 5 useful bits
-        v = bytes[utf8Upto++] & 31;
-        break;
-      case 3:
-        // 4 useful bits
-        v = bytes[utf8Upto++] & 15;
-        break;
-      case 4:
-        // 3 useful bits
-        v = bytes[utf8Upto++] & 7;
-        break;
-      default :
-        throw new IllegalStateException("invalid utf8");
-      }
-
-      final int limit = utf8Upto + numBytes-1;
-
-      while(utf8Upto < limit) {
-        v = v << 6 | bytes[utf8Upto++]&63;
-      }
-      ints[utf32Count++] = v;
-    }
-    
-    utf32.offset = 0;
-    utf32.length = utf32Count;
-  }
-
-  /** Shift value for lead surrogate to form a supplementary character. */
-  private static final int LEAD_SURROGATE_SHIFT_ = 10;
-  /** Mask to retrieve the significant value from a trail surrogate.*/
-  private static final int TRAIL_SURROGATE_MASK_ = 0x3FF;
-  /** Trail surrogate minimum value */
-  private static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00;
-  /** Lead surrogate minimum value */
-  private static final int LEAD_SURROGATE_MIN_VALUE = 0xD800;
-  /** The minimum value for Supplementary code points */
-  private static final int SUPPLEMENTARY_MIN_VALUE = 0x10000;
-  /** Value that all lead surrogate starts with */
-  private static final int LEAD_SURROGATE_OFFSET_ = LEAD_SURROGATE_MIN_VALUE
-          - (SUPPLEMENTARY_MIN_VALUE >> LEAD_SURROGATE_SHIFT_);
-
-  /**
-   * Cover JDK 1.5 API. Create a String from an array of codePoints.
-   *
-   * @param codePoints The code array
-   * @param offset The start of the text in the code point array
-   * @param count The number of code points
-   * @return a String representing the code points between offset and count
-   * @throws IllegalArgumentException If an invalid code point is encountered
-   * @throws IndexOutOfBoundsException If the offset or count are out of bounds.
-   */
-  public static String newString(int[] codePoints, int offset, int count) {
-      if (count < 0) {
-          throw new IllegalArgumentException();
-      }
-      char[] chars = new char[count];
-      int w = 0;
-      for (int r = offset, e = offset + count; r < e; ++r) {
-          int cp = codePoints[r];
-          if (cp < 0 || cp > 0x10ffff) {
-              throw new IllegalArgumentException();
-          }
-          while (true) {
-              try {
-                  if (cp < 0x010000) {
-                      chars[w] = (char) cp;
-                      w++;
-                  } else {
-                      chars[w] = (char) (LEAD_SURROGATE_OFFSET_ + (cp >> LEAD_SURROGATE_SHIFT_));
-                      chars[w + 1] = (char) (TRAIL_SURROGATE_MIN_VALUE + (cp & TRAIL_SURROGATE_MASK_));
-                      w += 2;
-                  }
-                  break;
-              } catch (IndexOutOfBoundsException ex) {
-                  int newlen = (int) (Math.ceil((double) codePoints.length * (w + 2)
-                          / (r - offset + 1)));
-                  char[] temp = new char[newlen];
-                  System.arraycopy(chars, 0, temp, 0, w);
-                  chars = temp;
-              }
-          }
-      }
-      return new String(chars, 0, w);
-  }
-
-  // for debugging
-  public static String toHexString(String s) {
-    StringBuilder sb = new StringBuilder();
-    for(int i=0;i<s.length();i++) {
-      char ch = s.charAt(i);
-      if (i > 0) {
-        sb.append(' ');
-      }
-      if (ch < 128) {
-        sb.append(ch);
-      } else {
-        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
-          sb.append("H:");
-        } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
-          sb.append("L:");
-        } else if (ch > UNI_SUR_LOW_END) {
-          if (ch == 0xffff) {
-            sb.append("F:");
-          } else {
-            sb.append("E:");
-          }
-        }
-        
-        sb.append("0x" + Integer.toHexString(ch));
-      }
-    }
-    return sb.toString();
-  }
-  
-  /**
-   * Interprets the given byte array as UTF-8 and converts to UTF-16. The {@link CharsRef} will be extended if 
-   * it doesn't provide enough space to hold the worst case of each byte becoming a UTF-16 codepoint.
-   * <p>
-   * NOTE: Full characters are read, even if this reads past the length passed (and
-   * can result in an ArrayOutOfBoundsException if invalid UTF-8 is passed).
-   * Explicit checks for valid UTF-8 are not performed. 
-   */
-  public static void UTF8toUTF16(byte[] utf8, int offset, int length, CharsRef chars) {
-    int out_offset = chars.offset = 0;
-    final char[] out = chars.chars =  ArrayUtil.grow(chars.chars, length);
-    final int limit = offset + length;
-    while (offset < limit) {
-      int b = utf8[offset++]&0xff;
-      if (b < 0xc0) {
-        assert b < 0x80;
-        out[out_offset++] = (char)b;
-      } else if (b < 0xe0) {
-        out[out_offset++] = (char)(((b&0x1f)<<6) + (utf8[offset++]&0x3f));
-      } else if (b < 0xf0) {
-        out[out_offset++] = (char)(((b&0xf)<<12) + ((utf8[offset]&0x3f)<<6) + (utf8[offset+1]&0x3f));
-        offset += 2;
-      } else {
-        assert b < 0xf8;
-        int ch = ((b&0x7)<<18) + ((utf8[offset]&0x3f)<<12) + ((utf8[offset+1]&0x3f)<<6) + (utf8[offset+2]&0x3f);
-        offset += 3;
-        if (ch < UNI_MAX_BMP) {
-          out[out_offset++] = (char)ch;
-        } else {
-          int chHalf = ch - 0x0010000;
-          out[out_offset++] = (char) ((chHalf >> 10) + 0xD800);
-          out[out_offset++] = (char) ((chHalf & HALF_MASK) + 0xDC00);          
-        }
-      }
-    }
-    chars.length = out_offset - chars.offset;
-  }
-  
-  /**
-   * Utility method for {@link #UTF8toUTF16(byte[], int, int, CharsRef)}
-   * @see #UTF8toUTF16(byte[], int, int, CharsRef)
-   */
-  public static void UTF8toUTF16(BytesRef bytesRef, CharsRef chars) {
-    UTF8toUTF16(bytesRef.bytes, bytesRef.offset, bytesRef.length, chars);
-  }
-
-}
Index: lucene/src/java/org/apache/lucene/util/AttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/AttributeImpl.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/AttributeImpl.java	(working copy)
@@ -1,135 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.lang.reflect.Field;
-import java.lang.reflect.Modifier;
-import java.lang.ref.WeakReference;
-import java.util.LinkedList;
-
-/**
- * Base class for Attributes that can be added to a 
- * {@link org.apache.lucene.util.AttributeSource}.
- * <p>
- * Attributes are used to add data in a dynamic, yet type-safe way to a source
- * of usually streamed objects, e. g. a {@link org.apache.lucene.analysis.TokenStream}.
- */
-public abstract class AttributeImpl implements Cloneable, Attribute {  
-  /**
-   * Clears the values in this AttributeImpl and resets it to its 
-   * default value. If this implementation implements more than one Attribute interface
-   * it clears all.
-   */
-  public abstract void clear();
-  
-  /**
-   * This method returns the current attribute values as a string in the following format
-   * by calling the {@link #reflectWith(AttributeReflector)} method:
-   * 
-   * <ul>
-   * <li><em>iff {@code prependAttClass=true}:</em> {@code "AttributeClass#key=value,AttributeClass#key=value"}
-   * <li><em>iff {@code prependAttClass=false}:</em> {@code "key=value,key=value"}
-   * </ul>
-   *
-   * @see #reflectWith(AttributeReflector)
-   */
-  public final String reflectAsString(final boolean prependAttClass) {
-    final StringBuilder buffer = new StringBuilder();
-    reflectWith(new AttributeReflector() {
-      public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
-        if (buffer.length() > 0) {
-          buffer.append(',');
-        }
-        if (prependAttClass) {
-          buffer.append(attClass.getName()).append('#');
-        }
-        buffer.append(key).append('=').append((value == null) ? "null" : value);
-      }
-    });
-    return buffer.toString();
-  }
-  
-  /**
-   * This method is for introspection of attributes, it should simply
-   * add the key/values this attribute holds to the given {@link AttributeReflector}.
-   *
-   * <p>The default implementation calls {@link AttributeReflector#reflect} for all
-   * non-static fields from the implementing class, using the field name as key
-   * and the field value as value. The Attribute class is also determined by reflection.
-   * Please note that the default implementation can only handle single-Attribute
-   * implementations.
-   *
-   * <p>Custom implementations look like this (e.g. for a combined attribute implementation):
-   * <pre>
-   *   public void reflectWith(AttributeReflector reflector) {
-   *     reflector.reflect(CharTermAttribute.class, "term", term());
-   *     reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", getPositionIncrement());
-   *   }
-   * </pre>
-   *
-   * <p>If you implement this method, make sure that for each invocation, the same set of {@link Attribute}
-   * interfaces and keys are passed to {@link AttributeReflector#reflect} in the same order, but possibly
-   * different values. So don't automatically exclude e.g. {@code null} properties!
-   *
-   * @see #reflectAsString(boolean)
-   */
-  public void reflectWith(AttributeReflector reflector) {
-    final Class<? extends AttributeImpl> clazz = this.getClass();
-    final LinkedList<WeakReference<Class<? extends Attribute>>> interfaces = AttributeSource.getAttributeInterfaces(clazz);
-    if (interfaces.size() != 1) {
-      throw new UnsupportedOperationException(clazz.getName() +
-        " implements more than one Attribute interface, the default reflectWith() implementation cannot handle this.");
-    }
-    final Class<? extends Attribute> interf = interfaces.getFirst().get();
-    final Field[] fields = clazz.getDeclaredFields();
-    try {
-      for (int i = 0; i < fields.length; i++) {
-        final Field f = fields[i];
-        if (Modifier.isStatic(f.getModifiers())) continue;
-        f.setAccessible(true);
-        reflector.reflect(interf, f.getName(), f.get(this));
-      }
-    } catch (IllegalAccessException e) {
-      // this should never happen, because we're just accessing fields
-      // from 'this'
-      throw new RuntimeException(e);
-    }
-  }
-  
-  /**
-   * Copies the values from this Attribute into the passed-in
-   * target attribute. The target implementation must support all the
-   * Attributes this implementation supports.
-   */
-  public abstract void copyTo(AttributeImpl target);
-    
-  /**
-   * Shallow clone. Subclasses must override this if they 
-   * need to clone any members deeply,
-   */
-  @Override
-  public Object clone() {
-    Object clone = null;
-    try {
-      clone = super.clone();
-    } catch (CloneNotSupportedException e) {
-      throw new RuntimeException(e);  // shouldn't happen
-    }
-    return clone;
-  }
-}
Index: lucene/src/java/org/apache/lucene/util/RamUsageEstimator.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/RamUsageEstimator.java	(revision 1128767)
+++ lucene/src/java/org/apache/lucene/util/RamUsageEstimator.java	(working copy)
@@ -1,210 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.lang.reflect.*;
-import java.text.DecimalFormat;
-import java.util.*;
-
-/**
- * Estimates the size of a given Object using a given MemoryModel for primitive
- * size information.
- * 
- * Resource Usage: 
- * 
- * Internally uses a Map to temporally hold a reference to every
- * object seen. 
- * 
- * If checkInterned, all Strings checked will be interned, but those
- * that were not already interned will be released for GC when the
- * estimate is complete.
- * 
- * @lucene.internal
- */
-public final class RamUsageEstimator {
-
-  public final static int NUM_BYTES_SHORT = 2;
-  public final static int NUM_BYTES_INT = 4;
-  public final static int NUM_BYTES_LONG = 8;
-  public final static int NUM_BYTES_FLOAT = 4;
-  public final static int NUM_BYTES_DOUBLE = 8;
-  public final static int NUM_BYTES_CHAR = 2;
-  public final static int NUM_BYTES_OBJECT_HEADER = 8;
-  public final static int NUM_BYTES_OBJECT_REF = Constants.JRE_IS_64BIT ? 8 : 4;
-  public final static int NUM_BYTES_ARRAY_HEADER = NUM_BYTES_OBJECT_HEADER + NUM_BYTES_INT + NUM_BYTES_OBJECT_REF;
-
-  private MemoryModel memoryModel;
-
-  private final Map<Object,Object> seen;
-
-  private int refSize;
-  private int arraySize;
-  private int classSize;
-
-  private boolean checkInterned;
-
-  /**
-   * Constructs this object with an AverageGuessMemoryModel and
-   * checkInterned = true.
-   */
-  public RamUsageEstimator() {
-    this(new AverageGuessMemoryModel());
-  }
-
-  /**
-   * @param checkInterned check if Strings are interned and don't add to size
-   * if they are. Defaults to true but if you know the objects you are checking
-   * won't likely contain many interned Strings, it will be faster to turn off
-   * intern checking.
-   */
-  public RamUsageEstimator(boolean checkInterned) {
-    this(new AverageGuessMemoryModel(), checkInterned);
-  }
-
-  /**
-   * @param memoryModel MemoryModel to use for primitive object sizes.
-   */
-  public RamUsageEstimator(MemoryModel memoryModel) {
-    this(memoryModel, true);
-  }
-
-  /**
-   * @param memoryModel MemoryModel to use for primitive object sizes.
-   * @param checkInterned check if Strings are interned and don't add to size
-   * if they are. Defaults to true but if you know the objects you are checking
-   * won't likely contain many interned Strings, it will be faster to turn off
-   * intern checking.
-   */
-  public RamUsageEstimator(MemoryModel memoryModel, boolean checkInterned) {
-    this.memoryModel = memoryModel;
-    this.checkInterned = checkInterned;
-    // Use Map rather than Set so that we can use an IdentityHashMap - not
-    // seeing an IdentityHashSet
-    seen = new IdentityHashMap<Object,Object>(64);
-    this.refSize = memoryModel.getReferenceSize();
-    this.arraySize = memoryModel.getArraySize();
-    this.classSize = memoryModel.getClassSize();
-  }
-
-  public long estimateRamUsage(Object obj) {
-    long size = size(obj);
-    seen.clear();
-    return size;
-  }
-
-  private long size(Object obj) {
-    if (obj == null) {
-      return 0;
-    }
-    // interned not part of this object
-    if (checkInterned && obj instanceof String
-        && obj == ((String) obj).intern()) { // interned string will be eligible
-                                             // for GC on
-                                             // estimateRamUsage(Object) return
-      return 0;
-    }
-
-    // skip if we have seen before
-    if (seen.containsKey(obj)) {
-      return 0;
-    }
-
-    // add to seen
-    seen.put(obj, null);
-
-    Class<?> clazz = obj.getClass();
-    if (clazz.isArray()) {
-      return sizeOfArray(obj);
-    }
-
-    long size = 0;
-
-    // walk type hierarchy
-    while (clazz != null) {
-      Field[] fields = clazz.getDeclaredFields();
-      for (int i = 0; i < fields.length; i++) {
-        if (Modifier.isStatic(fields[i].getModifiers())) {
-          continue;
-        }
-
-        if (fields[i].getType().isPrimitive()) {
-          size += memoryModel.getPrimitiveSize(fields[i].getType());
-        } else {
-          size += refSize;
-          fields[i].setAccessible(true);
-          try {
-            Object value = fields[i].get(obj);
-            if (value != null) {
-              size += size(value);
-            }
-          } catch (IllegalAccessException ex) {
-            // ignore for now?
-          }
-        }
-
-      }
-      clazz = clazz.getSuperclass();
-    }
-    size += classSize;
-    return size;
-  }
-
-  private long sizeOfArray(Object obj) {
-    int len = Array.getLength(obj);
-    if (len == 0) {
-      return 0;
-    }
-    long size = arraySize;
-    Class<?> arrayElementClazz = obj.getClass().getComponentType();
-    if (arrayElementClazz.isPrimitive()) {
-      size += len * memoryModel.getPrimitiveSize(arrayElementClazz);
-    } else {
-      for (int i = 0; i < len; i++) {
-        size += refSize + size(Array.get(obj, i));
-      }
-    }
-
-    return size;
-  }
-
-  private static final long ONE_KB = 1024;
-  private static final long ONE_MB = ONE_KB * ONE_KB;
-  private static final long ONE_GB = ONE_KB * ONE_MB;
-
-  /**
-   * Return good default units based on byte size.
-   */
-  public static String humanReadableUnits(long bytes, DecimalFormat df) {
-    String newSizeAndUnits;
-
-    if (bytes / ONE_GB > 0) {
-      newSizeAndUnits = String.valueOf(df.format((float) bytes / ONE_GB))
-          + " GB";
-    } else if (bytes / ONE_MB > 0) {
-      newSizeAndUnits = String.valueOf(df.format((float) bytes / ONE_MB))
-          + " MB";
-    } else if (bytes / ONE_KB > 0) {
-      newSizeAndUnits = String.valueOf(df.format((float) bytes / ONE_KB))
-          + " KB";
-    } else {
-      newSizeAndUnits = String.valueOf(bytes) + " bytes";
-    }
-
-    return newSizeAndUnits;
-  }
-}

Property changes on: lucene/src/java/org/apache/lucene/document/Fieldable.java
___________________________________________________________________
Added: svn:executable
   + *

Index: lucene/src/declarations/org/apache/lucene/analysis/NumericTokenStream.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/NumericTokenStream.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/NumericTokenStream.java	(revision 0)
@@ -0,0 +1,347 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.*;
+
+/**
+ * <b>Expert:</b> This class provides a {@link TokenStream}
+ * for indexing numeric values that can be used by the likes of, in Lucene,
+ * NumericRangeQuery or NumericRangeFilter.
+ * <p/>
+ * <p>Note that for simple usage, NumericField is
+ * recommended.  NumericField disables norms and
+ * term freqs, as they are not usually needed during
+ * searching.  If you need to change these settings, you
+ * should use this class.
+ * <p/>
+ * <p>See NumericField for capabilities of fields
+ * indexed numerically.</p>
+ * <p/>
+ * <p>Here's an example usage, for an <code>int</code> field:
+ * <p/>
+ * <pre>
+ *  Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  document.add(field);
+ * </pre>
+ * <p/>
+ * <p>For optimal performance, re-use the TokenStream and Field instance
+ * for more than one document:
+ * <p/>
+ * <pre>
+ *  NumericTokenStream stream = new NumericTokenStream(precisionStep);
+ *  Field field = new Field(name, stream);
+ *  field.setOmitNorms(true);
+ *  field.setOmitTermFreqAndPositions(true);
+ *  Document document = new Document();
+ *  document.add(field);
+ *
+ *  for(all documents) {
+ *    stream.setIntValue(value)
+ *    writer.addDocument(document);
+ *  }
+ * </pre>
+ * <p/>
+ * <p>This stream is not intended to be used in analyzers;
+ * it's more for iterating the different precisions during
+ * indexing a specific numeric value.</p>
+ * <p/>
+ * <p><b>NOTE</b>: as token streams are only consumed once
+ * the document is added to the index, if you index more
+ * than one numeric field, use a separate <code>NumericTokenStream</code>
+ * instance for each.</p>
+ * <p/>
+ * <p>See NumericRangeQuery in Lucene core for more details on the
+ * <a
+ * href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
+ * parameter as well as how numeric fields work under the hood.</p>
+ *
+ * @since 2.9
+ */
+public final class NumericTokenStream extends TokenStream {
+
+  /**
+   * The full precision token gets this token type assigned.
+   */
+  public static final String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric";
+
+  /**
+   * The lower precision tokens gets this token type assigned.
+   */
+  public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
+
+  /**
+   * <b>Expert:</b> Use this attribute to get the details of the currently generated token.
+   *
+   * @lucene.experimental
+   * @since 4.0
+   */
+  public interface NumericTermAttribute extends Attribute {
+    /**
+     * Returns current shift value, undefined before first token
+     */
+    int getShift();
+
+    /**
+     * Returns current token's raw value as {@code long} with all {@link #getShift} applied, undefined before first token
+     */
+    long getRawValue();
+
+    /**
+     * Returns value size in bits (32 for {@code float}, {@code int}; 64 for {@code double}, {@code long})
+     */
+    int getValueSize();
+
+    /**
+     * <em>Don't call this method!</em>
+     *
+     * @lucene.internal
+     */
+    void init(long value, int valSize, int precisionStep, int shift);
+
+    /**
+     * <em>Don't call this method!</em>
+     *
+     * @lucene.internal
+     */
+    void setShift(int shift);
+
+    /**
+     * <em>Don't call this method!</em>
+     *
+     * @lucene.internal
+     */
+    int incShift();
+  }
+
+  // just a wrapper to prevent adding CTA
+  private static final class NumericAttributeFactory extends AttributeFactory {
+    private final AttributeFactory delegate;
+
+    NumericAttributeFactory(AttributeFactory delegate) {
+      this.delegate = delegate;
+    }
+
+    @Override
+    public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
+      if (CharTermAttribute.class.isAssignableFrom(attClass))
+        throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute.");
+      return delegate.createAttributeInstance(attClass);
+    }
+  }
+
+  /**
+   * Implementation of {@link NumericTermAttribute}.
+   *
+   * @lucene.internal
+   * @since 4.0
+   */
+  public static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute, TermToBytesRefAttribute {
+    private long value = 0L;
+    private int valueSize = 0, shift = 0, precisionStep = 0;
+    private BytesRef bytes = new BytesRef();
+
+    public BytesRef getBytesRef() {
+      return bytes;
+    }
+
+    public int fillBytesRef() {
+      try {
+        assert valueSize == 64 || valueSize == 32;
+        return (valueSize == 64) ?
+            NumericUtils.longToPrefixCoded(value, shift, bytes) :
+            NumericUtils.intToPrefixCoded((int) value, shift, bytes);
+      } catch (IllegalArgumentException iae) {
+        // return empty token before first or after last
+        bytes.length = 0;
+        return 0;
+      }
+    }
+
+    public int getShift() {
+      return shift;
+    }
+
+    public void setShift(int shift) {
+      this.shift = shift;
+    }
+
+    public int incShift() {
+      return (shift += precisionStep);
+    }
+
+    public long getRawValue() {
+      return value & ~((1L << shift) - 1L);
+    }
+
+    public int getValueSize() {
+      return valueSize;
+    }
+
+    public void init(long value, int valueSize, int precisionStep, int shift) {
+      this.value = value;
+      this.valueSize = valueSize;
+      this.precisionStep = precisionStep;
+      this.shift = shift;
+    }
+
+    @Override
+    public void clear() {
+      // this attribute has no contents to clear!
+      // we keep it untouched as it's fully controlled by outer class.
+    }
+
+    @Override
+    public void reflectWith(AttributeReflector reflector) {
+      fillBytesRef();
+      reflector.reflect(TermToBytesRefAttribute.class, "bytes", new BytesRef(bytes));
+      reflector.reflect(NumericTermAttribute.class, "shift", shift);
+      reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue());
+      reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize);
+    }
+
+    @Override
+    public void copyTo(AttributeImpl target) {
+      final NumericTermAttribute a = (NumericTermAttribute) target;
+      a.init(value, valueSize, precisionStep, shift);
+    }
+  }
+
+  /**
+   * Creates a token stream for numeric values using the default <code>precisionStep</code>
+   * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized,
+   * before using set a value using the various set<em>???</em>Value() methods.
+   */
+  public NumericTokenStream() {
+    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, NumericUtils.PRECISION_STEP_DEFAULT);
+  }
+
+  /**
+   * Creates a token stream for numeric values with the specified
+   * <code>precisionStep</code>. The stream is not yet initialized,
+   * before using set a value using the various set<em>???</em>Value() methods.
+   */
+  public NumericTokenStream(final int precisionStep) {
+    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, precisionStep);
+  }
+
+  /**
+   * Expert: Creates a token stream for numeric values with the specified
+   * <code>precisionStep</code> using the given
+   * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
+   * The stream is not yet initialized,
+   * before using, set a value using the various set<em>???</em>Value() methods.
+   */
+  public NumericTokenStream(AttributeFactory factory, final int precisionStep) {
+    super(new NumericAttributeFactory(factory));
+    if (precisionStep < 1)
+      throw new IllegalArgumentException("precisionStep must be >=1");
+    this.precisionStep = precisionStep;
+    numericAtt.setShift(-precisionStep);
+  }
+
+  /**
+   * Initializes the token stream with the supplied <code>long</code> value.
+   *
+   * @param value the value, for which this TokenStream should enumerate tokens.
+   * @return this instance, because of this you can use it the following way:
+   *         <code>new Field(name, new NumericTokenStream(precisionStep).setLongValue(value))</code>
+   */
+  public NumericTokenStream setLongValue(final long value) {
+    numericAtt.init(value, valSize = 64, precisionStep, -precisionStep);
+    return this;
+  }
+
+  /**
+   * Initializes the token stream with the supplied <code>int</code> value.
+   *
+   * @param value the value, for which this TokenStream should enumerate tokens.
+   * @return this instance, because of this you can use it the following way:
+   *         <code>new Field(name, new NumericTokenStream(precisionStep).setIntValue(value))</code>
+   */
+  public NumericTokenStream setIntValue(final int value) {
+    numericAtt.init(value, valSize = 32, precisionStep, -precisionStep);
+    return this;
+  }
+
+  /**
+   * Initializes the token stream with the supplied <code>double</code> value.
+   *
+   * @param value the value, for which this TokenStream should enumerate tokens.
+   * @return this instance, because of this you can use it the following way:
+   *         <code>new Field(name, new NumericTokenStream(precisionStep).setDoubleValue(value))</code>
+   */
+  public NumericTokenStream setDoubleValue(final double value) {
+    numericAtt.init(NumericUtils.doubleToSortableLong(value), valSize = 64, precisionStep, -precisionStep);
+    return this;
+  }
+
+  /**
+   * Initializes the token stream with the supplied <code>float</code> value.
+   *
+   * @param value the value, for which this TokenStream should enumerate tokens.
+   * @return this instance, because of this you can use it the following way:
+   *         <code>new Field(name, new NumericTokenStream(precisionStep).setFloatValue(value))</code>
+   */
+  public NumericTokenStream setFloatValue(final float value) {
+    numericAtt.init(NumericUtils.floatToSortableInt(value), valSize = 32, precisionStep, -precisionStep);
+    return this;
+  }
+
+  @Override
+  public void reset() {
+    if (valSize == 0)
+      throw new IllegalStateException("call set???Value() before usage");
+    numericAtt.setShift(-precisionStep);
+  }
+
+  @Override
+  public boolean incrementToken() {
+    if (valSize == 0)
+      throw new IllegalStateException("call set???Value() before usage");
+
+    // this will only clear all other attributes in this TokenStream
+    clearAttributes();
+
+    final int shift = numericAtt.incShift();
+    typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
+    posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0);
+    return (shift < valSize);
+  }
+
+  /**
+   * Returns the precision step.
+   */
+  public int getPrecisionStep() {
+    return precisionStep;
+  }
+
+  // members
+  private final NumericTermAttribute numericAtt = addAttribute(NumericTermAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+
+  private int valSize = 0; // valSize==0 means not initialized
+  private final int precisionStep;
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/CharReader.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/CharReader.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/CharReader.java	(revision 0)
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * CharReader is a Reader wrapper. It reads chars from
+ * Reader and outputs {@link CharStream}, defining an
+ * identify function {@link #correctOffset} method that
+ * simply returns the provided offset.
+ */
+public final class CharReader extends CharStream {
+
+  private final Reader input;
+  
+  public static CharStream get(Reader input) {
+    return input instanceof CharStream ?
+      (CharStream)input : new CharReader(input);
+  }
+
+  private CharReader(Reader in) {
+    input = in;
+  }
+
+  @Override
+  public int correctOffset(int currentOff) {
+    return currentOff;
+  }
+
+  @Override
+  public void close() throws IOException {
+    input.close();
+  }
+
+  @Override
+  public int read(char[] cbuf, int off, int len) throws IOException {
+    return input.read(cbuf, off, len);
+  }
+
+  @Override
+  public boolean markSupported(){
+    return input.markSupported();
+  }
+
+  @Override
+  public void mark( int readAheadLimit ) throws IOException {
+    input.mark(readAheadLimit);
+  }
+
+  @Override
+  public void reset() throws IOException {
+    input.reset();
+  }
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/Token.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/Token.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/Token.java	(revision 0)
@@ -0,0 +1,648 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.index.Payload;
+
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+
+/** 
+  A Token is an occurrence of a term from the text of a field.  It consists of
+  a term's text, the start and end offset of the term in the text of the field,
+  and a type string.
+  <p>
+  The start and end offsets permit applications to re-associate a token with
+  its source text, e.g., to display highlighted query terms in a document
+  browser, or to show matching text fragments in a <abbr title="KeyWord In Context">KWIC</abbr>
+  display, etc.
+  <p>
+  The type is a string, assigned by a lexical analyzer
+  (a.k.a. tokenizer), naming the lexical or syntactic class that the token
+  belongs to.  For example an end of sentence marker token might be implemented
+  with type "eos".  The default token type is "word".  
+  <p>
+  A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
+  length byte array. For instance, Lucene's DocsAndPositionsEnum#getPayload() can be used
+ to retrieve the
+  payloads from the index.
+  
+  <br><br>
+  
+  <p><b>NOTE:</b> As of 2.9, Token implements all {@link Attribute} interfaces
+  that are part of core Lucene and can be found in the {@code tokenattributes} subpackage.
+  Even though it is not necessary to use Token anymore, with the new TokenStream API it can
+  be used as convenience class that implements all {@link Attribute}s, which is especially useful
+  to easily switch from the old to the new TokenStream API.
+  
+  <br><br>
+  
+  <p>Tokenizers and TokenFilters should try to re-use a Token
+  instance when possible for best performance, by
+  implementing the {@link TokenStream#incrementToken()} API.
+  Failing that, to create a new Token you should first use
+  one of the constructors that starts with null text.  To load
+  the token from a char[] use {@link #copyBuffer(char[], int, int)}.
+  To load from a String use {@link #setEmpty} followed by {@link #append(CharSequence)} or {@link #append(CharSequence, int, int)}.
+  Alternatively you can get the Token's termBuffer by calling either {@link #buffer()},
+  if you know that your text is shorter than the capacity of the termBuffer
+  or {@link #resizeBuffer(int)}, if there is any possibility
+  that you may need to grow the buffer. Fill in the characters of your term into this
+  buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string,
+  or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setLength(int)} to
+  set the length of the term text.  See <a target="_top"
+  href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
+  for details.</p>
+  <p>Typical Token reuse patterns:
+  <ul>
+  <li> Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
+  <pre>
+    return reusableToken.reinit(string, startOffset, endOffset[, type]);
+  </pre>
+  </li>
+  <li> Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
+  <pre>
+    return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
+  </pre>
+  </li>
+
+  <li> Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
+  <pre>
+    return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
+  </pre>
+  </li>
+  <li> Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
+  <pre>
+    return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
+  </pre>
+  </li>
+  <li> Copying from one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
+  <pre>
+    return reusableToken.reinit(source.buffer(), 0, source.length(), source.startOffset(), source.endOffset()[, source.type()]);
+  </pre>
+  </li>
+  </ul>
+  A few things to note:
+  <ul>
+  <li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
+  <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
+  <li>The startOffset and endOffset represent the start and offset in the source text, so be careful in adjusting them.</li>
+  <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
+  </ul>
+  </p>
+  <p>
+  <b>Please note:</b> With Lucene 3.1, the <code>{@linkplain #toString toString()}</code> method had to be changed to match the
+  {@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
+  This method now only prints the term text, no additional information anymore.
+  </p>
+  @see org.apache.lucene.index.Payload
+*/
+public class Token extends CharTermAttributeImpl 
+                   implements TypeAttribute, PositionIncrementAttribute,
+                              FlagsAttribute, OffsetAttribute, PayloadAttribute {
+
+  private int startOffset,endOffset;
+  private String type = DEFAULT_TYPE;
+  private int flags;
+  private Payload payload;
+  private int positionIncrement = 1;
+
+  /** Constructs a Token with null text. */
+  public Token() {
+  }
+
+  /** Constructs a Token with null text and start & end
+   *  offsets.
+   *  @param start start offset in the source text
+   *  @param end end offset in the source text */
+  public Token(int start, int end) {
+    startOffset = start;
+    endOffset = end;
+  }
+
+  /** Constructs a Token with null text and start & end
+   *  offsets plus the Token type.
+   *  @param start start offset in the source text
+   *  @param end end offset in the source text
+   *  @param typ the lexical type of this Token */
+  public Token(int start, int end, String typ) {
+    startOffset = start;
+    endOffset = end;
+    type = typ;
+  }
+
+  /**
+   * Constructs a Token with null text and start & end
+   *  offsets plus flags. NOTE: flags is EXPERIMENTAL.
+   *  @param start start offset in the source text
+   *  @param end end offset in the source text
+   *  @param flags The bits to set for this token
+   */
+  public Token(int start, int end, int flags) {
+    startOffset = start;
+    endOffset = end;
+    this.flags = flags;
+  }
+
+  /** Constructs a Token with the given term text, and start
+   *  & end offsets.  The type defaults to "word."
+   *  <b>NOTE:</b> for better indexing speed you should
+   *  instead use the char[] termBuffer methods to set the
+   *  term text.
+   *  @param text term text
+   *  @param start start offset
+   *  @param end end offset
+   */
+  public Token(String text, int start, int end) {
+    append(text);
+    startOffset = start;
+    endOffset = end;
+  }
+
+  /** Constructs a Token with the given text, start and end
+   *  offsets, & type.  <b>NOTE:</b> for better indexing
+   *  speed you should instead use the char[] termBuffer
+   *  methods to set the term text.
+   *  @param text term text
+   *  @param start start offset
+   *  @param end end offset
+   *  @param typ token type
+   */
+  public Token(String text, int start, int end, String typ) {
+    append(text);
+    startOffset = start;
+    endOffset = end;
+    type = typ;
+  }
+
+  /**
+   *  Constructs a Token with the given text, start and end
+   *  offsets, & type.  <b>NOTE:</b> for better indexing
+   *  speed you should instead use the char[] termBuffer
+   *  methods to set the term text.
+   * @param text
+   * @param start
+   * @param end
+   * @param flags token type bits
+   */
+  public Token(String text, int start, int end, int flags) {
+    append(text);
+    startOffset = start;
+    endOffset = end;
+    this.flags = flags;
+  }
+
+  /**
+   *  Constructs a Token with the given term buffer (offset
+   *  & length), start and end
+   *  offsets
+   * @param startTermBuffer
+   * @param termBufferOffset
+   * @param termBufferLength
+   * @param start
+   * @param end
+   */
+  public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
+    copyBuffer(startTermBuffer, termBufferOffset, termBufferLength);
+    startOffset = start;
+    endOffset = end;
+  }
+
+  /** Set the position increment.  This determines the position of this token
+   * relative to the previous Token in a {@link TokenStream}, used in phrase
+   * searching.
+   *
+   * <p>The default value is one.
+   *
+   * <p>Some common uses for this are:<ul>
+   *
+   * <li>Set it to zero to put multiple terms in the same position.  This is
+   * useful if, e.g., a word has multiple stems.  Searches for phrases
+   * including either stem will match.  In this case, all but the first stem's
+   * increment should be set to zero: the increment of the first instance
+   * should be one.  Repeating a token with an increment of zero can also be
+   * used to boost the scores of matches on that token.
+   *
+   * <li>Set it to values greater than one to inhibit exact phrase matches.
+   * If, for example, one does not want phrases to match across removed stop
+   * words, then one could build a stop word filter that removes stop words and
+   * also sets the increment to the number of stop words removed before each
+   * non-stop word.  Then exact phrase queries will only match when the terms
+   * occur with no intervening stop words.
+   *
+   * </ul>
+   * @param positionIncrement the distance from the prior term
+   * @see org.apache.lucene.index.DocsAndPositionsEnum
+   */
+  public void setPositionIncrement(int positionIncrement) {
+    if (positionIncrement < 0)
+      throw new IllegalArgumentException
+        ("Increment must be zero or greater: " + positionIncrement);
+    this.positionIncrement = positionIncrement;
+  }
+
+  /** Returns the position increment of this Token.
+   * @see #setPositionIncrement
+   */
+  public int getPositionIncrement() {
+    return positionIncrement;
+  }
+
+  /** Returns this Token's starting offset, the position of the first character
+    corresponding to this token in the source text.
+
+    Note that the difference between endOffset() and startOffset() may not be
+    equal to {@link #length}, as the term text may have been altered by a
+    stemmer or some other filter. */
+  public final int startOffset() {
+    return startOffset;
+  }
+
+  /** Set the starting offset.
+      @see #startOffset() */
+  public void setStartOffset(int offset) {
+    this.startOffset = offset;
+  }
+
+  /** Returns this Token's ending offset, one greater than the position of the
+    last character corresponding to this token in the source text. The length
+    of the token in the source text is (endOffset - startOffset). */
+  public final int endOffset() {
+    return endOffset;
+  }
+
+  /** Set the ending offset.
+      @see #endOffset() */
+  public void setEndOffset(int offset) {
+    this.endOffset = offset;
+  }
+  
+  /** Set the starting and ending offset.
+  @see #startOffset() and #endOffset()*/
+  public void setOffset(int startOffset, int endOffset) {
+    this.startOffset = startOffset;
+    this.endOffset = endOffset;
+  }
+
+  /** Returns this Token's lexical type.  Defaults to "word". */
+  public final String type() {
+    return type;
+  }
+
+  /** Set the lexical type.
+      @see #type() */
+  public final void setType(String type) {
+    this.type = type;
+  }
+
+  /**
+   * <p/>
+   *
+   * Get the bitset for any bits that have been set.  This is completely distinct from {@link #type()}, although they do share similar purposes.
+   * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
+   *
+   * 
+   * @return The bits
+   * @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
+   */
+  public int getFlags() {
+    return flags;
+  }
+
+  /**
+   * @see #getFlags()
+   */
+  public void setFlags(int flags) {
+    this.flags = flags;
+  }
+
+  /**
+   * Returns this Token's payload.
+   */ 
+  public Payload getPayload() {
+    return this.payload;
+  }
+
+  /** 
+   * Sets this Token's payload.
+   */
+  public void setPayload(Payload payload) {
+    this.payload = payload;
+  }
+  
+  /** Resets the term text, payload, flags, and positionIncrement,
+   * startOffset, endOffset and token type to default.
+   */
+  @Override
+  public void clear() {
+    super.clear();
+    payload = null;
+    positionIncrement = 1;
+    flags = 0;
+    startOffset = endOffset = 0;
+    type = DEFAULT_TYPE;
+  }
+
+  @Override
+  public Object clone() {
+    Token t = (Token)super.clone();
+    // Do a deep clone
+    if (payload != null) {
+      t.payload = (Payload) payload.clone();
+    }
+    return t;
+  }
+
+  /** Makes a clone, but replaces the term buffer &
+   * start/end offset in the process.  This is more
+   * efficient than doing a full clone (and then calling
+   * {@link #copyBuffer}) because it saves a wasted copy of the old
+   * termBuffer. */
+  public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
+    final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
+    t.positionIncrement = positionIncrement;
+    t.flags = flags;
+    t.type = type;
+    if (payload != null)
+      t.payload = (Payload) payload.clone();
+    return t;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == this)
+      return true;
+
+    if (obj instanceof Token) {
+      final Token other = (Token) obj;
+      return (startOffset == other.startOffset &&
+          endOffset == other.endOffset && 
+          flags == other.flags &&
+          positionIncrement == other.positionIncrement &&
+          (type == null ? other.type == null : type.equals(other.type)) &&
+          (payload == null ? other.payload == null : payload.equals(other.payload)) &&
+          super.equals(obj)
+      );
+    } else
+      return false;
+  }
+
+  @Override
+  public int hashCode() {
+    int code = super.hashCode();
+    code = code * 31 + startOffset;
+    code = code * 31 + endOffset;
+    code = code * 31 + flags;
+    code = code * 31 + positionIncrement;
+    if (type != null)
+      code = code * 31 + type.hashCode();
+    if (payload != null)
+      code = code * 31 + payload.hashCode();
+    return code;
+  }
+      
+  // like clear() but doesn't clear termBuffer/text
+  private void clearNoTermBuffer() {
+    payload = null;
+    positionIncrement = 1;
+    flags = 0;
+    startOffset = endOffset = 0;
+    type = DEFAULT_TYPE;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #copyBuffer(char[], int, int)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset},
+   *  {@link #setType}
+   *  @return this Token instance */
+  public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
+    clearNoTermBuffer();
+    copyBuffer(newTermBuffer, newTermOffset, newTermLength);
+    payload = null;
+    positionIncrement = 1;
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = newType;
+    return this;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #copyBuffer(char[], int, int)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset}
+   *  {@link #setType} on Token.DEFAULT_TYPE
+   *  @return this Token instance */
+  public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
+    clearNoTermBuffer();
+    copyBuffer(newTermBuffer, newTermOffset, newTermLength);
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = DEFAULT_TYPE;
+    return this;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #append(CharSequence)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset}
+   *  {@link #setType}
+   *  @return this Token instance */
+  public Token reinit(String newTerm, int newStartOffset, int newEndOffset, String newType) {
+    clear();
+    append(newTerm);
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = newType;
+    return this;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #append(CharSequence, int, int)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset}
+   *  {@link #setType}
+   *  @return this Token instance */
+  public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
+    clear();
+    append(newTerm, newTermOffset, newTermOffset + newTermLength);
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = newType;
+    return this;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #append(CharSequence)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset}
+   *  {@link #setType} on Token.DEFAULT_TYPE
+   *  @return this Token instance */
+  public Token reinit(String newTerm, int newStartOffset, int newEndOffset) {
+    clear();
+    append(newTerm);
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = DEFAULT_TYPE;
+    return this;
+  }
+
+  /** Shorthand for calling {@link #clear},
+   *  {@link #append(CharSequence, int, int)},
+   *  {@link #setStartOffset},
+   *  {@link #setEndOffset}
+   *  {@link #setType} on Token.DEFAULT_TYPE
+   *  @return this Token instance */
+  public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
+    clear();
+    append(newTerm, newTermOffset, newTermOffset + newTermLength);
+    startOffset = newStartOffset;
+    endOffset = newEndOffset;
+    type = DEFAULT_TYPE;
+    return this;
+  }
+
+  /**
+   * Copy the prototype token's fields into this one. Note: Payloads are shared.
+   * @param prototype
+   */
+  public void reinit(Token prototype) {
+    copyBuffer(prototype.buffer(), 0, prototype.length());
+    positionIncrement = prototype.positionIncrement;
+    flags = prototype.flags;
+    startOffset = prototype.startOffset;
+    endOffset = prototype.endOffset;
+    type = prototype.type;
+    payload =  prototype.payload;
+  }
+
+  /**
+   * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
+   * @param prototype
+   * @param newTerm
+   */
+  public void reinit(Token prototype, String newTerm) {
+    setEmpty().append(newTerm);
+    positionIncrement = prototype.positionIncrement;
+    flags = prototype.flags;
+    startOffset = prototype.startOffset;
+    endOffset = prototype.endOffset;
+    type = prototype.type;
+    payload =  prototype.payload;
+  }
+
+  /**
+   * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
+   * @param prototype
+   * @param newTermBuffer
+   * @param offset
+   * @param length
+   */
+  public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
+    copyBuffer(newTermBuffer, offset, length);
+    positionIncrement = prototype.positionIncrement;
+    flags = prototype.flags;
+    startOffset = prototype.startOffset;
+    endOffset = prototype.endOffset;
+    type = prototype.type;
+    payload =  prototype.payload;
+  }
+
+  @Override
+  public void copyTo(AttributeImpl target) {
+    if (target instanceof Token) {
+      final Token to = (Token) target;
+      to.reinit(this);
+      // reinit shares the payload, so clone it:
+      if (payload !=null) {
+        to.payload = (Payload) payload.clone();
+      }
+    } else {
+      super.copyTo(target);
+      ((OffsetAttribute) target).setOffset(startOffset, endOffset);
+      ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
+      ((PayloadAttribute) target).setPayload((payload == null) ? null : (Payload) payload.clone());
+      ((FlagsAttribute) target).setFlags(flags);
+      ((TypeAttribute) target).setType(type);
+    }
+  }
+
+  @Override
+  public void reflectWith(AttributeReflector reflector) {
+    super.reflectWith(reflector);
+    reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
+    reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
+    reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
+    reflector.reflect(PayloadAttribute.class, "payload", payload);
+    reflector.reflect(FlagsAttribute.class, "flags", flags);
+    reflector.reflect(TypeAttribute.class, "type", type);
+  }
+
+  /** Convenience factory that returns <code>Token</code> as implementation for the basic
+   * attributes and return the default impl (with &quot;Impl&quot; appended) for all other
+   * attributes.
+   * @since 3.0
+   */
+  public static final AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
+    new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
+  
+  /** <b>Expert:</b> Creates a TokenAttributeFactory returning {@link Token} as instance for the basic attributes
+   * and for all other attributes calls the given delegate factory.
+   * @since 3.0
+   */
+  public static final class TokenAttributeFactory extends AttributeSource.AttributeFactory {
+    
+    private final AttributeSource.AttributeFactory delegate;
+    
+    /** <b>Expert</b>: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes
+     * and for all other attributes calls the given delegate factory. */
+    public TokenAttributeFactory(AttributeSource.AttributeFactory delegate) {
+      this.delegate = delegate;
+    }
+  
+    @Override
+    public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
+      return attClass.isAssignableFrom(Token.class)
+        ? new Token() : delegate.createAttributeInstance(attClass);
+    }
+    
+    @Override
+    public boolean equals(Object other) {
+      if (this == other) return true;
+      if (other instanceof TokenAttributeFactory) {
+        final TokenAttributeFactory af = (TokenAttributeFactory) other;
+        return this.delegate.equals(af.delegate);
+      }
+      return false;
+    }
+    
+    @Override
+    public int hashCode() {
+      return delegate.hashCode() ^ 0x0a45aa31;
+    }
+  }
+
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/CachingTokenFilter.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/CachingTokenFilter.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/CachingTokenFilter.java	(revision 0)
@@ -0,0 +1,86 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * This class can be used if the token attributes of a TokenStream
+ * are intended to be consumed more than once. It caches
+ * all token attribute states locally in a List.
+ * 
+ * <P>CachingTokenFilter implements the optional method
+ * {@link TokenStream#reset()}, which repositions the
+ * stream to the first Token. 
+ */
+public final class CachingTokenFilter extends TokenFilter {
+  private List<AttributeSource.State> cache = null;
+  private Iterator<AttributeSource.State> iterator = null; 
+  private AttributeSource.State finalState;
+  
+  public CachingTokenFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if (cache == null) {
+      // fill cache lazily
+      cache = new LinkedList<AttributeSource.State>();
+      fillCache();
+      iterator = cache.iterator();
+    }
+    
+    if (!iterator.hasNext()) {
+      // the cache is exhausted, return false
+      return false;
+    }
+    // Since the TokenFilter can be reset, the tokens need to be preserved as immutable.
+    restoreState(iterator.next());
+    return true;
+  }
+  
+  @Override
+  public final void end() throws IOException {
+    if (finalState != null) {
+      restoreState(finalState);
+    }
+  }
+
+  @Override
+  public void reset() throws IOException {
+    if(cache != null) {
+      iterator = cache.iterator();
+    }
+  }
+  
+  private void fillCache() throws IOException {
+    while(input.incrementToken()) {
+      cache.add(captureState());
+    }
+    // capture final state
+    input.end();
+    finalState = captureState();
+  }
+
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/Analyzer.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/Analyzer.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/Analyzer.java	(revision 0)
@@ -0,0 +1,160 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.AlreadyClosedException;
+import org.apache.lucene.util.CloseableThreadLocal;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.Reader;
+import java.lang.reflect.Modifier;
+
+/**
+ * An Analyzer builds TokenStreams, which analyze text.  It thus represents a
+ * policy for extracting index terms from text.
+ * <p/>
+ * Typical implementations first build a Tokenizer, which breaks the stream of
+ * characters from the Reader into raw Tokens.  One or more TokenFilters may
+ * then be applied to the output of the Tokenizer.
+ * <p>The {@code Analyzer}-API in Lucene is based on the decorator pattern.
+ * Therefore all non-abstract subclasses must be final or their {@link #tokenStream}
+ * and {@link #reusableTokenStream} implementations must be final! This is checked
+ * when Java assertions are enabled.
+ */
+public abstract class Analyzer implements Closeable {
+
+  protected Analyzer() {
+    super();
+    assert assertFinal();
+  }
+
+  private boolean assertFinal() {
+    try {
+      final Class<?> clazz = getClass();
+      assert clazz.isAnonymousClass() ||
+          (clazz.getModifiers() & (Modifier.FINAL | Modifier.PRIVATE)) != 0 ||
+          (
+              Modifier.isFinal(clazz.getMethod("tokenStream", String.class, Reader.class).getModifiers()) &&
+                  Modifier.isFinal(clazz.getMethod("reusableTokenStream", String.class, Reader.class).getModifiers())
+          ) :
+          "Analyzer implementation classes or at least their tokenStream() and reusableTokenStream() implementations must be final";
+      return true;
+    } catch (NoSuchMethodException nsme) {
+      return false;
+    }
+  }
+
+  /**
+   * Creates a TokenStream which tokenizes all the text in the provided
+   * Reader.  Must be able to handle null field name for
+   * backward compatibility.
+   */
+  public abstract TokenStream tokenStream(String fieldName, Reader reader);
+
+  /**
+   * Creates a TokenStream that is allowed to be re-used
+   * from the previous time that the same thread called
+   * this method.  Callers that do not need to use more
+   * than one TokenStream at the same time from this
+   * analyzer should use this method for better
+   * performance.
+   */
+  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+    return tokenStream(fieldName, reader);
+  }
+
+  private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
+
+  /**
+   * Used by Analyzers that implement reusableTokenStream
+   * to retrieve previously saved TokenStreams for re-use
+   * by the same thread.
+   */
+  protected Object getPreviousTokenStream() {
+    try {
+      return tokenStreams.get();
+    } catch (NullPointerException npe) {
+      if (tokenStreams == null) {
+        throw new AlreadyClosedException("this Analyzer is closed");
+      } else {
+        throw npe;
+      }
+    }
+  }
+
+  /**
+   * Used by Analyzers that implement reusableTokenStream
+   * to save a TokenStream for later re-use by the same
+   * thread.
+   */
+  protected void setPreviousTokenStream(Object obj) {
+    try {
+      tokenStreams.set(obj);
+    } catch (NullPointerException npe) {
+      if (tokenStreams == null) {
+        throw new AlreadyClosedException("this Analyzer is closed");
+      } else {
+        throw npe;
+      }
+    }
+  }
+
+  /**
+   * Invoked before indexing a Fieldable instance if
+   * terms have already been added to that field.  This allows custom
+   * analyzers to place an automatic position increment gap between
+   * Fieldable instances using the same field name.  The default value
+   * position increment gap is 0.  With a 0 position increment gap and
+   * the typical default token position increment of 1, all terms in a field,
+   * including across Fieldable instances, are in successive positions, allowing
+   * exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
+   *
+   * @param fieldName Fieldable name being indexed.
+   * @return position increment gap, added to the next token emitted from {@link #tokenStream(String, Reader)}
+   */
+  public int getPositionIncrementGap(String fieldName) {
+    return 0;
+  }
+
+  /**
+   * Just like {@link #getPositionIncrementGap}, except for
+   * Token offsets instead.  By default this returns 1 for
+   * tokenized fields, as if the fields were joined
+   * with an extra space character, and 0 for un-tokenized
+   * fields.  This method is only called if the field
+   * produced at least one token for indexing.
+   *
+   * @param isTokenized Is the field tokenized?
+   * @return offset gap, added to the next token emitted from {@link #tokenStream(String, Reader)}
+   */
+  public int getOffsetGap(String fieldName, boolean isTokenized) {
+    if (isTokenized)
+      return 1;
+    else
+      return 0;
+  }
+
+  /**
+   * Frees persistent resources used by this Analyzer
+   */
+  public void close() {
+    tokenStreams.close();
+    tokenStreams = null;
+  }
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/TokenFilter.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/TokenFilter.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/TokenFilter.java	(revision 0)
@@ -0,0 +1,56 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/** A TokenFilter is a TokenStream whose input is another TokenStream.
+  <p>
+  This is an abstract class; subclasses must override {@link #incrementToken()}.
+  @see TokenStream
+  */
+public abstract class TokenFilter extends TokenStream {
+  /** The source of tokens for this filter. */
+  protected final TokenStream input;
+
+  /** Construct a token stream filtering the given input. */
+  protected TokenFilter(TokenStream input) {
+    super(input);
+    this.input = input;
+  }
+  
+  /** Performs end-of-stream operations, if any, and then calls <code>end()</code> on the
+   * input TokenStream.<p/> 
+   * <b>NOTE:</b> Be sure to call <code>super.end()</code> first when overriding this method.*/
+  @Override
+  public void end() throws IOException {
+    input.end();
+  }
+  
+  /** Close the input TokenStream. */
+  @Override
+  public void close() throws IOException {
+    input.close();
+  }
+
+  /** Reset the filter as well as the input TokenStream. */
+  @Override
+  public void reset() throws IOException {
+    input.reset();
+  }
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java	(revision 0)
@@ -0,0 +1,99 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.util.AttributeImpl;
+
+/**
+ * The payload of a Token. See also {@link Payload}.
+ */
+public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable {
+  private Payload payload;  
+  
+  /**
+   * Initialize this attribute with no payload.
+   */
+  public PayloadAttributeImpl() {}
+  
+  /**
+   * Initialize this attribute with the given payload. 
+   */
+  public PayloadAttributeImpl(Payload payload) {
+    this.payload = payload;
+  }
+  
+  /**
+   * Returns this Token's payload.
+   */ 
+  public Payload getPayload() {
+    return this.payload;
+  }
+
+  /** 
+   * Sets this Token's payload.
+   */
+  public void setPayload(Payload payload) {
+    this.payload = payload;
+  }
+  
+  @Override
+  public void clear() {
+    payload = null;
+  }
+
+  @Override
+  public Object clone()  {
+    PayloadAttributeImpl clone = (PayloadAttributeImpl) super.clone();
+    if (payload != null) {
+      clone.payload = (Payload) payload.clone();
+    }
+    return clone;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    
+    if (other instanceof PayloadAttribute) {
+      PayloadAttributeImpl o = (PayloadAttributeImpl) other;
+      if (o.payload == null || payload == null) {
+        return o.payload == null && payload == null;
+      }
+      
+      return o.payload.equals(payload);
+    }
+    
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    return (payload == null) ? 0 : payload.hashCode();
+  }
+
+  @Override
+  public void copyTo(AttributeImpl target) {
+    PayloadAttribute t = (PayloadAttribute) target;
+    t.setPayload((payload == null) ? null : (Payload) payload.clone());
+  }  
+
+  
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/TermToBytesRefAttribute.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/TermToBytesRefAttribute.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/TermToBytesRefAttribute.java	(revision 0)
@@ -0,0 +1,77 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This attribute is requested by TermsHashPerField to index the contents.
+ * This attribute can be used to customize the final byte[] encoding of terms.
+ * <p>
+ * Consumers of this attribute call {@link #getBytesRef()} up-front, and then
+ * invoke {@link #fillBytesRef()} for each term. Example:
+ * <pre class="prettyprint">
+ *   final TermToBytesRefAttribute termAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
+ *   final BytesRef bytes = termAtt.getBytesRef();
+ *
+ *   while (tokenStream.incrementToken()) {
+ *
+ *     // you must call termAtt.fillBytesRef() before doing something with the bytes.
+ *     // this encodes the term value (internally it might be a char[], etc) into the bytes.
+ *     int hashCode = termAtt.fillBytesRef();
+ *
+ *     if (isInteresting(bytes)) {
+ *     
+ *       // because the bytes are reused by the attribute (like CharTermAttribute's char[] buffer),
+ *       // you should make a copy if you need persistent access to the bytes, otherwise they will
+ *       // be rewritten across calls to incrementToken()
+ *
+ *       doSomethingWith(new BytesRef(bytes));
+ *     }
+ *   }
+ *   ...
+ * </pre>
+ * @lucene.experimental This is a very expert API, please use
+ * {@link CharTermAttributeImpl} and its implementation of this method
+ * for UTF-8 terms.
+ */
+public interface TermToBytesRefAttribute extends Attribute {
+  /** 
+   * Updates the bytes {@link #getBytesRef()} to contain this term's
+   * final encoding, and returns its hashcode.
+   * @return the hashcode as defined by {@link BytesRef#hashCode}:
+   * <pre>
+   *  int hash = 0;
+   *  for (int i = termBytes.offset; i &lt; termBytes.offset+termBytes.length; i++) {
+   *    hash = 31*hash + termBytes.bytes[i];
+   *  }
+   * </pre>
+   * Implement this for performance reasons, if your code can calculate
+   * the hash on-the-fly. If this is not the case, just return
+   * {@code termBytes.hashCode()}.
+   */
+  public int fillBytesRef();
+  
+  /**
+   * Retrieve this attribute's BytesRef. The bytes are updated 
+   * from the current term when the consumer calls {@link #fillBytesRef()}.
+   * @return this Attributes internal BytesRef.
+   */
+  public BytesRef getBytesRef();
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java	(revision 0)
@@ -0,0 +1,78 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.AttributeImpl;
+
+/**
+ * This attribute can be used to pass different flags down the tokenizer chain,
+ * eg from one TokenFilter to another one. 
+ * @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
+ */
+public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable {
+  private int flags = 0;
+  
+  /**
+   * <p/>
+   *
+   * Get the bitset for any bits that have been set.  This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
+   * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
+   *
+   *
+   * @return The bits
+   */
+  public int getFlags() {
+    return flags;
+  }
+
+  /**
+   * @see #getFlags()
+   */
+  public void setFlags(int flags) {
+    this.flags = flags;
+  }
+  
+  @Override
+  public void clear() {
+    flags = 0;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) {
+      return true;
+    }
+    
+    if (other instanceof FlagsAttributeImpl) {
+      return ((FlagsAttributeImpl) other).flags == flags;
+    }
+    
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    return flags;
+  }
+  
+  @Override
+  public void copyTo(AttributeImpl target) {
+    FlagsAttribute t = (FlagsAttribute) target;
+    t.setFlags(flags);
+  }
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/TypeAttribute.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/TypeAttribute.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/TypeAttribute.java	(revision 0)
@@ -0,0 +1,36 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Attribute;
+
+/**
+ * A Token's lexical type. The Default value is "word". 
+ */
+public interface TypeAttribute extends Attribute {
+
+  /** the default type */
+  public static final String DEFAULT_TYPE = "word";
+
+  /** Returns this Token's lexical type.  Defaults to "word". */
+  public String type();
+
+  /** Set the lexical type.
+      @see #type() */
+  public void setType(String type);
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java	(revision 0)
@@ -0,0 +1,49 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Attribute;
+
+/**
+ * This attribute can be used to mark a token as a keyword. Keyword aware
+ * {@link TokenStream}s can decide to modify a token based on the return value
+ * of {@link #isKeyword()} if the token is modified. Stemming filters for
+ * instance can use this attribute to conditionally skip a term if
+ * {@link #isKeyword()} returns <code>true</code>.
+ */
+public interface KeywordAttribute extends Attribute {
+
+  /**
+   * Returns <code>true</code> iff the current token is a keyword, otherwise
+   * <code>false</code>.
+   * 
+   * @return <code>true</code> iff the current token is a keyword, otherwise
+   *         <code>false</code>.
+   */
+  public boolean isKeyword();
+
+  /**
+   * Marks the current token as keyword iff set to <code>true</code>.
+   * 
+   * @param isKeyword
+   *          <code>true</code> iff the current token is a keyword, otherwise
+   *          <code>false</code>.
+   */
+  public void setKeyword(boolean isKeyword);
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java	(revision 0)
@@ -0,0 +1,44 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Attribute;
+
+/**
+ * The start and end character offset of a Token. 
+ */
+public interface OffsetAttribute extends Attribute {
+  /** Returns this Token's starting offset, the position of the first character
+  corresponding to this token in the source text.
+
+  Note that the difference between endOffset() and startOffset() may not be
+  equal to termText.length(), as the term text may have been altered by a
+  stemmer or some other filter. */
+  public int startOffset();
+
+  
+  /** Set the starting and ending offset.
+    @see #startOffset() and #endOffset()*/
+  public void setOffset(int startOffset, int endOffset);
+  
+
+  /** Returns this Token's ending offset, one greater than the position of the
+  last character corresponding to this token in the source text. The length
+  of the token in the source text is (endOffset - startOffset). */
+  public int endOffset();
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java	(revision 0)
@@ -0,0 +1,268 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.nio.CharBuffer;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.UnicodeUtil;
+
+/**
+ * The term text of a Token.
+ */
+public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttribute, TermToBytesRefAttribute, Cloneable {
+  private static int MIN_BUFFER_SIZE = 10;
+  
+  private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
+  private int termLength = 0;
+
+  public final void copyBuffer(char[] buffer, int offset, int length) {
+    growTermBuffer(length);
+    System.arraycopy(buffer, offset, termBuffer, 0, length);
+    termLength = length;
+  }
+
+  public final char[] buffer() {
+    return termBuffer;
+  }
+  
+  public final char[] resizeBuffer(int newSize) {
+    if(termBuffer.length < newSize){
+      // Not big enough; create a new array with slight
+      // over allocation and preserve content
+      final char[] newCharBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
+      System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
+      termBuffer = newCharBuffer;
+    }
+    return termBuffer;   
+  }
+  
+  private void growTermBuffer(int newSize) {
+    if(termBuffer.length < newSize){
+      // Not big enough; create a new array with slight
+      // over allocation:
+      termBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
+    }
+  }
+
+  public final CharTermAttribute setLength(int length) {
+    if (length > termBuffer.length)
+      throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
+    termLength = length;
+    return this;
+  }
+  
+  public final CharTermAttribute setEmpty() {
+    termLength = 0;
+    return this;
+  }
+  
+  // *** TermToBytesRefAttribute interface ***
+  private BytesRef bytes = new BytesRef(MIN_BUFFER_SIZE);
+
+  // not until java 6 @Override
+  public int fillBytesRef() {
+    return UnicodeUtil.UTF16toUTF8WithHash(termBuffer, 0, termLength, bytes);
+  }
+
+  // not until java 6 @Override
+  public BytesRef getBytesRef() {
+    return bytes;
+  }
+  
+  // *** CharSequence interface ***
+  public final int length() {
+    return termLength;
+  }
+  
+  public final char charAt(int index) {
+    if (index >= termLength)
+      throw new IndexOutOfBoundsException();
+    return termBuffer[index];
+  }
+  
+  public final CharSequence subSequence(final int start, final int end) {
+    if (start > termLength || end > termLength)
+      throw new IndexOutOfBoundsException();
+    return new String(termBuffer, start, end - start);
+  }
+  
+  // *** Appendable interface ***
+
+  public final CharTermAttribute append(CharSequence csq) {
+    if (csq == null) // needed for Appendable compliance
+      return appendNull();
+    return append(csq, 0, csq.length());
+  }
+  
+  public final CharTermAttribute append(CharSequence csq, int start, int end) {
+    if (csq == null) // needed for Appendable compliance
+      csq = "null";
+    final int len = end - start, csqlen = csq.length();
+    if (len < 0 || start > csqlen || end > csqlen)
+      throw new IndexOutOfBoundsException();
+    if (len == 0)
+      return this;
+    resizeBuffer(termLength + len);
+    if (len > 4) { // only use instanceof check series for longer CSQs, else simply iterate
+      if (csq instanceof String) {
+        ((String) csq).getChars(start, end, termBuffer, termLength);
+      } else if (csq instanceof StringBuilder) {
+        ((StringBuilder) csq).getChars(start, end, termBuffer, termLength);
+      } else if (csq instanceof CharTermAttribute) {
+        System.arraycopy(((CharTermAttribute) csq).buffer(), start, termBuffer, termLength, len);
+      } else if (csq instanceof CharBuffer && ((CharBuffer) csq).hasArray()) {
+        final CharBuffer cb = (CharBuffer) csq;
+        System.arraycopy(cb.array(), cb.arrayOffset() + cb.position() + start, termBuffer, termLength, len);
+      } else if (csq instanceof StringBuffer) {
+        ((StringBuffer) csq).getChars(start, end, termBuffer, termLength);
+      } else {
+        while (start < end)
+          termBuffer[termLength++] = csq.charAt(start++);
+        // no fall-through here, as termLength is updated!
+        return this;
+      }
+      termLength += len;
+      return this;
+    } else {
+      while (start < end)
+        termBuffer[termLength++] = csq.charAt(start++);
+      return this;
+    }
+  }
+  
+  public final CharTermAttribute append(char c) {
+    resizeBuffer(termLength + 1)[termLength++] = c;
+    return this;
+  }
+  
+  // *** For performance some convenience methods in addition to CSQ's ***
+  
+  public final CharTermAttribute append(String s) {
+    if (s == null) // needed for Appendable compliance
+      return appendNull();
+    final int len = s.length();
+    s.getChars(0, len, resizeBuffer(termLength + len), termLength);
+    termLength += len;
+    return this;
+  }
+  
+  public final CharTermAttribute append(StringBuilder s) {
+    if (s == null) // needed for Appendable compliance
+      return appendNull();
+    final int len = s.length();
+    s.getChars(0, len, resizeBuffer(termLength + len), termLength);
+    termLength += len;
+    return this;
+  }
+  
+  public final CharTermAttribute append(CharTermAttribute ta) {
+    if (ta == null) // needed for Appendable compliance
+      return appendNull();
+    final int len = ta.length();
+    System.arraycopy(ta.buffer(), 0, resizeBuffer(termLength + len), termLength, len);
+    termLength += len;
+    return this;
+  }
+
+  private CharTermAttribute appendNull() {
+    resizeBuffer(termLength + 4);
+    termBuffer[termLength++] = 'n';
+    termBuffer[termLength++] = 'u';
+    termBuffer[termLength++] = 'l';
+    termBuffer[termLength++] = 'l';
+    return this;
+  }
+  
+  // *** AttributeImpl ***
+
+  @Override
+  public int hashCode() {
+    int code = termLength;
+    code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
+    return code;
+  }
+
+  @Override
+  public void clear() {
+    termLength = 0;    
+  }
+
+  @Override
+  public Object clone() {
+    CharTermAttributeImpl t = (CharTermAttributeImpl)super.clone();
+    // Do a deep clone
+    t.termBuffer = new char[this.termLength];
+    System.arraycopy(this.termBuffer, 0, t.termBuffer, 0, this.termLength);
+    t.bytes = new BytesRef(bytes);
+    return t;
+  }
+  
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    
+    if (other instanceof CharTermAttributeImpl) {
+      final CharTermAttributeImpl o = ((CharTermAttributeImpl) other);
+      if (termLength != o.termLength)
+        return false;
+      for(int i=0;i<termLength;i++) {
+        if (termBuffer[i] != o.termBuffer[i]) {
+          return false;
+        }
+      }
+      return true;
+    }
+    
+    return false;
+  }
+
+  /** 
+   * Returns solely the term text as specified by the
+   * {@link CharSequence} interface.
+   * <p>This method changed its behavior in Lucene 3.1;
+   * before that it returned a String representation of the whole
+   * term with all attributes.
+   * This affects especially the
+   * {@link org.apache.lucene.analysis.Token} subclass.
+   */
+  @Override
+  public String toString() {
+    return new String(termBuffer, 0, termLength);
+  }
+  
+  @Override
+  public void reflectWith(AttributeReflector reflector) {
+    reflector.reflect(CharTermAttribute.class, "term", toString());
+    fillBytesRef();
+    reflector.reflect(TermToBytesRefAttribute.class, "bytes", new BytesRef(bytes));
+  }
+  
+  @Override
+  public void copyTo(AttributeImpl target) {
+    CharTermAttribute t = (CharTermAttribute) target;
+    t.copyBuffer(termBuffer, 0, termLength);
+  }
+
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java	(revision 0)
@@ -0,0 +1,36 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.util.Attribute;
+
+/**
+ * The payload of a Token. See also {@link Payload}.
+ */
+public interface PayloadAttribute extends Attribute {
+  /**
+   * Returns this Token's payload.
+   */ 
+  public Payload getPayload();
+
+  /** 
+   * Sets this Token's payload.
+   */
+  public void setPayload(Payload payload);
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java	(revision 0)
@@ -0,0 +1,97 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.AttributeImpl;
+
+/** The positionIncrement determines the position of this token
+ * relative to the previous Token in a {@link TokenStream}, used in phrase
+ * searching.
+ *
+ * <p>The default value is one.
+ *
+ * <p>Some common uses for this are:<ul>
+ *
+ * <li>Set it to zero to put multiple terms in the same position.  This is
+ * useful if, e.g., a word has multiple stems.  Searches for phrases
+ * including either stem will match.  In this case, all but the first stem's
+ * increment should be set to zero: the increment of the first instance
+ * should be one.  Repeating a token with an increment of zero can also be
+ * used to boost the scores of matches on that token.
+ *
+ * <li>Set it to values greater than one to inhibit exact phrase matches.
+ * If, for example, one does not want phrases to match across removed stop
+ * words, then one could build a stop word filter that removes stop words and
+ * also sets the increment to the number of stop words removed before each
+ * non-stop word.  Then exact phrase queries will only match when the terms
+ * occur with no intervening stop words.
+ *
+ * </ul>
+ */
+public class PositionIncrementAttributeImpl extends AttributeImpl implements PositionIncrementAttribute, Cloneable {
+  private int positionIncrement = 1;
+  
+  /** Set the position increment. The default value is one.
+   *
+   * @param positionIncrement the distance from the prior term
+   */
+  public void setPositionIncrement(int positionIncrement) {
+    if (positionIncrement < 0)
+      throw new IllegalArgumentException
+        ("Increment must be zero or greater: " + positionIncrement);
+    this.positionIncrement = positionIncrement;
+  }
+
+  /** Returns the position increment of this Token.
+   * @see #setPositionIncrement
+   */
+  public int getPositionIncrement() {
+    return positionIncrement;
+  }
+
+  @Override
+  public void clear() {
+    this.positionIncrement = 1;
+  }
+  
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    
+    if (other instanceof PositionIncrementAttributeImpl) {
+      return positionIncrement == ((PositionIncrementAttributeImpl) other).positionIncrement;
+    }
+ 
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    return positionIncrement;
+  }
+  
+  @Override
+  public void copyTo(AttributeImpl target) {
+    PositionIncrementAttribute t = (PositionIncrementAttribute) target;
+    t.setPositionIncrement(positionIncrement);
+  }  
+
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java	(revision 0)
@@ -0,0 +1,44 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Attribute;
+
+/**
+ * This attribute can be used to pass different flags down the {@link Tokenizer} chain,
+ * e.g. from one TokenFilter to another one. 
+ * @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
+ */
+public interface FlagsAttribute extends Attribute {
+  /**
+   *
+   *
+   * Get the bitset for any bits that have been set.  This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
+   * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
+   *
+   *
+   * @return The bits
+   */
+  public int getFlags();
+
+  /**
+   * @see #getFlags()
+   */
+  public void setFlags(int flags);  
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java	(revision 0)
@@ -0,0 +1,76 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.AttributeImpl;
+
+/**
+ * A Token's lexical type. The default value is "word". 
+ */
+public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute, Cloneable {
+  private String type;
+  
+  public TypeAttributeImpl() {
+    this(DEFAULT_TYPE); 
+  }
+  
+  public TypeAttributeImpl(String type) {
+    this.type = type;
+  }
+  
+  /** Returns this Token's lexical type.  Defaults to "word". */
+  public String type() {
+    return type;
+  }
+
+  /** Set the lexical type.
+      @see #type() */
+  public void setType(String type) {
+    this.type = type;
+  }
+
+  @Override
+  public void clear() {
+    type = DEFAULT_TYPE;    
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    
+    if (other instanceof TypeAttributeImpl) {
+      final TypeAttributeImpl o = (TypeAttributeImpl) other;
+      return (this.type == null ? o.type == null : this.type.equals(o.type));
+    }
+    
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    return (type == null) ? 0 : type.hashCode();
+  }
+  
+  @Override
+  public void copyTo(AttributeImpl target) {
+    TypeAttribute t = (TypeAttribute) target;
+    t.setType(type);
+  }
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java	(revision 0)
@@ -0,0 +1,91 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Attribute;
+
+/**
+ * The term text of a Token.
+ */
+public interface CharTermAttribute extends Attribute, CharSequence, Appendable {
+  
+  /** Copies the contents of buffer, starting at offset for
+   *  length characters, into the termBuffer array.
+   *  @param buffer the buffer to copy
+   *  @param offset the index in the buffer of the first character to copy
+   *  @param length the number of characters to copy
+   */
+  public void copyBuffer(char[] buffer, int offset, int length);
+  
+  /** Returns the internal termBuffer character array which
+   *  you can then directly alter.  If the array is too
+   *  small for your token, use {@link
+   *  #resizeBuffer(int)} to increase it.  After
+   *  altering the buffer be sure to call {@link
+   *  #setLength} to record the number of valid
+   *  characters that were placed into the termBuffer. */
+  public char[] buffer();
+
+  /** Grows the termBuffer to at least size newSize, preserving the
+   *  existing content.
+   *  @param newSize minimum size of the new termBuffer
+   *  @return newly created termBuffer with length &gt;= newSize
+   */
+  public char[] resizeBuffer(int newSize);
+
+  /** Set number of valid characters (length of the term) in
+   *  the termBuffer array. Use this to truncate the termBuffer
+   *  or to synchronize with external manipulation of the termBuffer.
+   *  Note: to grow the size of the array,
+   *  use {@link #resizeBuffer(int)} first.
+   *  @param length the truncated length
+   */
+  public CharTermAttribute setLength(int length);
+  
+  /** Sets the length of the termBuffer to zero.
+   * Use this method before appending contents
+   * using the {@link Appendable} interface.
+   */
+  public CharTermAttribute setEmpty();
+  
+  // the following methods are redefined to get rid of IOException declaration:
+  public CharTermAttribute append(CharSequence csq);
+  public CharTermAttribute append(CharSequence csq, int start, int end);
+  public CharTermAttribute append(char c);
+
+  /** Appends the specified {@code String} to this character sequence. 
+   * <p>The characters of the {@code String} argument are appended, in order, increasing the length of
+   * this sequence by the length of the argument. If argument is {@code null}, then the four
+   * characters {@code "null"} are appended. 
+   */
+  public CharTermAttribute append(String s);
+
+  /** Appends the specified {@code StringBuilder} to this character sequence. 
+   * <p>The characters of the {@code StringBuilder} argument are appended, in order, increasing the length of
+   * this sequence by the length of the argument. If argument is {@code null}, then the four
+   * characters {@code "null"} are appended. 
+   */
+  public CharTermAttribute append(StringBuilder sb);
+
+  /** Appends the contents of the other {@code CharTermAttribute} to this character sequence. 
+   * <p>The characters of the {@code CharTermAttribute} argument are appended, in order, increasing the length of
+   * this sequence by the length of the argument. If argument is {@code null}, then the four
+   * characters {@code "null"} are appended. 
+   */
+  public CharTermAttribute append(CharTermAttribute termAtt);
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java	(revision 0)
@@ -0,0 +1,82 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.AttributeImpl;
+
+/**
+ * This attribute can be used to mark a token as a keyword. Keyword aware
+ * {@link TokenStream}s can decide to modify a token based on the return value
+ * of {@link #isKeyword()} if the token is modified. Stemming filters for
+ * instance can use this attribute to conditionally skip a term if
+ * {@link #isKeyword()} returns <code>true</code>.
+ */
+public final class KeywordAttributeImpl extends AttributeImpl implements
+    KeywordAttribute {
+  private boolean keyword;
+
+  @Override
+  public void clear() {
+    keyword = false;
+  }
+
+  @Override
+  public void copyTo(AttributeImpl target) {
+    KeywordAttribute attr = (KeywordAttribute) target;
+    attr.setKeyword(keyword);
+  }
+
+  @Override
+  public int hashCode() {
+    return keyword ? 31 : 37;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (getClass() != obj.getClass())
+      return false;
+    final KeywordAttributeImpl other = (KeywordAttributeImpl) obj;
+    return keyword == other.keyword;
+  }
+
+  /**
+   * Returns <code>true</code> iff the current token is a keyword, otherwise
+   * <code>false</code>.
+   * 
+   * @return <code>true</code> iff the current token is a keyword, otherwise
+   *         <code>false</code>.
+   */
+  public boolean isKeyword() {
+    return keyword;
+  }
+
+  /**
+   * Marks the current token as keyword iff set to <code>true</code>.
+   * 
+   * @param isKeyword
+   *          <code>true</code> iff the current token is a keyword, otherwise
+   *          <code>false</code>.
+   */
+  public void setKeyword(boolean isKeyword) {
+    keyword = isKeyword;
+  }
+
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java	(revision 0)
@@ -0,0 +1,88 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.AttributeImpl;
+
+/**
+ * The start and end character offset of a Token. 
+ */
+public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable {
+  private int startOffset;
+  private int endOffset;
+
+  /** Returns this Token's starting offset, the position of the first character
+  corresponding to this token in the source text.
+
+  Note that the difference between endOffset() and startOffset() may not be
+  equal to termText.length(), as the term text may have been altered by a
+  stemmer or some other filter. */
+  public int startOffset() {
+    return startOffset;
+  }
+
+  
+  /** Sets the starting and ending offset;
+    see {@link #startOffset()} and {@link #endOffset()}. */
+  public void setOffset(int startOffset, int endOffset) {
+    this.startOffset = startOffset;
+    this.endOffset = endOffset;
+  }
+  
+
+  /** Returns this Token's ending offset, one greater than the position of the
+  last character corresponding to this token in the source text. The length
+  of the token in the source text is (endOffset - startOffset). */
+  public int endOffset() {
+    return endOffset;
+  }
+
+
+  @Override
+  public void clear() {
+    startOffset = 0;
+    endOffset = 0;
+  }
+  
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    
+    if (other instanceof OffsetAttributeImpl) {
+      OffsetAttributeImpl o = (OffsetAttributeImpl) other;
+      return o.startOffset == startOffset && o.endOffset == endOffset;
+    }
+    
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    int code = startOffset;
+    code = code * 31 + endOffset;
+    return code;
+  } 
+  
+  @Override
+  public void copyTo(AttributeImpl target) {
+    OffsetAttribute t = (OffsetAttribute) target;
+    t.setOffset(startOffset, endOffset);
+  }  
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java	(revision 0)
@@ -0,0 +1,59 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Attribute;
+
+/** The positionIncrement determines the position of this token
+ * relative to the previous Token in a TokenStream, used in phrase
+ * searching.
+ *
+ * <p>The default value is one.
+ *
+ * <p>Some common uses for this are:<ul>
+ *
+ * <li>Set it to zero to put multiple terms in the same position.  This is
+ * useful if, e.g., a word has multiple stems.  Searches for phrases
+ * including either stem will match.  In this case, all but the first stem's
+ * increment should be set to zero: the increment of the first instance
+ * should be one.  Repeating a token with an increment of zero can also be
+ * used to boost the scores of matches on that token.
+ *
+ * <li>Set it to values greater than one to inhibit exact phrase matches.
+ * If, for example, one does not want phrases to match across removed stop
+ * words, then one could build a stop word filter that removes stop words and
+ * also sets the increment to the number of stop words removed before each
+ * non-stop word.  Then exact phrase queries will only match when the terms
+ * occur with no intervening stop words.
+ *
+ * </ul>
+ * 
+ * @see org.apache.lucene.index.DocsAndPositionsEnum
+ */
+public interface PositionIncrementAttribute extends Attribute {
+  /** Set the position increment. The default value is one.
+   *
+   * @param positionIncrement the distance from the prior term
+   */
+  public void setPositionIncrement(int positionIncrement);
+
+  /** Returns the position increment of this Token.
+   * @see #setPositionIncrement
+   */
+  public int getPositionIncrement();
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/CharStream.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/CharStream.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/CharStream.java	(revision 0)
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis;
+
+import java.io.Reader;
+
+/**
+ * CharStream adds {@link #correctOffset}
+ * functionality over {@link Reader}.  All Tokenizers accept a
+ * CharStream instead of {@link Reader} as input, which enables
+ * arbitrary character based filtering before tokenization. 
+ * The {@link #correctOffset} method fixes offsets to account for
+ * removal or insertion of characters, so that the offsets
+ * reported in the tokens match the character offsets of the
+ * original Reader.
+ */
+public abstract class CharStream extends Reader {
+
+  /**
+   * Called by CharFilter(s) and Tokenizer to correct token offset.
+   *
+   * @param currentOff offset as seen in the output
+   * @return corrected offset based on the input
+   */
+  public abstract int correctOffset(int currentOff);
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/Tokenizer.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/Tokenizer.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/Tokenizer.java	(revision 0)
@@ -0,0 +1,95 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.AttributeSource;
+
+import java.io.Reader;
+import java.io.IOException;
+
+/** A Tokenizer is a TokenStream whose input is a Reader.
+  <p>
+  This is an abstract class; subclasses must override {@link #incrementToken()}
+  <p>
+  NOTE: Subclasses overriding {@link #incrementToken()} must
+  call {@link AttributeSource#clearAttributes()} before
+  setting attributes.
+ */
+public abstract class Tokenizer extends TokenStream {
+  /** The text source for this Tokenizer. */
+  protected Reader input;
+
+  /** Construct a tokenizer with null input. */
+  protected Tokenizer() {}
+  
+  /** Construct a token stream processing the given input. */
+  protected Tokenizer(Reader input) {
+    this.input = CharReader.get(input);
+  }
+  
+  /** Construct a tokenizer with null input using the given AttributeFactory. */
+  protected Tokenizer(AttributeFactory factory) {
+    super(factory);
+  }
+
+  /** Construct a token stream processing the given input using the given AttributeFactory. */
+  protected Tokenizer(AttributeFactory factory, Reader input) {
+    super(factory);
+    this.input = CharReader.get(input);
+  }
+
+  /** Construct a token stream processing the given input using the given AttributeSource. */
+  protected Tokenizer(AttributeSource source) {
+    super(source);
+  }
+
+  /** Construct a token stream processing the given input using the given AttributeSource. */
+  protected Tokenizer(AttributeSource source, Reader input) {
+    super(source);
+    this.input = CharReader.get(input);
+  }
+  
+  /** By default, closes the input Reader. */
+  @Override
+  public void close() throws IOException {
+    if (input != null) {
+      input.close();
+      // LUCENE-2387: don't hold onto Reader after close, so
+      // GC can reclaim
+      input = null;
+    }
+  }
+  
+  /** Return the corrected offset. If {@link #input} is a {@link CharStream} subclass
+   * this method calls {@link CharStream#correctOffset}, else returns <code>currentOff</code>.
+   * @param currentOff offset as seen in the output
+   * @return corrected offset based on the input
+   * @see CharStream#correctOffset
+   */
+  protected final int correctOffset(int currentOff) {
+    return (input instanceof CharStream) ? ((CharStream) input).correctOffset(currentOff) : currentOff;
+  }
+
+  /** Expert: Reset the tokenizer to a new reader.  Typically, an
+   *  analyzer (in its reusableTokenStream method) will use
+   *  this to re-use a previously created tokenizer. */
+  public void reset(Reader input) throws IOException {
+    this.input = input;
+  }
+}
+
Index: lucene/src/declarations/org/apache/lucene/analysis/TokenStream.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/TokenStream.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/TokenStream.java	(revision 0)
@@ -0,0 +1,182 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeSource;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.lang.reflect.Modifier;
+
+/**
+ * A <code>TokenStream</code> enumerates the sequence of tokens from some input text.
+ * <p/>
+ * This is an abstract class; concrete subclasses are:
+ * <ul>
+ * <li>{@link Tokenizer}, a <code>TokenStream</code> whose input is a Reader; and
+ * <li>{@link TokenFilter}, a <code>TokenStream</code> whose input is another
+ * <code>TokenStream</code>.
+ * </ul>
+ * A new <code>TokenStream</code> API has been introduced with Lucene 2.9. This API
+ * has moved from being {@link Token}-based to {@link Attribute}-based. While
+ * {@link Token} still exists in 2.9 as a convenience class, the preferred way
+ * to store the information of a {@link Token} is to use {@link AttributeImpl}s.
+ * <p/>
+ * <code>TokenStream</code> now extends {@link AttributeSource}, which provides
+ * access to all of the token {@link Attribute}s for the <code>TokenStream</code>.
+ * Note that only one instance per {@link AttributeImpl} is created and reused
+ * for every token. This approach reduces object creation and allows local
+ * caching of references to the {@link AttributeImpl}s. See
+ * {@link #incrementToken()} for further details.
+ * <p/>
+ * <b>The workflow of the new <code>TokenStream</code> API is as follows:</b>
+ * <ol>
+ * <li>Instantiation of <code>TokenStream</code>/{@link TokenFilter}s which add/get
+ * attributes to/from the {@link AttributeSource}.
+ * <li>The consumer calls {@link TokenStream#reset()}.
+ * <li>The consumer retrieves attributes from the stream and stores local
+ * references to all attributes it wants to access.
+ * <li>The consumer calls {@link #incrementToken()} until it returns false
+ * consuming the attributes after each call.
+ * <li>The consumer calls {@link #end()} so that any end-of-stream operations
+ * can be performed.
+ * <li>The consumer calls {@link #close()} to release any resource when finished
+ * using the <code>TokenStream</code>.
+ * </ol>
+ * To make sure that filters and consumers know which attributes are available,
+ * the attributes must be added during instantiation. Filters and consumers are
+ * not required to check for availability of attributes in
+ * {@link #incrementToken()}.
+ * <p/>
+ * You can find some example code for the new API in the analysis package level
+ * Javadoc.
+ * <p/>
+ * Sometimes it is desirable to capture a current state of a <code>TokenStream</code>,
+ * e.g., for buffering purposes (see {@link CachingTokenFilter},
+ * TeeSinkTokenFilter). For this usecase
+ * {@link AttributeSource#captureState} and {@link AttributeSource#restoreState}
+ * can be used.
+ * <p>The {@code TokenStream}-API in Lucene is based on the decorator pattern.
+ * Therefore all non-abstract subclasses must be final or have at least a final
+ * implementation of {@link #incrementToken}! This is checked when Java
+ * assertions are enabled.
+ */
+public abstract class TokenStream extends AttributeSource implements Closeable {
+
+  /**
+   * A TokenStream using the default attribute factory.
+   */
+  protected TokenStream() {
+    super();
+    assert assertFinal();
+  }
+
+  /**
+   * A TokenStream that uses the same attributes as the supplied one.
+   */
+  protected TokenStream(AttributeSource input) {
+    super(input);
+    assert assertFinal();
+  }
+
+  /**
+   * A TokenStream using the supplied AttributeFactory for creating new {@link Attribute} instances.
+   */
+  protected TokenStream(AttributeFactory factory) {
+    super(factory);
+    assert assertFinal();
+  }
+
+  private boolean assertFinal() {
+    try {
+      final Class<?> clazz = getClass();
+      assert clazz.isAnonymousClass() ||
+          (clazz.getModifiers() & (Modifier.FINAL | Modifier.PRIVATE)) != 0 ||
+          Modifier.isFinal(clazz.getMethod("incrementToken").getModifiers()) :
+          "TokenStream implementation classes or at least their incrementToken() implementation must be final";
+      return true;
+    } catch (NoSuchMethodException nsme) {
+      return false;
+    }
+  }
+
+  /**
+   * Consumers use this method to advance the stream to
+   * the next token. Implementing classes must implement this method and update
+   * the appropriate {@link AttributeImpl}s with the attributes of the next
+   * token.
+   * <p/>
+   * The producer must make no assumptions about the attributes after the method
+   * has been returned: the caller may arbitrarily change it. If the producer
+   * needs to preserve the state for subsequent calls, it can use
+   * {@link #captureState} to create a copy of the current attribute state.
+   * <p/>
+   * This method is called for every token of a document, so an efficient
+   * implementation is crucial for good performance. To avoid calls to
+   * {@link #addAttribute(Class)} and {@link #getAttribute(Class)},
+   * references to all {@link AttributeImpl}s that this stream uses should be
+   * retrieved during instantiation.
+   * <p/>
+   * To ensure that filters and consumers know which attributes are available,
+   * the attributes must be added during instantiation. Filters and consumers
+   * are not required to check for availability of attributes in
+   * {@link #incrementToken()}.
+   *
+   * @return false for end of stream; true otherwise
+   */
+  public abstract boolean incrementToken() throws IOException;
+
+  /**
+   * This method is called by the consumer after the last token has been
+   * consumed, after {@link #incrementToken()} returned <code>false</code>
+   * (using the new <code>TokenStream</code> API). Streams implementing the old API
+   * should upgrade to use this feature.
+   * <p/>
+   * This method can be used to perform any end-of-stream operations, such as
+   * setting the final offset of a stream. The final offset of a stream might
+   * differ from the offset of the last token eg in case one or more whitespaces
+   * followed after the last token, but a WhitespaceTokenizer was used.
+   *
+   * @throws IOException
+   */
+  public void end() throws IOException {
+    // do nothing by default
+  }
+
+  /**
+   * Resets this stream to the beginning. This is an optional operation, so
+   * subclasses may or may not implement this method. {@link #reset()} is not needed for
+   * the standard indexing process. However, if the tokens of a
+   * <code>TokenStream</code> are intended to be consumed more than once, it is
+   * necessary to implement {@link #reset()}. Note that if your TokenStream
+   * caches tokens and feeds them back again after a reset, it is imperative
+   * that you clone the tokens when you store them away (on the first pass) as
+   * well as when you return them (on future passes after {@link #reset()}).
+   */
+  public void reset() throws IOException {
+  }
+
+  /**
+   * Releases resources associated with this stream.
+   */
+  public void close() throws IOException {
+  }
+
+}
Index: lucene/src/declarations/org/apache/lucene/analysis/package.html
===================================================================
--- lucene/src/declarations/org/apache/lucene/analysis/package.html	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/analysis/package.html	(revision 0)
@@ -0,0 +1,630 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+<p>API and code to convert text into indexable/searchable tokens.  Covers {@link org.apache.lucene.analysis.Analyzer} and related classes.</p>
+<h2>Parsing? Tokenization? Analysis!</h2>
+<p>
+Lucene, indexing and search library, accepts only plain text input.
+<p>
+<h2>Parsing</h2>
+<p>
+Applications that build their search capabilities upon Lucene may support documents in various formats &ndash; HTML, XML, PDF, Word &ndash; just to name a few.
+Lucene does not care about the <i>Parsing</i> of these and other document formats, and it is the responsibility of the 
+application using Lucene to use an appropriate <i>Parser</i> to convert the original format into plain text before passing that plain text to Lucene.
+<p>
+<h2>Tokenization</h2>
+<p>
+Plain text passed to Lucene for indexing goes through a process generally called tokenization. Tokenization is the process
+of breaking input text into small indexing elements &ndash; tokens.
+The way input text is broken into tokens heavily influences how people will then be able to search for that text. 
+For instance, sentence beginnings and endings can be identified to provide for more accurate phrase 
+and proximity searches (though sentence identification is not provided by Lucene).
+<p>
+In some cases simply breaking the input text into tokens is not enough &ndash; a deeper <i>Analysis</i> may be needed.
+There are many post tokenization steps that can be done, including (but not limited to):
+<ul>
+  <li><a href="http://en.wikipedia.org/wiki/Stemming">Stemming</a> &ndash; 
+      Replacing of words by their stems. 
+      For instance with English stemming "bikes" is replaced by "bike"; 
+      now query "bike" can find both documents containing "bike" and those containing "bikes".
+  </li>
+  <li><a href="http://en.wikipedia.org/wiki/Stop_words">Stop Words Filtering</a> &ndash; 
+      Common words like "the", "and" and "a" rarely add any value to a search.
+      Removing them shrinks the index size and increases performance.
+      It may also reduce some "noise" and actually improve search quality.
+  </li>
+  <li><a href="http://en.wikipedia.org/wiki/Text_normalization">Text Normalization</a> &ndash; 
+      Stripping accents and other character markings can make for better searching.
+  </li>
+  <li><a href="http://en.wikipedia.org/wiki/Synonym">Synonym Expansion</a> &ndash; 
+      Adding in synonyms at the same token position as the current word can mean better 
+      matching when users search with words in the synonym set.
+  </li>
+</ul> 
+<p>
+<h2>Core Analysis</h2>
+<p>
+  The analysis package provides the mechanism to convert Strings and Readers into tokens that can be indexed by Lucene.  There
+  are three main classes in the package from which all analysis processes are derived.  These are:
+  <ul>
+    <li>{@link org.apache.lucene.analysis.Analyzer} &ndash; An Analyzer is responsible for building a {@link org.apache.lucene.analysis.TokenStream} which can be consumed
+    by the indexing and searching processes.  See below for more information on implementing your own Analyzer.</li>
+    <li>{@link org.apache.lucene.analysis.Tokenizer} &ndash; A Tokenizer is a {@link org.apache.lucene.analysis.TokenStream} and is responsible for breaking
+    up incoming text into tokens. In most cases, an Analyzer will use a Tokenizer as the first step in
+    the analysis process.</li>
+    <li>{@link org.apache.lucene.analysis.TokenFilter} &ndash; A TokenFilter is also a {@link org.apache.lucene.analysis.TokenStream} and is responsible
+    for modifying tokens that have been created by the Tokenizer.  Common modifications performed by a
+    TokenFilter are: deletion, stemming, synonym injection, and down casing.  Not all Analyzers require TokenFilters</li>
+  </ul>
+  <b>Lucene 2.9 introduces a new TokenStream API. Please see the section "New TokenStream API" below for more details.</b>
+</p>
+<h2>Hints, Tips and Traps</h2>
+<p>
+   The synergy between {@link org.apache.lucene.analysis.Analyzer} and {@link org.apache.lucene.analysis.Tokenizer}
+   is sometimes confusing. To ease this confusion, here are some clarifications:
+   <ul>
+      <li>The {@link org.apache.lucene.analysis.Analyzer} is responsible for the entire task of 
+          <u>creating</u> tokens out of the input text, while the {@link org.apache.lucene.analysis.Tokenizer}
+          is only responsible for <u>breaking</u> the input text into tokens. Very likely, tokens created 
+          by the {@link org.apache.lucene.analysis.Tokenizer} would be modified or even omitted 
+          by the {@link org.apache.lucene.analysis.Analyzer} (via one or more
+          {@link org.apache.lucene.analysis.TokenFilter}s) before being returned.
+       </li>
+       <li>{@link org.apache.lucene.analysis.Tokenizer} is a {@link org.apache.lucene.analysis.TokenStream}, 
+           but {@link org.apache.lucene.analysis.Analyzer} is not.
+       </li>
+       <li>{@link org.apache.lucene.analysis.Analyzer} is "field aware", but 
+           {@link org.apache.lucene.analysis.Tokenizer} is not.
+       </li>
+   </ul>
+</p>
+<p>
+  Lucene Java provides a number of analysis capabilities, the most commonly used one being the StandardAnalyzer.  
+  Many applications will have a long and industrious life with nothing more
+  than the StandardAnalyzer.  However, there are a few other classes/packages that are worth mentioning:
+  <ol>
+    <li>PerFieldAnalyzerWrapper &ndash; Most Analyzers perform the same operation on all
+      {@link org.apache.lucene.document.Field}s.  The PerFieldAnalyzerWrapper can be used to associate a different Analyzer with different
+      {@link org.apache.lucene.document.Field}s.</li>
+    <li>The modules/analysis library located at the root of the Lucene distribution has a number of different Analyzer implementations to solve a variety
+    of different problems related to searching.  Many of the Analyzers are designed to analyze non-English languages.</li>
+    <li>There are a variety of Tokenizer and TokenFilter implementations in this package.  Take a look around, chances are someone has implemented what you need.</li>
+  </ol>
+</p>
+<p>
+  Analysis is one of the main causes of performance degradation during indexing.  Simply put, the more you analyze the slower the indexing (in most cases).
+  Perhaps your application would be just fine using the simple WhitespaceTokenizer combined with a StopFilter. The contrib/benchmark library can be useful 
+  for testing out the speed of the analysis process.
+</p>
+<h2>Invoking the Analyzer</h2>
+<p>
+  Applications usually do not invoke analysis &ndash; Lucene does it for them:
+  <ul>
+    <li>At indexing, as a consequence of 
+        {@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document) addDocument(doc)},
+        the Analyzer in effect for indexing is invoked for each indexed field of the added document.
+    </li>
+    <li>At search, as a consequence of
+        {@link org.apache.lucene.queryParser.QueryParser#parse(java.lang.String) QueryParser.parse(queryText)},
+        the QueryParser may invoke the Analyzer in effect.
+        Note that for some queries analysis does not take place, e.g. wildcard queries.
+    </li>
+  </ul>
+  However an application might invoke Analysis of any text for testing or for any other purpose, something like:
+  <PRE class="prettyprint">
+      Analyzer analyzer = new StandardAnalyzer(); // or any other analyzer
+      TokenStream ts = analyzer.tokenStream("myfield",new StringReader("some text goes here"));
+      while (ts.incrementToken()) {
+        System.out.println("token: " + ts);
+      }
+  </PRE>
+</p>
+<h2>Indexing Analysis vs. Search Analysis</h2>
+<p>
+  Selecting the "correct" analyzer is crucial
+  for search quality, and can also affect indexing and search performance.
+  The "correct" analyzer differs between applications.
+  Lucene java's wiki page 
+  <a href="http://wiki.apache.org/lucene-java/AnalysisParalysis">AnalysisParalysis</a> 
+  provides some data on "analyzing your analyzer".
+  Here are some rules of thumb:
+  <ol>
+    <li>Test test test... (did we say test?)</li>
+    <li>Beware of over analysis &ndash; might hurt indexing performance.</li>
+    <li>Start with same analyzer for indexing and search, otherwise searches would not find what they are supposed to...</li>
+    <li>In some cases a different analyzer is required for indexing and search, for instance:
+        <ul>
+           <li>Certain searches require more stop words to be filtered. (I.e. more than those that were filtered at indexing.)</li>
+           <li>Query expansion by synonyms, acronyms, auto spell correction, etc.</li>
+        </ul>
+        This might sometimes require a modified analyzer &ndash; see the next section on how to do that.
+    </li>
+  </ol>
+</p>
+<h2>Implementing your own Analyzer</h2>
+<p>Creating your own Analyzer is straightforward. It usually involves either wrapping an existing Tokenizer and  set of TokenFilters to create a new Analyzer
+or creating both the Analyzer and a Tokenizer or TokenFilter.  Before pursuing this approach, you may find it worthwhile
+to explore the modules/analysis library and/or ask on the java-user@lucene.apache.org mailing list first to see if what you need already exists.
+If you are still committed to creating your own Analyzer or TokenStream derivation (Tokenizer or TokenFilter) have a look at
+the source code of any one of the many samples located in this package.
+</p>
+<p>
+  The following sections discuss some aspects of implementing your own analyzer.
+</p>
+<h3>Field Section Boundaries</h3>
+<p>
+  When {@link org.apache.lucene.document.Document#add(org.apache.lucene.document.Fieldable) document.add(field)}
+  is called multiple times for the same field name, we could say that each such call creates a new 
+  section for that field in that document. 
+  In fact, a separate call to 
+  {@link org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader) tokenStream(field,reader)}
+  would take place for each of these so called "sections".
+  However, the default Analyzer behavior is to treat all these sections as one large section. 
+  This allows phrase search and proximity search to seamlessly cross 
+  boundaries between these "sections".
+  In other words, if a certain field "f" is added like this:
+  <PRE class="prettyprint">
+      document.add(new Field("f","first ends",...));
+      document.add(new Field("f","starts two",...));
+      indexWriter.addDocument(document);
+  </PRE>
+  Then, a phrase search for "ends starts" would find that document.
+  Where desired, this behavior can be modified by introducing a "position gap" between consecutive field "sections", 
+  simply by overriding 
+  {@link org.apache.lucene.analysis.Analyzer#getPositionIncrementGap(java.lang.String) Analyzer.getPositionIncrementGap(fieldName)}:
+  <PRE class="prettyprint">
+      Analyzer myAnalyzer = new StandardAnalyzer() {
+         public int getPositionIncrementGap(String fieldName) {
+           return 10;
+         }
+      };
+  </PRE>
+</p>
+<h3>Token Position Increments</h3>
+<p>
+   By default, all tokens created by Analyzers and Tokenizers have a 
+   {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute#getPositionIncrement() position increment} of one.
+   This means that the position stored for that token in the index would be one more than
+   that of the previous token.
+   Recall that phrase and proximity searches rely on position info.
+</p>
+<p>
+   If the selected analyzer filters the stop words "is" and "the", then for a document 
+   containing the string "blue is the sky", only the tokens "blue", "sky" are indexed, 
+   with position("sky") = 1 + position("blue"). Now, a phrase query "blue is the sky"
+   would find that document, because the same analyzer filters the same stop words from
+   that query. But also the phrase query "blue sky" would find that document.
+</p>
+<p>   
+   If this behavior does not fit the application needs,
+   a modified analyzer can be used, that would increment further the positions of
+   tokens following a removed stop word, using
+   {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute#setPositionIncrement(int)}.
+   This can be done with something like:
+   <PRE class="prettyprint">
+      public TokenStream tokenStream(final String fieldName, Reader reader) {
+        final TokenStream ts = someAnalyzer.tokenStream(fieldName, reader);
+        TokenStream res = new TokenStream() {
+          CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+          PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+        
+          public boolean incrementToken() throws IOException {
+            int extraIncrement = 0;
+            while (true) {
+              boolean hasNext = ts.incrementToken();
+              if (hasNext) {
+                if (stopWords.contains(termAtt.toString())) {
+                  extraIncrement++; // filter this word
+                  continue;
+                } 
+                if (extraIncrement>0) {
+                  posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+extraIncrement);
+                }
+              }
+              return hasNext;
+            }
+          }
+        };
+        return res;
+      }
+   </PRE>
+   Now, with this modified analyzer, the phrase query "blue sky" would find that document.
+   But note that this is not yet a perfect solution, because any phrase query "blue w1 w2 sky"
+   where both w1 and w2 are stop words would match that document.
+</p>
+<p>
+   Few more use cases for modifying position increments are:
+   <ol>
+     <li>Inhibiting phrase and proximity matches in sentence boundaries &ndash; for this, a tokenizer that 
+         identifies a new sentence can add 1 to the position increment of the first token of the new sentence.</li>
+     <li>Injecting synonyms &ndash; here, synonyms of a token should be added after that token, 
+         and their position increment should be set to 0.
+         As result, all synonyms of a token would be considered to appear in exactly the 
+         same position as that token, and so would they be seen by phrase and proximity searches.</li>
+   </ol>
+</p>
+<h2>New TokenStream API</h2>
+<p>
+	With Lucene 2.9 we introduce a new TokenStream API. The old API used to produce Tokens. A Token
+	has getter and setter methods for different properties like positionIncrement and termText.
+	While this approach was sufficient for the default indexing format, it is not versatile enough for
+	Flexible Indexing, a term which summarizes the effort of making the Lucene indexer pluggable and extensible for custom
+	index formats.
+</p>
+<p>
+A fully customizable indexer means that users will be able to store custom data structures on disk. Therefore an API
+is necessary that can transport custom types of data from the documents to the indexer.
+</p>
+<h3>Attribute and AttributeSource</h3> 
+Lucene 2.9 therefore introduces a new pair of classes called {@link org.apache.lucene.util.Attribute} and
+{@link org.apache.lucene.util.AttributeSource}. An Attribute serves as a
+particular piece of information about a text token. For example, {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}
+ contains the term text of a token, and {@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute} contains the start and end character offsets of a token.
+An AttributeSource is a collection of Attributes with a restriction: there may be only one instance of each attribute type. TokenStream now extends AttributeSource, which
+means that one can add Attributes to a TokenStream. Since TokenFilter extends TokenStream, all filters are also
+AttributeSources.
+<p>
+	Lucene now provides six Attributes out of the box, which replace the variables the Token class has:
+	<ul>
+	  <li>{@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}<p>The term text of a token.</p></li>
+  	  <li>{@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute}<p>The start and end offset of token in characters.</p></li>
+	  <li>{@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute}<p>See above for detailed information about position increment.</p></li>
+	  <li>{@link org.apache.lucene.analysis.tokenattributes.PayloadAttribute}<p>The payload that a Token can optionally have.</p></li>
+	  <li>{@link org.apache.lucene.analysis.tokenattributes.TypeAttribute}<p>The type of the token. Default is 'word'.</p></li>
+	  <li>{@link org.apache.lucene.analysis.tokenattributes.FlagsAttribute}<p>Optional flags a token can have.</p></li>
+	</ul>
+</p>
+<h3>Using the new TokenStream API</h3>
+There are a few important things to know in order to use the new API efficiently which are summarized here. You may want
+to walk through the example below first and come back to this section afterwards.
+<ol><li>
+Please keep in mind that an AttributeSource can only have one instance of a particular Attribute. Furthermore, if 
+a chain of a TokenStream and multiple TokenFilters is used, then all TokenFilters in that chain share the Attributes
+with the TokenStream.
+</li>
+<br>
+<li>
+Attribute instances are reused for all tokens of a document. Thus, a TokenStream/-Filter needs to update
+the appropriate Attribute(s) in incrementToken(). The consumer, commonly the Lucene indexer, consumes the data in the
+Attributes and then calls incrementToken() again until it returns false, which indicates that the end of the stream
+was reached. This means that in each call of incrementToken() a TokenStream/-Filter can safely overwrite the data in
+the Attribute instances.
+</li>
+<br>
+<li>
+For performance reasons a TokenStream/-Filter should add/get Attributes during instantiation; i.e., create an attribute in the
+constructor and store references to it in an instance variable.  Using an instance variable instead of calling addAttribute()/getAttribute() 
+in incrementToken() will avoid attribute lookups for every token in the document.
+</li>
+<br>
+<li>
+All methods in AttributeSource are idempotent, which means calling them multiple times always yields the same
+result. This is especially important to know for addAttribute(). The method takes the <b>type</b> (<code>Class</code>)
+of an Attribute as an argument and returns an <b>instance</b>. If an Attribute of the same type was previously added, then
+the already existing instance is returned, otherwise a new instance is created and returned. Therefore TokenStreams/-Filters
+can safely call addAttribute() with the same Attribute type multiple times. Even consumers of TokenStreams should
+normally call addAttribute() instead of getAttribute(), because it would not fail if the TokenStream does not have this
+Attribute (getAttribute() would throw an IllegalArgumentException, if the Attribute is missing). More advanced code
+could simply check with hasAttribute(), if a TokenStream has it, and may conditionally leave out processing for
+extra performance.
+</li></ol>
+<h3>Example</h3>
+In this example we will create a WhiteSpaceTokenizer and use a LengthFilter to suppress all words that only
+have two or fewer characters. The LengthFilter is part of the Lucene core and its implementation will be explained
+here to illustrate the usage of the new TokenStream API.<br>
+Then we will develop a custom Attribute, a PartOfSpeechAttribute, and add another filter to the chain which
+utilizes the new custom attribute, and call it PartOfSpeechTaggingFilter.
+<h4>Whitespace tokenization</h4>
+<pre class="prettyprint">
+public class MyAnalyzer extends Analyzer {
+
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    TokenStream stream = new WhitespaceTokenizer(reader);
+    return stream;
+  }
+  
+  public static void main(String[] args) throws IOException {
+    // text to tokenize
+    final String text = "This is a demo of the new TokenStream API";
+    
+    MyAnalyzer analyzer = new MyAnalyzer();
+    TokenStream stream = analyzer.tokenStream("field", new StringReader(text));
+    
+    // get the CharTermAttribute from the TokenStream
+    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
+
+    stream.reset();
+    
+    // print all tokens until stream is exhausted
+    while (stream.incrementToken()) {
+      System.out.println(termAtt.toString());
+    }
+    
+    stream.end();
+    stream.close();
+  }
+}
+</pre>
+In this easy example a simple white space tokenization is performed. In main() a loop consumes the stream and
+prints the term text of the tokens by accessing the CharTermAttribute that the WhitespaceTokenizer provides. 
+Here is the output:
+<pre>
+This
+is
+a
+demo
+of
+the
+new
+TokenStream
+API
+</pre>
+<h4>Adding a LengthFilter</h4>
+We want to suppress all tokens that have 2 or less characters. We can do that easily by adding a LengthFilter 
+to the chain. Only the tokenStream() method in our analyzer needs to be changed:
+<pre class="prettyprint">
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    TokenStream stream = new WhitespaceTokenizer(reader);
+    stream = new LengthFilter(stream, 3, Integer.MAX_VALUE);
+    return stream;
+  }
+</pre>
+Note how now only words with 3 or more characters are contained in the output:
+<pre>
+This
+demo
+the
+new
+TokenStream
+API
+</pre>
+Now let's take a look how the LengthFilter is implemented (it is part of Lucene's core):
+<pre class="prettyprint">
+public final class LengthFilter extends TokenFilter {
+
+  final int min;
+  final int max;
+  
+  private CharTermAttribute termAtt;
+
+  /**
+   * Build a filter that removes words that are too long or too
+   * short from the text.
+   */
+  public LengthFilter(TokenStream in, int min, int max)
+  {
+    super(in);
+    this.min = min;
+    this.max = max;
+    termAtt = addAttribute(CharTermAttribute.class);
+  }
+  
+  /**
+   * Returns the next input Token whose term() is the right len
+   */
+  public final boolean incrementToken() throws IOException
+  {
+    assert termAtt != null;
+    // return the first non-stop word found
+    while (input.incrementToken()) {
+      int len = termAtt.length();
+      if (len >= min && len <= max) {
+          return true;
+      }
+      // note: else we ignore it but should we index each part of it?
+    }
+    // reached EOS -- return null
+    return false;
+  }
+}
+</pre>
+The CharTermAttribute is added in the constructor and stored in the instance variable <code>termAtt</code>.
+Remember that there can only be a single instance of CharTermAttribute in the chain, so in our example the 
+<code>addAttribute()</code> call in LengthFilter returns the CharTermAttribute that the WhitespaceTokenizer already added. The tokens
+are retrieved from the input stream in the <code>incrementToken()</code> method. By looking at the term text
+in the CharTermAttribute the length of the term can be determined and too short or too long tokens are skipped. 
+Note how <code>incrementToken()</code> can efficiently access the instance variable; no attribute lookup
+is necessary. The same is true for the consumer, which can simply use local references to the Attributes.
+
+<h4>Adding a custom Attribute</h4>
+Now we're going to implement our own custom Attribute for part-of-speech tagging and call it consequently 
+<code>PartOfSpeechAttribute</code>. First we need to define the interface of the new Attribute:
+<pre class="prettyprint">
+  public interface PartOfSpeechAttribute extends Attribute {
+    public static enum PartOfSpeech {
+      Noun, Verb, Adjective, Adverb, Pronoun, Preposition, Conjunction, Article, Unknown
+    }
+  
+    public void setPartOfSpeech(PartOfSpeech pos);
+  
+    public PartOfSpeech getPartOfSpeech();
+  }
+</pre>
+
+Now we also need to write the implementing class. The name of that class is important here: By default, Lucene
+checks if there is a class with the name of the Attribute with the postfix 'Impl'. In this example, we would
+consequently call the implementing class <code>PartOfSpeechAttributeImpl</code>. <br/>
+This should be the usual behavior. However, there is also an expert-API that allows changing these naming conventions:
+{@link org.apache.lucene.util.AttributeSource.AttributeFactory}. The factory accepts an Attribute interface as argument
+and returns an actual instance. You can implement your own factory if you need to change the default behavior. <br/><br/>
+
+Now here is the actual class that implements our new Attribute. Notice that the class has to extend
+{@link org.apache.lucene.util.AttributeImpl}:
+
+<pre class="prettyprint">
+public final class PartOfSpeechAttributeImpl extends AttributeImpl 
+                            implements PartOfSpeechAttribute {
+  
+  private PartOfSpeech pos = PartOfSpeech.Unknown;
+  
+  public void setPartOfSpeech(PartOfSpeech pos) {
+    this.pos = pos;
+  }
+  
+  public PartOfSpeech getPartOfSpeech() {
+    return pos;
+  }
+
+  public void clear() {
+    pos = PartOfSpeech.Unknown;
+  }
+
+  public void copyTo(AttributeImpl target) {
+    ((PartOfSpeechAttributeImpl) target).pos = pos;
+  }
+
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    
+    if (other instanceof PartOfSpeechAttributeImpl) {
+      return pos == ((PartOfSpeechAttributeImpl) other).pos;
+    }
+ 
+    return false;
+  }
+
+  public int hashCode() {
+    return pos.ordinal();
+  }
+}
+</pre>
+This is a simple Attribute implementation that has only a single variable, which stores the part-of-speech of a token. It extends the
+new <code>AttributeImpl</code> class and therefore implements its abstract methods <code>clear(), copyTo(), equals(), hashCode()</code>.
+Now we need a TokenFilter that can set this new PartOfSpeechAttribute for each token. In this example we show a very naive filter
+that tags every word with a leading upper-case letter as a 'Noun' and all other words as 'Unknown'.
+<pre class="prettyprint">
+  public static class PartOfSpeechTaggingFilter extends TokenFilter {
+    PartOfSpeechAttribute posAtt;
+    CharTermAttribute termAtt;
+    
+    protected PartOfSpeechTaggingFilter(TokenStream input) {
+      super(input);
+      posAtt = addAttribute(PartOfSpeechAttribute.class);
+      termAtt = addAttribute(CharTermAttribute.class);
+    }
+    
+    public boolean incrementToken() throws IOException {
+      if (!input.incrementToken()) {return false;}
+      posAtt.setPartOfSpeech(determinePOS(termAtt.buffer(), 0, termAtt.length()));
+      return true;
+    }
+    
+    // determine the part of speech for the given term
+    protected PartOfSpeech determinePOS(char[] term, int offset, int length) {
+      // naive implementation that tags every uppercased word as noun
+      if (length > 0 && Character.isUpperCase(term[0])) {
+        return PartOfSpeech.Noun;
+      }
+      return PartOfSpeech.Unknown;
+    }
+  }
+</pre>
+Just like the LengthFilter, this new filter accesses the attributes it needs in the constructor and
+stores references in instance variables. Notice how you only need to pass in the interface of the new
+Attribute, and instantiating the correct implementing class is automatically taken care of.
+Now we need to add the filter to the chain:
+<pre class="prettyprint">
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    TokenStream stream = new WhitespaceTokenizer(reader);
+    stream = new LengthFilter(stream, 3, Integer.MAX_VALUE);
+    stream = new PartOfSpeechTaggingFilter(stream);
+    return stream;
+  }
+</pre>
+Now let's look at the output:
+<pre>
+This
+demo
+the
+new
+TokenStream
+API
+</pre>
+Apparently it hasn't changed, which shows that adding a custom attribute to a TokenStream/Filter chain does not
+affect any existing consumers, simply because they don't know the new Attribute. Now let's change the consumer
+to make use of the new PartOfSpeechAttribute and print it out:
+<pre class="prettyprint">
+  public static void main(String[] args) throws IOException {
+    // text to tokenize
+    final String text = "This is a demo of the new TokenStream API";
+    
+    MyAnalyzer analyzer = new MyAnalyzer();
+    TokenStream stream = analyzer.tokenStream("field", new StringReader(text));
+    
+    // get the CharTermAttribute from the TokenStream
+    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
+    
+    // get the PartOfSpeechAttribute from the TokenStream
+    PartOfSpeechAttribute posAtt = stream.addAttribute(PartOfSpeechAttribute.class);
+    
+    stream.reset();
+
+    // print all tokens until stream is exhausted
+    while (stream.incrementToken()) {
+      System.out.println(termAtt.toString() + ": " + posAtt.getPartOfSpeech());
+    }
+    
+    stream.end();
+    stream.close();
+  }
+</pre>
+The change that was made is to get the PartOfSpeechAttribute from the TokenStream and print out its contents in
+the while loop that consumes the stream. Here is the new output:
+<pre>
+This: Noun
+demo: Unknown
+the: Unknown
+new: Unknown
+TokenStream: Noun
+API: Noun
+</pre>
+Each word is now followed by its assigned PartOfSpeech tag. Of course this is a naive 
+part-of-speech tagging. The word 'This' should not even be tagged as noun; it is only spelled capitalized because it
+is the first word of a sentence. Actually this is a good opportunity for an exercise. To practice the usage of the new
+API the reader could now write an Attribute and TokenFilter that can specify for each word if it was the first token
+of a sentence or not. Then the PartOfSpeechTaggingFilter can make use of this knowledge and only tag capitalized words
+as nouns if not the first word of a sentence (we know, this is still not a correct behavior, but hey, it's a good exercise). 
+As a small hint, this is how the new Attribute class could begin:
+<pre class="prettyprint">
+  public class FirstTokenOfSentenceAttributeImpl extends AttributeImpl
+                   implements FirstTokenOfSentenceAttribute {
+    
+    private boolean firstToken;
+    
+    public void setFirstToken(boolean firstToken) {
+      this.firstToken = firstToken;
+    }
+    
+    public boolean getFirstToken() {
+      return firstToken;
+    }
+
+    public void clear() {
+      firstToken = false;
+    }
+
+  ...
+</pre>
+</body>
+</html>

Property changes on: lucene/src/declarations/org/apache/lucene/LucenePackage.java
___________________________________________________________________
Added: svn:eol-style
   + native


Property changes on: lucene/src/declarations/org/apache/lucene/index/Payload.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/declarations/org/apache/lucene/util/Attribute.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/util/Attribute.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/util/Attribute.java	(revision 0)
@@ -0,0 +1,24 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Base interface for attributes.
+ */
+public interface Attribute {
+}
Index: lucene/src/declarations/org/apache/lucene/util/AttributeReflector.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/util/AttributeReflector.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/util/AttributeReflector.java	(revision 0)
@@ -0,0 +1,34 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This interface is used to reflect contents of {@link AttributeSource} or {@link AttributeImpl}.
+ */
+public interface AttributeReflector {
+
+  /**
+   * This method gets called for every property in an {@link AttributeImpl}/{@link AttributeSource}
+   * passing the class name of the {@link Attribute}, a key and the actual value.
+   * E.g., an invocation of {@link org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl#reflectWith}
+   * would call this method once using {@code org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class}
+   * as attribute class, {@code "term"} as key and the actual value as a String.
+   */
+  public void reflect(Class<? extends Attribute> attClass, String key, Object value);
+  
+}

Property changes on: lucene/src/declarations/org/apache/lucene/util/IntsRef.java
___________________________________________________________________
Added: svn:eol-style
   + native


Property changes on: lucene/src/declarations/org/apache/lucene/util/SorterTemplate.java
___________________________________________________________________
Added: svn:keywords
   + Date Author Id Revision HeadURL
Added: svn:eol-style
   + native


Property changes on: lucene/src/declarations/org/apache/lucene/util/Constants.java
___________________________________________________________________
Added: cvs2svn:cvs-rev
   + 1.3
Added: svn:keywords
   + Author Date Id Revision
Added: svn:eol-style
   + native


Property changes on: lucene/src/declarations/org/apache/lucene/util/ArrayUtil.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/declarations/org/apache/lucene/util/NumericUtils.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/util/NumericUtils.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/util/NumericUtils.java	(working copy)
@@ -18,9 +18,6 @@
  */
 
 import org.apache.lucene.analysis.NumericTokenStream;
-import org.apache.lucene.document.NumericField;
-import org.apache.lucene.search.NumericRangeFilter;
-import org.apache.lucene.search.NumericRangeQuery; // for javadocs
 
 /**
  * This is a helper class to generate prefix-encoded representations for numerical values
@@ -48,7 +45,7 @@
  * <p>For easy usage, the trie algorithm is implemented for indexing inside
  * {@link NumericTokenStream} that can index <code>int</code>, <code>long</code>,
  * <code>float</code>, and <code>double</code>. For querying,
- * {@link NumericRangeQuery} and {@link NumericRangeFilter} implement the query part
+ * NumericRangeQuery and NumericRangeFilter implement the query part
  * for the same data types.
  *
  * <p>This class can also be used, to generate lexicographically sortable (according to
@@ -63,8 +60,8 @@
   private NumericUtils() {} // no instance!
   
   /**
-   * The default precision step used by {@link NumericField}, {@link NumericTokenStream},
-   * {@link NumericRangeQuery}, and {@link NumericRangeFilter} as default
+   * The default precision step used by NumericField, {@link NumericTokenStream},
+   * NumericRangeQuery, and NumericRangeFilter as default
    */
   public static final int PRECISION_STEP_DEFAULT = 4;
   
@@ -284,7 +281,7 @@
    * {@link org.apache.lucene.search.BooleanQuery} for each call to its
    * {@link LongRangeBuilder#addRange(BytesRef,BytesRef)}
    * method.
-   * <p>This method is used by {@link NumericRangeQuery}.
+   * <p>This method is used by NumericRangeQuery in Lucene core.
    */
   public static void splitLongRange(final LongRangeBuilder builder,
     final int precisionStep,  final long minBound, final long maxBound
@@ -298,7 +295,7 @@
    * {@link org.apache.lucene.search.BooleanQuery} for each call to its
    * {@link IntRangeBuilder#addRange(BytesRef,BytesRef)}
    * method.
-   * <p>This method is used by {@link NumericRangeQuery}.
+   * <p>This method is used by NumericRangeQuery in Lucene core.
    */
   public static void splitIntRange(final IntRangeBuilder builder,
     final int precisionStep,  final int minBound, final int maxBound

Property changes on: lucene/src/declarations/org/apache/lucene/util/NumericUtils.java
___________________________________________________________________
Added: svn:eol-style
   + native


Property changes on: lucene/src/declarations/org/apache/lucene/util/BytesRef.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/declarations/org/apache/lucene/util/AttributeSource.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/util/AttributeSource.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/util/AttributeSource.java	(revision 0)
@@ -0,0 +1,504 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.ref.WeakReference;
+import java.util.Collections;
+import java.util.NoSuchElementException;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.WeakHashMap;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.lucene.analysis.TokenStream; // for javadocs
+
+/**
+ * An AttributeSource contains a list of different {@link AttributeImpl}s,
+ * and methods to add and get them. There can only be a single instance
+ * of an attribute in the same AttributeSource instance. This is ensured
+ * by passing in the actual type of the Attribute (Class&lt;Attribute&gt;) to 
+ * the {@link #addAttribute(Class)}, which then checks if an instance of
+ * that type is already present. If yes, it returns the instance, otherwise
+ * it creates a new instance and returns it.
+ */
+public class AttributeSource {
+  /**
+   * An AttributeFactory creates instances of {@link AttributeImpl}s.
+   */
+  public static abstract class AttributeFactory {
+    /**
+     * returns an {@link AttributeImpl} for the supplied {@link Attribute} interface class.
+     */
+    public abstract AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass);
+    
+    /**
+     * This is the default factory that creates {@link AttributeImpl}s using the
+     * class name of the supplied {@link Attribute} interface class by appending <code>Impl</code> to it.
+     */
+    public static final AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = new DefaultAttributeFactory();
+    
+    private static final class DefaultAttributeFactory extends AttributeFactory {
+      private static final WeakHashMap<Class<? extends Attribute>, WeakReference<Class<? extends AttributeImpl>>> attClassImplMap =
+        new WeakHashMap<Class<? extends Attribute>, WeakReference<Class<? extends AttributeImpl>>>();
+      
+      private DefaultAttributeFactory() {}
+    
+      @Override
+      public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
+        try {
+          return getClassForInterface(attClass).newInstance();
+        } catch (InstantiationException e) {
+          throw new IllegalArgumentException("Could not instantiate implementing class for " + attClass.getName());
+        } catch (IllegalAccessException e) {
+          throw new IllegalArgumentException("Could not instantiate implementing class for " + attClass.getName());
+        }
+      }
+      
+      private static Class<? extends AttributeImpl> getClassForInterface(Class<? extends Attribute> attClass) {
+        synchronized(attClassImplMap) {
+          final WeakReference<Class<? extends AttributeImpl>> ref = attClassImplMap.get(attClass);
+          Class<? extends AttributeImpl> clazz = (ref == null) ? null : ref.get();
+          if (clazz == null) {
+            try {
+              attClassImplMap.put(attClass,
+                new WeakReference<Class<? extends AttributeImpl>>(
+                  clazz = Class.forName(attClass.getName() + "Impl", true, attClass.getClassLoader())
+                  .asSubclass(AttributeImpl.class)
+                )
+              );
+            } catch (ClassNotFoundException e) {
+              throw new IllegalArgumentException("Could not find implementing class for " + attClass.getName());
+            }
+          }
+          return clazz;
+        }
+      }
+    }
+  }
+      
+  /**
+   * This class holds the state of an AttributeSource.
+   * @see #captureState
+   * @see #restoreState
+   */
+  public static final class State implements Cloneable {
+    AttributeImpl attribute;
+    State next;
+    
+    @Override
+    public Object clone() {
+      State clone = new State();
+      clone.attribute = (AttributeImpl) attribute.clone();
+      
+      if (next != null) {
+        clone.next = (State) next.clone();
+      }
+      
+      return clone;
+    }
+  }
+    
+  // These two maps must always be in sync!!!
+  // So they are private, final and read-only from the outside (read-only iterators)
+  private final Map<Class<? extends Attribute>, AttributeImpl> attributes;
+  private final Map<Class<? extends AttributeImpl>, AttributeImpl> attributeImpls;
+  private final State[] currentState;
+
+  private AttributeFactory factory;
+  
+  /**
+   * An AttributeSource using the default attribute factory {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}.
+   */
+  public AttributeSource() {
+    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
+  }
+  
+  /**
+   * An AttributeSource that uses the same attributes as the supplied one.
+   */
+  public AttributeSource(AttributeSource input) {
+    if (input == null) {
+      throw new IllegalArgumentException("input AttributeSource must not be null");
+    }
+    this.attributes = input.attributes;
+    this.attributeImpls = input.attributeImpls;
+    this.currentState = input.currentState;
+    this.factory = input.factory;
+  }
+  
+  /**
+   * An AttributeSource using the supplied {@link AttributeFactory} for creating new {@link Attribute} instances.
+   */
+  public AttributeSource(AttributeFactory factory) {
+    this.attributes = new LinkedHashMap<Class<? extends Attribute>, AttributeImpl>();
+    this.attributeImpls = new LinkedHashMap<Class<? extends AttributeImpl>, AttributeImpl>();
+    this.currentState = new State[1];
+    this.factory = factory;
+  }
+  
+  /**
+   * returns the used AttributeFactory.
+   */
+  public final AttributeFactory getAttributeFactory() {
+    return this.factory;
+  }
+  
+  /** Returns a new iterator that iterates the attribute classes
+   * in the same order they were added in.
+   */
+  public final Iterator<Class<? extends Attribute>> getAttributeClassesIterator() {
+    return Collections.unmodifiableSet(attributes.keySet()).iterator();
+  }
+  
+  /** Returns a new iterator that iterates all unique Attribute implementations.
+   * This iterator may contain fewer entries than {@link #getAttributeClassesIterator},
+   * if one instance implements more than one Attribute interface.
+   */
+  public final Iterator<AttributeImpl> getAttributeImplsIterator() {
+    final State initState = getCurrentState();
+    if (initState != null) {
+      return new Iterator<AttributeImpl>() {
+        private State state = initState;
+      
+        public void remove() {
+          throw new UnsupportedOperationException();
+        }
+        
+        public AttributeImpl next() {
+          if (state == null)
+            throw new NoSuchElementException();
+          final AttributeImpl att = state.attribute;
+          state = state.next;
+          return att;
+        }
+        
+        public boolean hasNext() {
+          return state != null;
+        }
+      };
+    } else {
+      return Collections.<AttributeImpl>emptySet().iterator();
+    }
+  }
+  
+  /** a cache that stores all interfaces for known implementation classes for performance (slow reflection) */
+  private static final WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>> knownImplClasses =
+    new WeakHashMap<Class<? extends AttributeImpl>,LinkedList<WeakReference<Class<? extends Attribute>>>>();
+  
+  static LinkedList<WeakReference<Class<? extends Attribute>>> getAttributeInterfaces(final Class<? extends AttributeImpl> clazz) {
+    synchronized(knownImplClasses) {
+      LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces = knownImplClasses.get(clazz);
+      if (foundInterfaces == null) {
+        // we have a strong reference to the class instance holding all interfaces in the list (parameter "att"),
+        // so all WeakReferences are never evicted by GC
+        knownImplClasses.put(clazz, foundInterfaces = new LinkedList<WeakReference<Class<? extends Attribute>>>());
+        // find all interfaces that this attribute instance implements
+        // and that extend the Attribute interface
+        Class<?> actClazz = clazz;
+        do {
+          for (Class<?> curInterface : actClazz.getInterfaces()) {
+            if (curInterface != Attribute.class && Attribute.class.isAssignableFrom(curInterface)) {
+              foundInterfaces.add(new WeakReference<Class<? extends Attribute>>(curInterface.asSubclass(Attribute.class)));
+            }
+          }
+          actClazz = actClazz.getSuperclass();
+        } while (actClazz != null);
+      }
+      return foundInterfaces;
+    }
+  }
+  
+  /** <b>Expert:</b> Adds a custom AttributeImpl instance with one or more Attribute interfaces.
+   * <p><font color="red"><b>Please note:</b> It is not guaranteed, that <code>att</code> is added to
+   * the <code>AttributeSource</code>, because the provided attributes may already exist.
+   * You should always retrieve the wanted attributes using {@link #getAttribute} after adding
+   * with this method and cast to your class.
+   * The recommended way to use custom implementations is using an {@link AttributeFactory}.
+   * </font></p>
+   */
+  public final void addAttributeImpl(final AttributeImpl att) {
+    final Class<? extends AttributeImpl> clazz = att.getClass();
+    if (attributeImpls.containsKey(clazz)) return;
+    final LinkedList<WeakReference<Class<? extends Attribute>>> foundInterfaces =
+      getAttributeInterfaces(clazz);
+    
+    // add all interfaces of this AttributeImpl to the maps
+    for (WeakReference<Class<? extends Attribute>> curInterfaceRef : foundInterfaces) {
+      final Class<? extends Attribute> curInterface = curInterfaceRef.get();
+      assert (curInterface != null) :
+        "We have a strong reference on the class holding the interfaces, so they should never get evicted";
+      // Attribute is a superclass of this interface
+      if (!attributes.containsKey(curInterface)) {
+        // invalidate state to force recomputation in captureState()
+        this.currentState[0] = null;
+        attributes.put(curInterface, att);
+        attributeImpls.put(clazz, att);
+      }
+    }
+  }
+  
+  /**
+   * The caller must pass in a Class&lt;? extends Attribute&gt; value.
+   * This method first checks if an instance of that class is 
+   * already in this AttributeSource and returns it. Otherwise a
+   * new instance is created, added to this AttributeSource and returned. 
+   */
+  public final <A extends Attribute> A addAttribute(Class<A> attClass) {
+    AttributeImpl attImpl = attributes.get(attClass);
+    if (attImpl == null) {
+      if (!(attClass.isInterface() && Attribute.class.isAssignableFrom(attClass))) {
+        throw new IllegalArgumentException(
+          "addAttribute() only accepts an interface that extends Attribute, but " +
+          attClass.getName() + " does not fulfil this contract."
+        );
+      }
+      addAttributeImpl(attImpl = this.factory.createAttributeInstance(attClass));
+    }
+    return attClass.cast(attImpl);
+  }
+  
+  /** Returns true, iff this AttributeSource has any attributes */
+  public final boolean hasAttributes() {
+    return !this.attributes.isEmpty();
+  }
+
+  /**
+   * The caller must pass in a Class&lt;? extends Attribute&gt; value. 
+   * Returns true, iff this AttributeSource contains the passed-in Attribute.
+   */
+  public final boolean hasAttribute(Class<? extends Attribute> attClass) {
+    return this.attributes.containsKey(attClass);
+  }
+
+  /**
+   * The caller must pass in a Class&lt;? extends Attribute&gt; value. 
+   * Returns the instance of the passed in Attribute contained in this AttributeSource
+   * 
+   * @throws IllegalArgumentException if this AttributeSource does not contain the
+   *         Attribute. It is recommended to always use {@link #addAttribute} even in consumers
+   *         of TokenStreams, because you cannot know if a specific TokenStream really uses
+   *         a specific Attribute. {@link #addAttribute} will automatically make the attribute
+   *         available. If you want to only use the attribute, if it is available (to optimize
+   *         consuming), use {@link #hasAttribute}.
+   */
+  public final <A extends Attribute> A getAttribute(Class<A> attClass) {
+    AttributeImpl attImpl = attributes.get(attClass);
+    if (attImpl == null) {
+      throw new IllegalArgumentException("This AttributeSource does not have the attribute '" + attClass.getName() + "'.");
+    }
+    return attClass.cast(attImpl);
+  }
+    
+  private State getCurrentState() {
+    State s  = currentState[0];
+    if (s != null || !hasAttributes()) {
+      return s;
+    }
+    State c = s = currentState[0] = new State();
+    final Iterator<AttributeImpl> it = attributeImpls.values().iterator();
+    c.attribute = it.next();
+    while (it.hasNext()) {
+      c.next = new State();
+      c = c.next;
+      c.attribute = it.next();
+    }
+    return s;
+  }
+  
+  /**
+   * Resets all Attributes in this AttributeSource by calling
+   * {@link AttributeImpl#clear()} on each Attribute implementation.
+   */
+  public final void clearAttributes() {
+    for (State state = getCurrentState(); state != null; state = state.next) {
+      state.attribute.clear();
+    }
+  }
+  
+  /**
+   * Captures the state of all Attributes. The return value can be passed to
+   * {@link #restoreState} to restore the state of this or another AttributeSource.
+   */
+  public final State captureState() {
+    final State state = this.getCurrentState();
+    return (state == null) ? null : (State) state.clone();
+  }
+  
+  /**
+   * Restores this state by copying the values of all attribute implementations
+   * that this state contains into the attributes implementations of the targetStream.
+   * The targetStream must contain a corresponding instance for each argument
+   * contained in this state (e.g. it is not possible to restore the state of
+   * an AttributeSource containing a TermAttribute into a AttributeSource using
+   * a Token instance as implementation).
+   * <p>
+   * Note that this method does not affect attributes of the targetStream
+   * that are not contained in this state. In other words, if for example
+   * the targetStream contains an OffsetAttribute, but this state doesn't, then
+   * the value of the OffsetAttribute remains unchanged. It might be desirable to
+   * reset its value to the default, in which case the caller should first
+   * call {@link TokenStream#clearAttributes()} on the targetStream.   
+   */
+  public final void restoreState(State state) {
+    if (state == null)  return;
+    
+    do {
+      AttributeImpl targetImpl = attributeImpls.get(state.attribute.getClass());
+      if (targetImpl == null) {
+        throw new IllegalArgumentException("State contains AttributeImpl of type " +
+          state.attribute.getClass().getName() + " that is not in this AttributeSource");
+      }
+      state.attribute.copyTo(targetImpl);
+      state = state.next;
+    } while (state != null);
+  }
+
+  @Override
+  public int hashCode() {
+    int code = 0;
+    for (State state = getCurrentState(); state != null; state = state.next) {
+      code = code * 31 + state.attribute.hashCode();
+    }
+    return code;
+  }
+  
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == this) {
+      return true;
+    }
+
+    if (obj instanceof AttributeSource) {
+      AttributeSource other = (AttributeSource) obj;  
+    
+      if (hasAttributes()) {
+        if (!other.hasAttributes()) {
+          return false;
+        }
+        
+        if (this.attributeImpls.size() != other.attributeImpls.size()) {
+          return false;
+        }
+  
+        // it is only equal if all attribute impls are the same in the same order
+        State thisState = this.getCurrentState();
+        State otherState = other.getCurrentState();
+        while (thisState != null && otherState != null) {
+          if (otherState.attribute.getClass() != thisState.attribute.getClass() || !otherState.attribute.equals(thisState.attribute)) {
+            return false;
+          }
+          thisState = thisState.next;
+          otherState = otherState.next;
+        }
+        return true;
+      } else {
+        return !other.hasAttributes();
+      }
+    } else
+      return false;
+  }
+  
+  /**
+   * This method returns the current attribute values as a string in the following format
+   * by calling the {@link #reflectWith(AttributeReflector)} method:
+   * 
+   * <ul>
+   * <li><em>iff {@code prependAttClass=true}:</em> {@code "AttributeClass#key=value,AttributeClass#key=value"}
+   * <li><em>iff {@code prependAttClass=false}:</em> {@code "key=value,key=value"}
+   * </ul>
+   *
+   * @see #reflectWith(AttributeReflector)
+   */
+  public final String reflectAsString(final boolean prependAttClass) {
+    final StringBuilder buffer = new StringBuilder();
+    reflectWith(new AttributeReflector() {
+      public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
+        if (buffer.length() > 0) {
+          buffer.append(',');
+        }
+        if (prependAttClass) {
+          buffer.append(attClass.getName()).append('#');
+        }
+        buffer.append(key).append('=').append((value == null) ? "null" : value);
+      }
+    });
+    return buffer.toString();
+  }
+  
+  /**
+   * This method is for introspection of attributes, it should simply
+   * add the key/values this AttributeSource holds to the given {@link AttributeReflector}.
+   *
+   * <p>This method iterates over all Attribute implementations and calls the
+   * corresponding {@link AttributeImpl#reflectWith} method.</p>
+   *
+   * @see AttributeImpl#reflectWith
+   */
+  public final void reflectWith(AttributeReflector reflector) {
+    for (State state = getCurrentState(); state != null; state = state.next) {
+      state.attribute.reflectWith(reflector);
+    }
+  }
+
+  /**
+   * Performs a clone of all {@link AttributeImpl} instances returned in a new
+   * {@code AttributeSource} instance. This method can be used to e.g. create another TokenStream
+   * with exactly the same attributes (using {@link #AttributeSource(AttributeSource)}).
+   * You can also use it as a (non-performant) replacement for {@link #captureState}, if you need to look
+   * into / modify the captured state.
+   */
+  public final AttributeSource cloneAttributes() {
+    final AttributeSource clone = new AttributeSource(this.factory);
+    
+    if (hasAttributes()) {
+      // first clone the impls
+      for (State state = getCurrentState(); state != null; state = state.next) {
+        clone.attributeImpls.put(state.attribute.getClass(), (AttributeImpl) state.attribute.clone());
+      }
+      
+      // now the interfaces
+      for (Entry<Class<? extends Attribute>, AttributeImpl> entry : this.attributes.entrySet()) {
+        clone.attributes.put(entry.getKey(), clone.attributeImpls.get(entry.getValue().getClass()));
+      }
+    }
+    
+    return clone;
+  }
+  
+  /**
+   * Copies the contents of this {@code AttributeSource} to the given target {@code AttributeSource}.
+   * The given instance has to provide all {@link Attribute}s this instance contains. 
+   * The actual attribute implementations must be identical in both {@code AttributeSource} instances;
+   * ideally both AttributeSource instances should use the same {@link AttributeFactory}.
+   * You can use this method as a replacement for {@link #restoreState}, if you use
+   * {@link #cloneAttributes} instead of {@link #captureState}.
+   */
+  public final void copyTo(AttributeSource target) {
+    for (State state = getCurrentState(); state != null; state = state.next) {
+      final AttributeImpl targetImpl = target.attributeImpls.get(state.attribute.getClass());
+      if (targetImpl == null) {
+        throw new IllegalArgumentException("This AttributeSource contains AttributeImpl of type " +
+          state.attribute.getClass().getName() + " that is not in the target");
+      }
+      state.attribute.copyTo(targetImpl);
+    }
+  }
+
+}

Property changes on: lucene/src/declarations/org/apache/lucene/util/UnicodeUtil.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/declarations/org/apache/lucene/util/AttributeImpl.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/util/AttributeImpl.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/util/AttributeImpl.java	(revision 0)
@@ -0,0 +1,135 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.lang.ref.WeakReference;
+import java.util.LinkedList;
+
+/**
+ * Base class for Attributes that can be added to a 
+ * {@link org.apache.lucene.util.AttributeSource}.
+ * <p>
+ * Attributes are used to add data in a dynamic, yet type-safe way to a source
+ * of usually streamed objects, e. g. a {@link org.apache.lucene.analysis.TokenStream}.
+ */
+public abstract class AttributeImpl implements Cloneable, Attribute {  
+  /**
+   * Clears the values in this AttributeImpl and resets it to its 
+   * default value. If this implementation implements more than one Attribute interface
+   * it clears all.
+   */
+  public abstract void clear();
+  
+  /**
+   * This method returns the current attribute values as a string in the following format
+   * by calling the {@link #reflectWith(AttributeReflector)} method:
+   * 
+   * <ul>
+   * <li><em>iff {@code prependAttClass=true}:</em> {@code "AttributeClass#key=value,AttributeClass#key=value"}
+   * <li><em>iff {@code prependAttClass=false}:</em> {@code "key=value,key=value"}
+   * </ul>
+   *
+   * @see #reflectWith(AttributeReflector)
+   */
+  public final String reflectAsString(final boolean prependAttClass) {
+    final StringBuilder buffer = new StringBuilder();
+    reflectWith(new AttributeReflector() {
+      public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
+        if (buffer.length() > 0) {
+          buffer.append(',');
+        }
+        if (prependAttClass) {
+          buffer.append(attClass.getName()).append('#');
+        }
+        buffer.append(key).append('=').append((value == null) ? "null" : value);
+      }
+    });
+    return buffer.toString();
+  }
+  
+  /**
+   * This method is for introspection of attributes, it should simply
+   * add the key/values this attribute holds to the given {@link AttributeReflector}.
+   *
+   * <p>The default implementation calls {@link AttributeReflector#reflect} for all
+   * non-static fields from the implementing class, using the field name as key
+   * and the field value as value. The Attribute class is also determined by reflection.
+   * Please note that the default implementation can only handle single-Attribute
+   * implementations.
+   *
+   * <p>Custom implementations look like this (e.g. for a combined attribute implementation):
+   * <pre>
+   *   public void reflectWith(AttributeReflector reflector) {
+   *     reflector.reflect(CharTermAttribute.class, "term", term());
+   *     reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", getPositionIncrement());
+   *   }
+   * </pre>
+   *
+   * <p>If you implement this method, make sure that for each invocation, the same set of {@link Attribute}
+   * interfaces and keys are passed to {@link AttributeReflector#reflect} in the same order, but possibly
+   * different values. So don't automatically exclude e.g. {@code null} properties!
+   *
+   * @see #reflectAsString(boolean)
+   */
+  public void reflectWith(AttributeReflector reflector) {
+    final Class<? extends AttributeImpl> clazz = this.getClass();
+    final LinkedList<WeakReference<Class<? extends Attribute>>> interfaces = AttributeSource.getAttributeInterfaces(clazz);
+    if (interfaces.size() != 1) {
+      throw new UnsupportedOperationException(clazz.getName() +
+        " implements more than one Attribute interface, the default reflectWith() implementation cannot handle this.");
+    }
+    final Class<? extends Attribute> interf = interfaces.getFirst().get();
+    final Field[] fields = clazz.getDeclaredFields();
+    try {
+      for (int i = 0; i < fields.length; i++) {
+        final Field f = fields[i];
+        if (Modifier.isStatic(f.getModifiers())) continue;
+        f.setAccessible(true);
+        reflector.reflect(interf, f.getName(), f.get(this));
+      }
+    } catch (IllegalAccessException e) {
+      // this should never happen, because we're just accessing fields
+      // from 'this'
+      throw new RuntimeException(e);
+    }
+  }
+  
+  /**
+   * Copies the values from this Attribute into the passed-in
+   * target attribute. The target implementation must support all the
+   * Attributes this implementation supports.
+   */
+  public abstract void copyTo(AttributeImpl target);
+    
+  /**
+   * Shallow clone. Subclasses must override this if they 
+   * need to clone any members deeply.
+   */
+  @Override
+  public Object clone() {
+    Object clone = null;
+    try {
+      clone = super.clone();
+    } catch (CloneNotSupportedException e) {
+      throw new RuntimeException(e);  // shouldn't happen
+    }
+    return clone;
+  }
+}

Property changes on: lucene/src/declarations/org/apache/lucene/util/MemoryModel.java
___________________________________________________________________
Added: svn:keywords
   + Date Author Id Revision HeadURL
Added: svn:eol-style
   + native

Index: lucene/src/declarations/org/apache/lucene/util/AlreadyClosedException.java
===================================================================
--- lucene/src/declarations/org/apache/lucene/util/AlreadyClosedException.java	(revision 0)
+++ lucene/src/declarations/org/apache/lucene/util/AlreadyClosedException.java	(working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.store;
+package org.apache.lucene.util;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more

Property changes on: lucene/src/declarations/org/apache/lucene/util/AlreadyClosedException.java
___________________________________________________________________
Added: svn:eol-style
   + native


Property changes on: lucene/src/declarations/org/apache/lucene/util/AverageGuessMemoryModel.java
___________________________________________________________________
Added: svn:keywords
   + Date Author Id Revision HeadURL
Added: svn:eol-style
   + native


Property changes on: lucene/src/declarations/org/apache/lucene/util/RamUsageEstimator.java
___________________________________________________________________
Added: svn:keywords
   + Date Author Id Revision HeadURL
Added: svn:eol-style
   + native


Property changes on: lucene/src/declarations/org/apache/lucene/util/CloseableThreadLocal.java
___________________________________________________________________
Added: svn:eol-style
   + native


Property changes on: lucene/src/declarations/org/apache/lucene/util/CharsRef.java
___________________________________________________________________
Added: svn:keywords
   + Date Author Id Revision HeadURL
Added: svn:eol-style
   + native

Index: lucene/build.xml
===================================================================
--- lucene/build.xml	(revision 1128767)
+++ lucene/build.xml	(working copy)
@@ -26,6 +26,7 @@
 
   <!-- Build classpath -->
   <path id="classpath">
+    <pathelement location="${build.dir}/classes/declarations"/>
     <pathelement location="${build.dir}/classes/java"/>
   </path>
 
@@ -42,6 +43,7 @@
     <pathelement location="${build.dir}/classes/test-framework"/>
     <pathelement location="${build.dir}/classes/test"/>
     <pathelement location="${build.dir}/classes/java"/>
+    <pathelement location="${build.dir}/classes/declarations"/>
     <pathelement path="${java.class.path}"/>
   </path>
 
Index: lucene/contrib/contrib-build.xml
===================================================================
--- lucene/contrib/contrib-build.xml	(revision 1128767)
+++ lucene/contrib/contrib-build.xml	(working copy)
@@ -31,6 +31,7 @@
   
   <!-- if you extend the classpath refid in one contrib's build.xml (add JARs), use this as basis: -->
   <path id="base.classpath">
+    <pathelement location="${common.dir}/build/classes/declarations"/>
    <pathelement location="${common.dir}/build/classes/java"/>
    <pathelement path="${project.classpath}"/>
   </path>
