Index: CHANGES.txt
===================================================================
--- CHANGES.txt	(revision 781075)
+++ CHANGES.txt	(working copy)
@@ -360,7 +360,8 @@
 
 26. LUCENE-1550: Added new n-gram based String distance measure for spell checking.
     See the Javadocs for NGramDistance.java for a reference paper on why this is helpful (Tom Morton via Grant Ingersoll)
-        
+
+27. LUCENE-1676: Added DelimitedPayloadTokenFilter class for automatically adding payloads "in-stream" (Grant Ingersoll)        
     
 Optimizations
 
Index: contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
===================================================================
--- contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java	(revision 0)
+++ contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java	(revision 0)
@@ -0,0 +1,139 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.util.LuceneTestCase;
+
+import java.io.StringReader;
+
+
+/**
+ * Tests DelimitedPayloadTokenFilter through both incrementToken() and the older next(Token) API,
+ * using the default (identity), float and integer payload encoders.
+ **/
+public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
+
+  /** Default delimiter and encoder, consumed through incrementToken(). */
+  public void testPayloads() throws Exception {
+    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
+    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+    TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
+    PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class);
+    assertTermEquals("The", filter, termAtt, payAtt, null);
+    assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+    assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+    assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
+    assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes("UTF-8"));
+    assertTermEquals("over", filter, termAtt, payAtt, null);
+    assertTermEquals("the", filter, termAtt, payAtt, null);
+    assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+    assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+    assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
+    assertFalse(filter.incrementToken());
+  }
+
+  /** Same input as testPayloads, consumed through next(Token). */
+  public void testNext() throws Exception {
+    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
+    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+    assertTermEquals("The", filter, null);
+    assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
+    assertTermEquals("red", filter, "JJ".getBytes("UTF-8"));
+    assertTermEquals("fox", filter, "NN".getBytes("UTF-8"));
+    assertTermEquals("jumped", filter, "VB".getBytes("UTF-8"));
+    assertTermEquals("over", filter, null);
+    assertTermEquals("the", filter, null);
+    assertTermEquals("lazy", filter, "JJ".getBytes("UTF-8"));
+    assertTermEquals("brown", filter, "JJ".getBytes("UTF-8"));
+    assertTermEquals("dogs", filter, "NN".getBytes("UTF-8"));
+    assertFalse(filter.incrementToken());
+  }
+
+  /** Payload text is parsed by FloatEncoder; expected bytes come from PayloadHelper.encodeFloat. */
+  public void testFloatEncoding() throws Exception {
+    String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
+    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new FloatEncoder());
+    TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
+    PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class);
+    assertTermEquals("The", filter, termAtt, payAtt, null);
+    assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f));
+    assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeFloat(2.0f));
+    assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeFloat(3.5f));
+    assertTermEquals("jumped", filter, termAtt, payAtt, PayloadHelper.encodeFloat(0.5f));
+    assertTermEquals("over", filter, termAtt, payAtt, null);
+    assertTermEquals("the", filter, termAtt, payAtt, null);
+    assertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.encodeFloat(5.0f));
+    assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeFloat(99.3f));
+    assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeFloat(83.7f));
+    assertFalse(filter.incrementToken());
+  }
+
+  /** Payload text is parsed by IntegerEncoder; expected bytes come from PayloadHelper.encodeInt. */
+  public void testIntEncoding() throws Exception {
+    String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
+    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new IntegerEncoder());
+    TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
+    PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class);
+    assertTermEquals("The", filter, termAtt, payAtt, null);
+    assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1));
+    assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeInt(2));
+    assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeInt(3));
+    assertTermEquals("jumped", filter, termAtt, payAtt, null);
+    assertTermEquals("over", filter, termAtt, payAtt, null);
+    assertTermEquals("the", filter, termAtt, payAtt, null);
+    assertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.encodeInt(5));
+    assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeInt(99));
+    assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeInt(83));
+    assertFalse(filter.incrementToken());
+  }
+
+  /** Pulls one token through next(Token) and checks its term and payload. */
+  void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
+    Token tok = new Token();
+    assertTrue(stream.next(tok) != null);
+    assertEquals(expected, tok.term());
+    assertPayloadEquals(expectPay, tok.getPayload());
+  }
+
+  /** Pulls one token through incrementToken() and checks its term and payload. */
+  void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
+    assertTrue(stream.incrementToken());
+    assertEquals(expected, termAtt.term());
+    assertPayloadEquals(expectPay, payAtt.getPayload());
+  }
+
+  /**
+   * Checks payload byte-for-byte against expectPay; a null expectPay means the token must carry no payload.
+   */
+  private void assertPayloadEquals(byte[] expectPay, Payload payload) {
+    if (expectPay == null) {
+      assertNull("expected no payload", payload);
+      return;
+    }
+    assertNotNull("expected a payload of " + expectPay.length + " bytes but found none", payload);
+    assertEquals("payload length", expectPay.length, payload.length());
+    for (int i = 0; i < expectPay.length; i++) {
+      assertEquals("payload byte at index " + i, expectPay[i], payload.byteAt(i));
+    }
+  }
+}
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java	(revision 0)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java	(revision 0)
@@ -0,0 +1,109 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+import java.io.IOException;
+
+
+/**
+ * Characters before the delimiter are the "token", those after are the payload.
+ * <p/>
+ * For example, if the delimiter is '|', then for the string "foo|bar", foo is the token
+ * and "bar" is a payload.
+ * <p/>
+ * Note, you can also include a {@link org.apache.lucene.analysis.payloads.PayloadEncoder} to convert the payload in an appropriate way (from characters to bytes).
+ * <p/>
+ * Note: make sure your Tokenizer doesn't split on the delimiter, or this won't work.
+ *
+ * @see PayloadEncoder
+ */
+public class DelimitedPayloadTokenFilter extends TokenFilter {
+  public static final char DEFAULT_DELIMITER = '|';
+  protected char delimiter = DEFAULT_DELIMITER;
+  protected TermAttribute termAtt;
+  protected PayloadAttribute payAtt;
+  protected PayloadEncoder encoder;
+
+  /** Construct a token stream filtering the given input, using DEFAULT_DELIMITER and an IdentityEncoder. */
+  public DelimitedPayloadTokenFilter(TokenStream input) {
+    this(input, DEFAULT_DELIMITER, new IdentityEncoder());
+  }
+
+  /**
+   * Construct a token stream filtering the given input; delimiter splits term from payload, encoder converts the payload chars.
+   */
+  public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) {
+    super(input);
+    termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+    payAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
+    this.delimiter = delimiter;
+    this.encoder = encoder;
+  }
+
+  /** Advances the input and splits the current term at the first delimiter; returns false once the input is exhausted. */
+  public boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+    final char[] buffer = termAtt.termBuffer();
+    final int length = termAtt.termLength();
+    final int split = indexOfDelimiter(buffer, length);
+    if (split < 0) {
+      //no delimiter: the term is kept whole and the payload is cleared
+      payAtt.setPayload(null);
+    } else {
+      termAtt.setTermBuffer(buffer, 0, split);
+      payAtt.setPayload(encoder.encode(buffer, split + 1, length - (split + 1)));
+    }
+    return true;
+  }
+
+  /** Token-based variant of incrementToken(): splits the returned Token at the first delimiter; null at end of input. */
+  public Token next(Token reusableToken) throws IOException {
+    final Token result = input.next(reusableToken);
+    if (result == null) {
+      return null;
+    }
+    final char[] buffer = result.termBuffer();
+    final int length = result.termLength();
+    final int split = indexOfDelimiter(buffer, length);
+    if (split < 0) {
+      //no delimiter: clear the payload on the token being returned, not on payAtt
+      result.setPayload(null);
+    } else {
+      result.setTermBuffer(buffer, 0, split);
+      result.setPayload(encoder.encode(buffer, split + 1, length - (split + 1)));
+    }
+    return result;
+  }
+
+  /** Returns the index of the first delimiter within the first length chars of buffer, or -1 if there is none. */
+  private int indexOfDelimiter(char[] buffer, int length) {
+    for (int i = 0; i < length; i++) {
+      if (buffer[i] == delimiter) {
+        return i;
+      }
+    }
+    return -1;
+  }
+}
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java	(revision 0)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java	(revision 0)
@@ -0,0 +1,14 @@
+package org.apache.lucene.analysis.payloads;
+
+import org.apache.lucene.index.Payload;
+
+
+/**
+ * Base class for PayloadEncoder implementations: supplies the whole-buffer encode in terms of the offset/length one.
+ **/
+public abstract class AbstractEncoder implements PayloadEncoder{
+  public Payload encode(char[] buffer) {
+    final int wholeLength = buffer.length;
+    return this.encode(buffer, 0, wholeLength);
+  }
+}
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java	(revision 0)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java	(revision 0)
@@ -0,0 +1,35 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Payload;
+
+
+/**
+ * Encodes the payload text as a float: the chars are parsed with Float.parseFloat and the value is
+ * turned into bytes by PayloadHelper.encodeFloat.
+ **/
+public class FloatEncoder extends AbstractEncoder implements PayloadEncoder {
+
+  public Payload encode(char[] buffer, int offset, int length) {
+    final String text = new String(buffer, offset, length);//TODO: improve this so that we don't have to new Strings
+    final byte[] encoded = PayloadHelper.encodeFloat(Float.parseFloat(text));
+    final Payload payload = new Payload();
+    payload.setData(encoded);
+    return payload;
+  }
+}
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java	(revision 0)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java	(revision 0)
@@ -0,0 +1,57 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Payload;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.io.UnsupportedEncodingException;
+
+
+/**
+ *  Does nothing other than convert the char array to a byte array using the specified encoding
+ *  (UTF-8 unless a Charset is passed to the constructor).
+ **/
+public class IdentityEncoder extends AbstractEncoder implements PayloadEncoder{
+
+  protected Charset charset = Charset.forName("UTF-8");
+  protected String charsetName = "UTF-8";  //argh, stupid 1.4
+
+  /** Creates an encoder that emits UTF-8 bytes. */
+  public IdentityEncoder() {
+  }
+
+  /** Creates an encoder that emits bytes in the given charset. */
+  public IdentityEncoder(Charset charset) {
+    this.charset = charset;
+    charsetName = charset.name();
+  }
+
+  /**
+   * Converts length chars of buffer, starting at offset, into a Payload holding their bytes in charsetName.
+   */
+  public Payload encode(char[] buffer, int offset, int length) {
+    final String text = new String(buffer, offset, length);
+    try {
+      return new Payload(text.getBytes(charsetName));
+    } catch (UnsupportedEncodingException e) {
+      //charsetName is expected to name a supported charset; fail loudly instead of returning a null payload
+      throw new RuntimeException("Unsupported payload charset: " + charsetName, e);
+    }
+  }
+}
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java	(revision 0)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java	(revision 0)
@@ -0,0 +1,38 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Payload;
+
+
+/**
+ * Mainly for use with the DelimitedPayloadTokenFilter, converts char buffers to Payload
+ *
+ **/
+public interface PayloadEncoder {
+  /** Convert a whole char array to a Payload; AbstractEncoder implements this as encode(buffer, 0, buffer.length). */
+  Payload encode(char[] buffer);
+
+  /**
+   * Convert a char array to a {@link org.apache.lucene.index.Payload}
+   * @param buffer the chars to convert
+   * @param offset index in buffer of the first char to convert
+   * @param length number of chars to convert, starting at offset
+   * @return the Payload produced from that slice of the buffer
+   */
+  Payload encode(char [] buffer, int offset, int length);
+}
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java	(revision 781075)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java	(working copy)
@@ -31,6 +31,10 @@
     return encodeInt(Float.floatToIntBits(payload), data, offset);
   }
 
+  public static byte[] encodeInt(int payload){ //convenience overload: supplies the output array itself
+    return encodeInt(payload, new byte[4], 0); //fresh 4-byte array, filled by the three-argument overload from offset 0
+  }
+
   public static byte[] encodeInt(int payload, byte[] data, int offset){
     data[offset] = (byte)(payload >> 24);
     data[offset + 1] = (byte)(payload >> 16);
Index: contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IntegerEncoder.java
===================================================================
--- contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IntegerEncoder.java	(revision 0)
+++ contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IntegerEncoder.java	(revision 0)
@@ -0,0 +1,35 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Payload;
+
+
+/**
+ * Encodes the payload text as an int: the chars are parsed with Integer.parseInt and the value is
+ * turned into bytes by PayloadHelper.encodeInt.
+ **/
+public class IntegerEncoder extends AbstractEncoder implements PayloadEncoder {
+
+  public Payload encode(char[] buffer, int offset, int length) {
+    final String text = new String(buffer, offset, length);//TODO: improve this so that we don't have to new Strings
+    final byte[] encoded = PayloadHelper.encodeInt(Integer.parseInt(text));
+    final Payload payload = new Payload();
+    payload.setData(encoded);
+    return payload;
+  }
+}
\ No newline at end of file
