Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java	(revision 1428419)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java	(working copy)
@@ -110,8 +110,6 @@
     // TODO: can we promote some of these to be only
     // offsets offenders?
     Collections.<Class<?>>addAll(brokenComponents,
-      // TODO: fix basetokenstreamtestcase not to trip because this one has no CharTermAtt
-      EmptyTokenizer.class,
       // doesn't actual reset itself!
       CachingTokenFilter.class,
       // doesn't consume whole stream!
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java	(revision 1428419)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java	(working copy)
@@ -20,15 +20,55 @@
 import java.io.IOException;
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
 
-public class TestEmptyTokenStream extends LuceneTestCase {
+public class TestEmptyTokenStream extends BaseTokenStreamTestCase {
 
-  public void test() throws IOException {
+  public void testConsume() throws IOException {
     TokenStream ts = new EmptyTokenStream();
+    ts.reset();
     assertFalse(ts.incrementToken());
+    ts.end();
+    ts.close();
+    // try again with reuse:
     ts.reset();
     assertFalse(ts.incrementToken());
+    ts.end();
+    ts.close();
   }
+
+  public void testConsume2() throws IOException {
+    BaseTokenStreamTestCase.assertTokenStreamContents(new EmptyTokenStream(), new String[0]);
+  }
+
+  public void testIndexWriter_LUCENE4656() throws IOException {
+    Directory directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, null));
+
+    TokenStream ts = new EmptyTokenStream();
+    assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));
+
+    Document doc = new Document();
+    doc.add(new StringField("id", "0", Field.Store.YES));
+    doc.add(new TextField("description", ts));
+
+    // this should not fail because we have no TermToBytesRefAttribute
+    writer.addDocument(doc);
+
+    assertEquals(1, writer.numDocs());
+
+    writer.close();
+    directory.close();
+  }
+
 }
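For reference, testConsume above walks the full TokenStream contract: reset() before consumption, end() and close() afterwards, then the same sequence again to prove reuse works. A minimal sketch of that lifecycle outside the test framework (class name is illustrative; the stream API is Lucene 4.x):

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;

    // Minimal sketch of the lifecycle testConsume exercises; the second
    // round repeats the sequence to demonstrate stream reuse.
    public class ConsumeSketch {
      public static void main(String[] args) throws IOException {
        TokenStream ts = new EmptyTokenStream();
        for (int round = 0; round < 2; round++) {  // second round = reuse
          ts.reset();                              // required before incrementToken()
          while (ts.incrementToken()) {
            throw new AssertionError("EmptyTokenStream must emit no tokens");
          }
          ts.end();                                // end-of-stream bookkeeping
          ts.close();
        }
      }
    }
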
Index: lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java	(revision 1428419)
+++ lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java	(working copy)
@@ -106,75 +106,72 @@
         OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
         PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);
 
-        consumer.start(field);
+        if (hasMoreTokens) {
+          consumer.start(field);
 
-        for (;;) {
+          do {
+            // If we hit an exception in stream.next below
+            // (which is fairly common, eg if analyzer
+            // chokes on a given document), then it's
+            // non-aborting and (above) this one document
+            // will be marked as deleted, but still
+            // consume a docID
 
-          // If we hit an exception in stream.next below
-          // (which is fairly common, eg if analyzer
-          // chokes on a given document), then it's
-          // non-aborting and (above) this one document
-          // will be marked as deleted, but still
-          // consume a docID
-
-          if (!hasMoreTokens) break;
-
-          final int posIncr = posIncrAttribute.getPositionIncrement();
-          if (posIncr < 0) {
-            throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ")");
-          }
-          if (fieldState.position == 0 && posIncr == 0) {
-            throw new IllegalArgumentException("first position increment must be > 0 (got 0)");
-          }
-          int position = fieldState.position + posIncr;
-          if (position > 0) {
-            // NOTE: confusing: this "mirrors" the
-            // position++ we do below
-            position--;
-          } else if (position < 0) {
-            throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
-          }
-
-          // position is legal, we can safely place it in fieldState now.
-          // not sure if anything will use fieldState after non-aborting exc...
-          fieldState.position = position;
-
-          if (posIncr == 0)
-            fieldState.numOverlap++;
-
-          if (checkOffsets) {
-            int startOffset = fieldState.offset + offsetAttribute.startOffset();
-            int endOffset = fieldState.offset + offsetAttribute.endOffset();
-            if (startOffset < 0 || endOffset < startOffset) {
-              throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
-                  + "startOffset=" + startOffset + ",endOffset=" + endOffset);
+            final int posIncr = posIncrAttribute.getPositionIncrement();
+            if (posIncr < 0) {
+              throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ")");
             }
-            if (startOffset < lastStartOffset) {
-              throw new IllegalArgumentException("offsets must not go backwards startOffset="
-                  + startOffset + " is < lastStartOffset=" + lastStartOffset);
+            if (fieldState.position == 0 && posIncr == 0) {
+              throw new IllegalArgumentException("first position increment must be > 0 (got 0)");
             }
-            lastStartOffset = startOffset;
-          }
+            int position = fieldState.position + posIncr;
+            if (position > 0) {
+              // NOTE: confusing: this "mirrors" the
+              // position++ we do below
+              position--;
+            } else if (position < 0) {
+              throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
+            }
+
+            // position is legal, we can safely place it in fieldState now.
+            // not sure if anything will use fieldState after non-aborting exc...
+            fieldState.position = position;
 
-          boolean success = false;
-          try {
-            // If we hit an exception in here, we abort
-            // all buffered documents since the last
-            // flush, on the likelihood that the
-            // internal state of the consumer is now
-            // corrupt and should not be flushed to a
-            // new segment:
-            consumer.add();
-            success = true;
-          } finally {
-            if (!success) {
-              docState.docWriter.setAborting();
+            if (posIncr == 0)
+              fieldState.numOverlap++;
+
+            if (checkOffsets) {
+              int startOffset = fieldState.offset + offsetAttribute.startOffset();
+              int endOffset = fieldState.offset + offsetAttribute.endOffset();
+              if (startOffset < 0 || endOffset < startOffset) {
+                throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
+                    + "startOffset=" + startOffset + ",endOffset=" + endOffset);
+              }
+              if (startOffset < lastStartOffset) {
+                throw new IllegalArgumentException("offsets must not go backwards startOffset="
+                    + startOffset + " is < lastStartOffset=" + lastStartOffset);
+              }
+              lastStartOffset = startOffset;
             }
-          }
-          fieldState.length++;
-          fieldState.position++;
-          hasMoreTokens = stream.incrementToken();
+
+            boolean success = false;
+            try {
+              // If we hit an exception in here, we abort
+              // all buffered documents since the last
+              // flush, on the likelihood that the
+              // internal state of the consumer is now
+              // corrupt and should not be flushed to a
+              // new segment:
+              consumer.add();
+              success = true;
+            } finally {
+              if (!success) {
+                docState.docWriter.setAborting();
+              }
+            }
+            fieldState.length++;
+            fieldState.position++;
+          } while (stream.incrementToken());
         }
 
         // trigger streams to perform end-of-stream operations
        stream.end();
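The change above converts the unconditional consumer.start(field) plus for(;;)/break loop into a guard plus do/while, so a field whose analyzer yields no tokens never starts the consumer chain, while stream.end() still runs either way. A standalone sketch of that loop shape (Stream and Consumer here are hypothetical stand-ins, not Lucene's internal types):

    // Standalone illustration of the new loop shape: the consumer is only
    // started once the first incrementToken() has returned true, and the
    // do/while preserves the original "process, then advance" order.
    public class GuardedLoopSketch {
      interface Stream { boolean incrementToken(); }
      interface Consumer { void start(); void add(); }

      static void invert(Stream stream, Consumer consumer) {
        boolean hasMoreTokens = stream.incrementToken();
        if (hasMoreTokens) {
          consumer.start();   // never reached for a token-less field
          do {
            consumer.add();   // stands in for the position/offset checks + add
          } while (stream.incrementToken());
        }
        // stream.end() would follow here, tokens or no tokens
      }

      public static void main(String[] args) {
        final int[] remaining = {2};
        invert(() -> remaining[0]-- > 0, new Consumer() {
          public void start() { System.out.println("start"); }
          public void add() { System.out.println("add"); }
        });
        invert(() -> false, new Consumer() {
          public void start() { throw new AssertionError("empty field must not start consumer"); }
          public void add() { throw new AssertionError(); }
        });
        System.out.println("empty field: consumer untouched");
      }
    }
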
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java	(revision 1428419)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java	(working copy)
@@ -116,8 +116,11 @@
     assertNotNull(output);
     CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
 
-    assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
-    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
+    CharTermAttribute termAtt = null;
+    if (output.length > 0) {
+      assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
+      termAtt = ts.getAttribute(CharTermAttribute.class);
+    }
 
     OffsetAttribute offsetAtt = null;
     if (startOffsets != null || endOffsets != null || finalOffset != null) {
@@ -615,8 +618,7 @@
       int remainder = random.nextInt(10);
       Reader reader = new StringReader(text);
       TokenStream ts = a.tokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
-      assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
-      CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
+      CharTermAttribute termAtt = ts.hasAttribute(CharTermAttribute.class) ? ts.getAttribute(CharTermAttribute.class) : null;
       OffsetAttribute offsetAtt = ts.hasAttribute(OffsetAttribute.class) ? ts.getAttribute(OffsetAttribute.class) : null;
       PositionIncrementAttribute posIncAtt = ts.hasAttribute(PositionIncrementAttribute.class) ? ts.getAttribute(PositionIncrementAttribute.class) : null;
       PositionLengthAttribute posLengthAtt = ts.hasAttribute(PositionLengthAttribute.class) ? ts.getAttribute(PositionLengthAttribute.class) : null;
@@ -631,6 +633,7 @@
 
       // First pass: save away "correct" tokens
       while (ts.incrementToken()) {
+        assertNotNull("has no CharTermAttribute", termAtt);
         tokens.add(termAtt.toString());
        if (typeAtt != null) types.add(typeAtt.type());
        if (posIncAtt != null) positions.add(posIncAtt.getPositionIncrement());
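Both hunks replace a hard up-front assertion with the null-guarded lookup already used for the optional attributes, deferring the CharTermAttribute check until a token is actually produced. A small sketch of the pattern (AttributeSource.getAttribute throws IllegalArgumentException for a missing attribute, hence the hasAttribute guard; class and method names are illustrative):

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    // Sketch of the guarded-lookup pattern adopted above: fetch the
    // attribute only if present, and re-check it the moment a token
    // is actually produced.
    public class GuardedLookupSketch {
      static void checkTokens(TokenStream ts) throws IOException {
        CharTermAttribute termAtt = ts.hasAttribute(CharTermAttribute.class)
            ? ts.getAttribute(CharTermAttribute.class) : null;
        ts.reset();
        while (ts.incrementToken()) {
          // a stream may legally lack CharTermAttribute only while it
          // emits no tokens; producing one without the attribute is a bug
          if (termAtt == null) {
            throw new AssertionError("has no CharTermAttribute");
          }
          System.out.println(termAtt.toString());
        }
        ts.end();
        ts.close();
      }

      public static void main(String[] args) throws IOException {
        checkTokens(new EmptyTokenStream());  // no attribute, but no token either: passes
      }
    }
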
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/EmptyTokenizer.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/EmptyTokenizer.java	(revision 1428419)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/EmptyTokenizer.java	(working copy)
@@ -17,19 +17,44 @@
  * limitations under the License.
  */
 
+import java.io.IOException;
 import java.io.Reader;
 
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+
 /**
  * Emits no tokens
  */
 public final class EmptyTokenizer extends Tokenizer {
+  int endOffset;
+  final OffsetAttribute offsetAttribute;
+
   public EmptyTokenizer(Reader input) {
     super(input);
+    endOffset = 0;
+    offsetAttribute = addAttribute(OffsetAttribute.class);
   }
 
   @Override
   public boolean incrementToken() {
     return false;
   }
+
+  @Override
+  public void end() throws IOException {
+    while (input.read() != -1) {
+      endOffset += 1L + input.skip(Long.MAX_VALUE);
+    }
+    final int endOffset = correctOffset(this.endOffset);
+    offsetAttribute.setOffset(endOffset, endOffset);
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    endOffset = 0;
+  }
+
 }
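With this change EmptyTokenizer participates in end-of-stream offset checks even though it never emits a token: end() drains the remaining input (one read() plus a skip() per iteration, in case skip() returns early) and reports the corrected input length as the final offset. A minimal sketch of what a caller observes, assuming the test-framework EmptyTokenizer above (the input text is illustrative):

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.EmptyTokenizer;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

    // Sketch: no token is ever produced, yet after end() the final
    // offset reflects the full length of the input.
    public class EmptyTokenizerOffsetSketch {
      public static void main(String[] args) throws IOException {
        EmptyTokenizer tok = new EmptyTokenizer(new StringReader("some ignored text"));
        OffsetAttribute offsetAtt = tok.getAttribute(OffsetAttribute.class);
        tok.reset();
        while (tok.incrementToken()) { /* never entered */ }
        tok.end();  // drains the reader and sets the final offset
        System.out.println(offsetAtt.endOffset());  // 17, the input length
        tok.close();
      }
    }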