Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java	(revision 1428534)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java	(working copy)
@@ -17,19 +17,16 @@
  * limitations under the License.
  */
 
-import java.lang.reflect.Modifier;
 import java.io.Reader;
 import java.io.StringReader;
-import java.util.ArrayList;
+import java.lang.reflect.Modifier;
 import java.util.Collections;
 import java.util.IdentityHashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.CharFilter;
-import org.apache.lucene.analysis.EmptyTokenizer;
 import org.apache.lucene.analysis.MockCharFilter;
 import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
 import org.apache.lucene.analysis.MockGraphTokenFilter;
@@ -39,10 +36,9 @@
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;
 import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ValidatingTokenFilter;
-import org.apache.lucene.analysis.core.TestRandomChains;
 import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
 import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -72,7 +68,6 @@
       MockRandomLookaheadTokenFilter.class,
       MockTokenFilter.class,
       MockVariableLengthPayloadFilter.class,
-      EmptyTokenizer.class,
       ValidatingTokenFilter.class
     );
   }
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java	(revision 1428534)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java	(working copy)
@@ -46,7 +46,6 @@
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.CharFilter;
-import org.apache.lucene.analysis.EmptyTokenizer;
 import org.apache.lucene.analysis.MockGraphTokenFilter;
 import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
 import org.apache.lucene.analysis.MockTokenFilter;
@@ -110,8 +109,6 @@
     // TODO: can we promote some of these to be only
     // offsets offenders?
     Collections.<Class<?>>addAll(brokenComponents,
-      // TODO: fix basetokenstreamtestcase not to trip because this one has no CharTermAtt
-      EmptyTokenizer.class,
       // doesn't actual reset itself!
       CachingTokenFilter.class,
       // doesn't consume whole stream!
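Note (not part of the patch): with EmptyTokenizer removed from the exclusion
lists above, the test framework no longer ships a dedicated zero-token
Tokenizer. Where the tests below need a stream that produces no tokens, they
use components that already exist. A minimal sketch of the two stand-ins,
assuming the patched tree; the demo class name is hypothetical:

  import java.io.IOException;
  import java.io.StringReader;
  import org.apache.lucene.analysis.MockTokenizer;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.Tokenizer;
  import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;

  public class ZeroTokenStreamsDemo {
    public static void main(String[] args) throws IOException {
      // EmptyTokenStream: never emits a token and declares no term attribute.
      TokenStream ts = new EmptyTokenStream();
      ts.reset();
      System.out.println(ts.incrementToken()); // false: no tokens, ever
      ts.end();
      ts.close();

      // MockTokenizer over an empty Reader: a regular Tokenizer, with the
      // usual attributes, that simply finds nothing to emit.
      Tokenizer tok = new MockTokenizer(new StringReader(""));
      tok.reset();
      System.out.println(tok.incrementToken()); // false: empty input
      tok.end();
      tok.close();
    }
  }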
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java	(revision 1428534)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java	(working copy)
@@ -20,15 +20,55 @@
 import java.io.IOException;
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
 
-public class TestEmptyTokenStream extends LuceneTestCase {
+public class TestEmptyTokenStream extends BaseTokenStreamTestCase {
 
-  public void test() throws IOException {
+  public void testConsume() throws IOException {
     TokenStream ts = new EmptyTokenStream();
+    ts.reset();
     assertFalse(ts.incrementToken());
+    ts.end();
+    ts.close();
+    // try again with reuse:
     ts.reset();
     assertFalse(ts.incrementToken());
+    ts.end();
+    ts.close();
   }
+
+  public void testConsume2() throws IOException {
+    BaseTokenStreamTestCase.assertTokenStreamContents(new EmptyTokenStream(), new String[0]);
+  }
+
+  public void testIndexWriter_LUCENE4656() throws IOException {
+    Directory directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, null));
+
+    TokenStream ts = new EmptyTokenStream();
+    assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));
+
+    Document doc = new Document();
+    doc.add(new StringField("id", "0", Field.Store.YES));
+    doc.add(new TextField("description", ts));
+
+    // this should not fail because we have no TermToBytesRefAttribute
+    writer.addDocument(doc);
+
+    assertEquals(1, writer.numDocs());
+
+    writer.close();
+    directory.close();
+  }
+
 }
Index: lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java	(revision 1428534)
+++ lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java	(working copy)
@@ -106,75 +106,72 @@
       OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
       PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);
 
-      consumer.start(field);
+      if (hasMoreTokens) {
+        consumer.start(field);
 
-      for (;;) {
+        do {
+          // If we hit an exception in stream.next below
+          // (which is fairly common, eg if analyzer
+          // chokes on a given document), then it's
+          // non-aborting and (above) this one document
+          // will be marked as deleted, but still
+          // consume a docID
 
-        // If we hit an exception in stream.next below
-        // (which is fairly common, eg if analyzer
-        // chokes on a given document), then it's
-        // non-aborting and (above) this one document
-        // will be marked as deleted, but still
-        // consume a docID
-
-        if (!hasMoreTokens) break;
-
-        final int posIncr = posIncrAttribute.getPositionIncrement();
-        if (posIncr < 0) {
-          throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ")");
-        }
-        if (fieldState.position == 0 && posIncr == 0) {
-          throw new IllegalArgumentException("first position increment must be > 0 (got 0)");
-        }
-        int position = fieldState.position + posIncr;
-        if (position > 0) {
-          // NOTE: confusing: this "mirrors" the
-          // position++ we do below
-          position--;
-        } else if (position < 0) {
-          throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
-        }
-
-        // position is legal, we can safely place it in fieldState now.
-        // not sure if anything will use fieldState after non-aborting exc...
-        fieldState.position = position;
-
-        if (posIncr == 0)
-          fieldState.numOverlap++;
-
-        if (checkOffsets) {
-          int startOffset = fieldState.offset + offsetAttribute.startOffset();
-          int endOffset = fieldState.offset + offsetAttribute.endOffset();
-          if (startOffset < 0 || endOffset < startOffset) {
-            throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
-                + "startOffset=" + startOffset + ",endOffset=" + endOffset);
+          final int posIncr = posIncrAttribute.getPositionIncrement();
+          if (posIncr < 0) {
+            throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ")");
           }
-          if (startOffset < lastStartOffset) {
-            throw new IllegalArgumentException("offsets must not go backwards startOffset="
-                + startOffset + " is < lastStartOffset=" + lastStartOffset);
+          if (fieldState.position == 0 && posIncr == 0) {
+            throw new IllegalArgumentException("first position increment must be > 0 (got 0)");
           }
-          lastStartOffset = startOffset;
-        }
+          int position = fieldState.position + posIncr;
+          if (position > 0) {
+            // NOTE: confusing: this "mirrors" the
+            // position++ we do below
+            position--;
+          } else if (position < 0) {
+            throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
+          }
+
+          // position is legal, we can safely place it in fieldState now.
+          // not sure if anything will use fieldState after non-aborting exc...
+          fieldState.position = position;
 
-        boolean success = false;
-        try {
-          // If we hit an exception in here, we abort
-          // all buffered documents since the last
-          // flush, on the likelihood that the
-          // internal state of the consumer is now
-          // corrupt and should not be flushed to a
-          // new segment:
-          consumer.add();
-          success = true;
-        } finally {
-          if (!success) {
-            docState.docWriter.setAborting();
+          if (posIncr == 0)
+            fieldState.numOverlap++;
+
+          if (checkOffsets) {
+            int startOffset = fieldState.offset + offsetAttribute.startOffset();
+            int endOffset = fieldState.offset + offsetAttribute.endOffset();
+            if (startOffset < 0 || endOffset < startOffset) {
+              throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
+                  + "startOffset=" + startOffset + ",endOffset=" + endOffset);
+            }
+            if (startOffset < lastStartOffset) {
+              throw new IllegalArgumentException("offsets must not go backwards startOffset="
+                  + startOffset + " is < lastStartOffset=" + lastStartOffset);
+            }
+            lastStartOffset = startOffset;
           }
-        }
-        fieldState.length++;
-        fieldState.position++;
-        hasMoreTokens = stream.incrementToken();
+          boolean success = false;
+          try {
+            // If we hit an exception in here, we abort
+            // all buffered documents since the last
+            // flush, on the likelihood that the
+            // internal state of the consumer is now
+            // corrupt and should not be flushed to a
+            // new segment:
+            consumer.add();
+            success = true;
+          } finally {
+            if (!success) {
+              docState.docWriter.setAborting();
+            }
+          }
+          fieldState.length++;
+          fieldState.position++;
+        } while (stream.incrementToken());
       }
 
       // trigger streams to perform end-of-stream operations
       stream.end();
Index: lucene/core/src/test/org/apache/lucene/document/TestDocument.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/document/TestDocument.java	(revision 1428534)
+++ lucene/core/src/test/org/apache/lucene/document/TestDocument.java	(working copy)
@@ -20,13 +20,9 @@
 import java.io.StringReader;
 import java.util.List;
 
-import org.apache.lucene.analysis.EmptyTokenizer;
-import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.StorableField;
 import org.apache.lucene.index.StoredDocument;
@@ -318,7 +314,7 @@
   // LUCENE-3616
   public void testInvalidFields() {
     try {
-      new Field("foo", new EmptyTokenizer(new StringReader("")), StringField.TYPE_STORED);
+      new Field("foo", new MockTokenizer(new StringReader("")), StringField.TYPE_STORED);
       fail("did not hit expected exc");
     } catch (IllegalArgumentException iae) {
       // expected
Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java
===================================================================
--- lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java	(revision 1428534)
+++ lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java	(working copy)
@@ -18,6 +18,7 @@
  */
 
 import java.io.Reader;
+import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -33,6 +34,7 @@
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 
 /**
@@ -300,7 +302,7 @@
   }
 
   /**
-   * Return empty tokens for field "f1".
+   * Return no tokens for field "f1".
    */
   private static class AnalyzerReturningNull extends Analyzer {
     MockAnalyzer stdAnalyzer = new MockAnalyzer(random());
@@ -310,13 +312,21 @@
     }
 
     @Override
-    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    protected Reader initReader(String fieldName, Reader reader) {
       if ("f1".equals(fieldName)) {
-        return new TokenStreamComponents(new EmptyTokenizer(reader));
+        // we don't use the reader, so close it:
+        IOUtils.closeWhileHandlingException(reader);
+        // return empty reader, so MockTokenizer returns no tokens:
+        return new StringReader("");
       } else {
-        return stdAnalyzer.createComponents(fieldName, reader);
+        return super.initReader(fieldName, reader);
       }
     }
+
+    @Override
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return stdAnalyzer.createComponents(fieldName, reader);
+    }
   }
 }
Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java
===================================================================
--- lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java	(revision 1428534)
+++ lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java	(working copy)
@@ -18,6 +18,7 @@
  */
 
 import java.io.Reader;
+import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -35,6 +36,7 @@
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 
 /**
@@ -339,9 +341,9 @@
   }
 
   /**
-   * Return empty tokens for field "f1".
+   * Return no tokens for field "f1".
   */
-  private static final class AnalyzerReturningNull extends Analyzer {
+  private static class AnalyzerReturningNull extends Analyzer {
     MockAnalyzer stdAnalyzer = new MockAnalyzer(random());
 
     public AnalyzerReturningNull() {
@@ -349,13 +351,21 @@
     }
 
     @Override
-    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    protected Reader initReader(String fieldName, Reader reader) {
       if ("f1".equals(fieldName)) {
-        return new TokenStreamComponents(new EmptyTokenizer(reader));
+        // we don't use the reader, so close it:
+        IOUtils.closeWhileHandlingException(reader);
+        // return empty reader, so MockTokenizer returns no tokens:
+        return new StringReader("");
       } else {
-        return stdAnalyzer.createComponents(fieldName, reader);
+        return super.initReader(fieldName, reader);
       }
     }
+
+    @Override
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return stdAnalyzer.createComponents(fieldName, reader);
+    }
   }
 }
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java	(revision 1428534)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java	(working copy)
@@ -116,8 +116,11 @@
     assertNotNull(output);
     CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
 
-    assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
-    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
+    CharTermAttribute termAtt = null;
+    if (output.length > 0) {
+      assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
+      termAtt = ts.getAttribute(CharTermAttribute.class);
+    }
 
     OffsetAttribute offsetAtt = null;
     if (startOffsets != null || endOffsets != null || finalOffset != null) {
@@ -615,8 +618,7 @@
       int remainder = random.nextInt(10);
       Reader reader = new StringReader(text);
       TokenStream ts = a.tokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
-      assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
-      CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
+      CharTermAttribute termAtt = ts.hasAttribute(CharTermAttribute.class) ? ts.getAttribute(CharTermAttribute.class) : null;
       OffsetAttribute offsetAtt = ts.hasAttribute(OffsetAttribute.class) ? ts.getAttribute(OffsetAttribute.class) : null;
       PositionIncrementAttribute posIncAtt = ts.hasAttribute(PositionIncrementAttribute.class) ? ts.getAttribute(PositionIncrementAttribute.class) : null;
       PositionLengthAttribute posLengthAtt = ts.hasAttribute(PositionLengthAttribute.class) ? ts.getAttribute(PositionLengthAttribute.class) : null;
@@ -631,6 +633,7 @@
 
       // First pass: save away "correct" tokens
       while (ts.incrementToken()) {
+        assertNotNull("has no CharTermAttribute", termAtt);
         tokens.add(termAtt.toString());
         if (typeAtt != null) types.add(typeAtt.type());
         if (posIncAtt != null) positions.add(posIncAtt.getPositionIncrement());
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/EmptyTokenizer.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/EmptyTokenizer.java	(revision 1428534)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/EmptyTokenizer.java	(working copy)
@@ -1,35 +0,0 @@
-package org.apache.lucene.analysis;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-
-/**
- * Emits no tokens
- */
-public final class EmptyTokenizer extends Tokenizer {
-
-  public EmptyTokenizer(Reader input) {
-    super(input);
-  }
-
-  @Override
-  public boolean incrementToken() {
-    return false;
-  }
-}
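Note (not part of the patch): the DocInverterPerField change above reduces to
the outline below, a sketch rather than the literal patched source. The
consumer is only started once the stream is known to produce at least one
token, so a field backed by a zero-token stream that declares no
TermToBytesRefAttribute (such as EmptyTokenStream) now indexes cleanly
instead of tripping the consumer (LUCENE-4656), while end() still runs
unconditionally:

  boolean hasMoreTokens = stream.incrementToken();
  if (hasMoreTokens) {     // before: consumer.start(field) ran unconditionally,
    consumer.start(field); // letting consumers grab term attributes that a
    do {                   // token-less stream never declares
      // ... position/offset checks and consumer.add(), exactly as in the hunk above ...
    } while (stream.incrementToken());
  }
  // trigger streams to perform end-of-stream operations
  stream.end();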