Index: lucene/src/test/org/apache/lucene/analysis/TestStages.java =================================================================== --- lucene/src/test/org/apache/lucene/analysis/TestStages.java (revision 0) +++ lucene/src/test/org/apache/lucene/analysis/TestStages.java (revision 0) @@ -0,0 +1,187 @@ +package org.apache.lucene.analysis; + +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.LuceneTestCaseJ4; +import org.apache.lucene.util.Version; + +import java.io.IOException; +import java.io.StringReader; + +import org.junit.Test; +import static org.junit.Assert.*; + +public class TestStages extends LuceneTestCaseJ4 { + + private static class CannedSynonyms implements SynonymEngine { + private final static String[] syns = new String[] {"tv", "television"}; + public String[] getSynonyms(String s) { + if (s.equals("broadcast")) { + return syns; + } else { + return null; + } + } + } + + // @Test + public void xxxtestWhitespaceTokenizerStage() throws IOException { + + WhitespaceTokenizerStage tokenizer = new WhitespaceTokenizerStage(); + final CharArraySet stopWords = new CharArraySet(Version.LUCENE_31, 10, true); + 
stopWords.add("this"); + stopWords.add("is"); + stopWords.add("a"); + stopWords.add("of"); + stopWords.add("the"); + + Stage lowercaseFilter = new LowerCaseFilterStage(Version.LUCENE_31, tokenizer); + + StopFilterStage stopFilter = new StopFilterStage(lowercaseFilter, stopWords); + SynonymFilterStage syns = new SynonymFilterStage(stopFilter, new CannedSynonyms()); + + final Stage.Bindings b = syns.getFinalAttrs(); + + final CharTermAttribute termAtt = b.get(CharTermAttribute.class); + final OffsetAttribute offsetAtt = b.get(OffsetAttribute.class); + final PositionIncrementAttribute posIncrAtt = b.get(PositionIncrementAttribute.class); + + final String s = "This is a test of the Emergency Broadcast System"; + + tokenizer.reset(new StringReader(s)); + + int pos = -1; + while(syns.next()) { + pos += posIncrAtt.getPositionIncrement(); + System.out.println("term=" + termAtt + " pos=" + pos); + } + System.out.println("\ndone!"); + + final int ITER1 = 10000; + final int ITER2 = 400; + final int expTotTokens = 4 * ITER1 * ITER2; + + StringBuilder sb = new StringBuilder(); + for(int i=0;i synonymStack; + private SynonymEngine engine; + private AttributeSource.State current; + + private final CharTermAttribute termAttIn; + private final CharTermAttribute termAttOut; + private final PositionIncrementAttribute posIncrAttIn; + private final PositionIncrementAttribute posIncrAttOut; + + public SynonymFilterStage(Stage prev, SynonymEngine engine) { + super(prev); + synonymStack = new Stack(); //#1 + this.engine = engine; + this.prev = prev; + + termAttIn = getInputAttr(CharTermAttribute.class); + termAttOut = new CharTermAttributeImpl(); + addOutputAttr(termAttOut); + + posIncrAttIn = getInputAttr(PositionIncrementAttribute.class); + posIncrAttOut = new PositionIncrementAttributeImpl(); + addOutputAttr(posIncrAttOut); + } + + public final boolean next() throws IOException { + if (synonymStack.size() > 0) { //#2 + termAttOut.setEmpty().append(synonymStack.pop()); + 
posIncrAttOut.setPositionIncrement(0); //#3 + return true; + } + + if (!prev.next()) //#4 + return false; + + addAliasesToStack(); + termAttOut.copyBuffer(termAttIn.buffer(), 0, termAttIn.length()); + posIncrAttOut.setPositionIncrement(posIncrAttIn.getPositionIncrement()); + + return true; //#7 + } + + private boolean addAliasesToStack() throws IOException { + String[] synonyms = engine.getSynonyms(termAttIn.toString()); //#8 + if (synonyms == null) { + return false; + } + for (String synonym : synonyms) { //#9 + synonymStack.push(synonym); + } + return true; + } +} + +/* +#1 Define synonym buffer +#2 Pop buffered synonyms +#3 Set position increment to 0 +#4 Read next token +#5 Push synonyms onto stack +#6 Save current token +#7 Return current token +#8 Retrieve synonyms +#9 Push synonyms onto stack +*/ Property changes on: lucene/src/java/org/apache/lucene/analysis/SynonymFilterStage.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/analysis/WhitespaceTokenizerStage.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/WhitespaceTokenizerStage.java (revision 0) +++ lucene/src/java/org/apache/lucene/analysis/WhitespaceTokenizerStage.java (revision 0) @@ -0,0 +1,27 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class WhitespaceTokenizerStage extends CharTokenizerStage { + /** Collects only characters which do not satisfy + * {@link Character#isWhitespace(int)}.*/ + @Override + protected boolean isTokenChar(int c) { + return !Character.isWhitespace(c); + } +} \ No newline at end of file Property changes on: lucene/src/java/org/apache/lucene/analysis/WhitespaceTokenizerStage.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/analysis/StopFilterStage.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/StopFilterStage.java (revision 0) +++ lucene/src/java/org/apache/lucene/analysis/StopFilterStage.java (revision 0) @@ -0,0 +1,70 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; +import java.util.List; + +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttributeImpl; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; +import org.apache.lucene.queryParser.QueryParser; // for javadoc +import org.apache.lucene.util.Version; + +public final class StopFilterStage extends Stage { + + private final Stage prev; + private final CharArraySet stopWords; + private final CharTermAttribute termAtt; + private final PositionIncrementAttribute posIncrAttIn; + private final PositionIncrementAttribute posIncrAttOut; + + public StopFilterStage(Stage prev, CharArraySet stopWords) + { + super(prev); + this.prev = prev; + termAtt = getInputAttr(CharTermAttribute.class); + + posIncrAttIn = getInputAttr(PositionIncrementAttribute.class); + posIncrAttOut = new PositionIncrementAttributeImpl(); + addOutputAttr(posIncrAttOut); + + this.stopWords = stopWords; + } + + /** + * Returns the next input Token whose term() is not a stop word. 
+ */ + @Override + public boolean next() throws IOException { + // return the first non-stop word found + int skippedPositions = 0; + while (prev.next()) { + if (!stopWords.contains(termAtt.buffer(), 0, termAtt.length())) { + posIncrAttOut.setPositionIncrement(posIncrAttIn.getPositionIncrement() + skippedPositions); + return true; + } + skippedPositions += posIncrAttIn.getPositionIncrement(); + } + // reached EOS -- return false + return false; + } +} Property changes on: lucene/src/java/org/apache/lucene/analysis/StopFilterStage.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/analysis/SynonymFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/SynonymFilter.java (revision 0) +++ lucene/src/java/org/apache/lucene/analysis/SynonymFilter.java (revision 0) @@ -0,0 +1,87 @@ +package org.apache.lucene.analysis; + +/** + * Copyright Manning Publications Co. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific lan +*/ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.util.AttributeSource; +import java.io.IOException; +import java.util.Stack; + +// From chapter 4 +public class SynonymFilter extends TokenFilter { + public static final String TOKEN_TYPE_SYNONYM = "SYNONYM"; + + private Stack synonymStack; + private SynonymEngine engine; + private AttributeSource.State current; + + private final TermAttribute termAtt; + private final PositionIncrementAttribute posIncrAtt; + + public SynonymFilter(TokenStream in, SynonymEngine engine) { + super(in); + synonymStack = new Stack(); //#1 + this.engine = engine; + + this.termAtt = addAttribute(TermAttribute.class); + this.posIncrAtt = addAttribute(PositionIncrementAttribute.class); + } + + public final boolean incrementToken() throws IOException { + if (synonymStack.size() > 0) { //#2 + String syn = synonymStack.pop(); //#2 + restoreState(current); //#2 + termAtt.setTermBuffer(syn); + posIncrAtt.setPositionIncrement(0); //#3 + return true; + } + + if (!input.incrementToken()) //#4 + return false; + + if (addAliasesToStack()) { //#5 + current = captureState(); //#6 + } + + return true; //#7 + } + + private boolean addAliasesToStack() throws IOException { + String[] synonyms = engine.getSynonyms(termAtt.term()); //#8 + if (synonyms == null) { + return false; + } + for (String synonym : synonyms) { //#9 + synonymStack.push(synonym); + } + return true; + } +} + +/* +#1 Define synonym buffer +#2 Pop buffered synonyms +#3 Set position increment to 0 +#4 Read next token +#5 Push synonyms onto stack +#6 Save current token +#7 Return current token +#8 Retrieve synonyms +#9 Push synonyms onto stack +*/ Property changes on: lucene/src/java/org/apache/lucene/analysis/SynonymFilter.java 
___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/analysis/AppendingStage.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/AppendingStage.java (revision 0) +++ lucene/src/java/org/apache/lucene/analysis/AppendingStage.java (revision 0) @@ -0,0 +1,122 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; +import java.util.List; + +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttributeImpl; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttributeImpl; + +import org.apache.lucene.index.ReusableStringReader; + +public final class AppendingStage extends Stage { + + private final Stage prev; + private final CharTokenizerStage start; + + private final OffsetAttribute offsetAttIn; + private final OffsetAttributeImpl offsetAttOut; + private final PositionIncrementAttribute posIncrAttIn; + private final PositionIncrementAttributeImpl posIncrAttOut; + + private final int offsetGap; + private final int posIncrGap; + + private String[] values; + private int upto = -1; + + private int curOffsetShift; + private int pendingPosIncrShift; + + private final ReusableStringReader stringReader = new ReusableStringReader(); + + // nocommit -- need to switch to TokenizerStage not CharTokenizerStage + public AppendingStage(CharTokenizerStage start, Stage prev, int posIncrGap, int offsetGap) + { + super(prev); + this.prev = prev; + this.start = start; + + posIncrAttIn = getInputAttr(PositionIncrementAttribute.class); + posIncrAttOut = new PositionIncrementAttributeImpl(); + addOutputAttr(posIncrAttOut); + + offsetAttIn = getInputAttr(OffsetAttribute.class); + offsetAttOut = new OffsetAttributeImpl(); + addOutputAttr(offsetAttOut); + + this.posIncrGap = posIncrGap; + this.offsetGap = offsetGap; + } + + // nocommit -- should take Field[]? Reader[]? Some kinda + // of ReaderSource factory thing? etc. 
+ public void reset(String[] values) { + this.values = values; + upto = 0; + pendingPosIncrShift = 0; + curOffsetShift = 0; + if (values.length != 0) { + stringReader.init(values[0]); + start.reset(stringReader); + } + } + + /** + * Returns the next input Token whose term() is not a stop word. + */ + @Override + public boolean next() throws IOException { + + if (values.length == 0) { + return false; + } + + while(true) { + + assert upto < values.length; + + if (prev.next()) { + posIncrAttOut.setPositionIncrement(pendingPosIncrShift + posIncrAttIn.getPositionIncrement()); + pendingPosIncrShift = 0; + offsetAttOut.setOffset(curOffsetShift + offsetAttIn.startOffset(), + curOffsetShift + offsetAttIn.endOffset()); + return true; + } + + upto++; + if (upto < values.length) { + stringReader.init(values[upto]); + start.reset(stringReader); + curOffsetShift = offsetAttIn.startOffset() + offsetGap; + pendingPosIncrShift += posIncrAttIn.getPositionIncrement() + posIncrGap; + } else { + offsetAttOut.setOffset(curOffsetShift + offsetAttIn.startOffset(), + curOffsetShift + offsetAttIn.endOffset()); + posIncrAttOut.setPositionIncrement(pendingPosIncrShift + posIncrAttIn.getPositionIncrement()); + pendingPosIncrShift = 0; + return false; + } + } + } +} Property changes on: lucene/src/java/org/apache/lucene/analysis/AppendingStage.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/analysis/CharTokenizerStage.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/CharTokenizerStage.java (revision 0) +++ lucene/src/java/org/apache/lucene/analysis/CharTokenizerStage.java (revision 0) @@ -0,0 +1,117 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.Version; +import org.apache.lucene.util.CharacterUtils; +import org.apache.lucene.util.CharacterUtils.CharacterBuffer; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttributeImpl; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttributeImpl; + +import java.io.Reader; +import java.io.IOException; + +public abstract class CharTokenizerStage extends Stage { + private Reader input; + private int bufferIndex, offset, dataLen; + private static final int MAX_WORD_LEN = 255; + private static final int IO_BUFFER_SIZE = 4096; + + private final CharTermAttribute termAtt; + private final PositionIncrementAttribute posIncrAtt; + private final OffsetAttribute offsetAtt; + + private final CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_31); + private final CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE); + + public CharTokenizerStage() { + super(null); + termAtt = new CharTermAttributeImpl(); + 
addOutputAttr(termAtt); + offsetAtt = new OffsetAttributeImpl(); + addOutputAttr(offsetAtt); + posIncrAtt = new PositionIncrementAttributeImpl(); + addOutputAttr(posIncrAtt); + } + + public void reset(Reader input) { + this.input = input; + bufferIndex = 0; + offset = 0; + dataLen = 0; + ioBuffer.reset(); + } + + @Override + public boolean next() throws IOException { + int length = 0; + int start = bufferIndex; + char[] buffer = termAtt.buffer(); + while (true) { + if (bufferIndex >= dataLen) { + offset += dataLen; + if(!charUtils.fill(ioBuffer, input)) { // read supplementary char aware with CharacterUtils + dataLen = 0; // so next offset += dataLen won't decrement offset + if (length > 0) + break; + else { + // set final offset + // nocommit -- get charfilter working: + //final int finalOffset = correctOffset(offset); + final int finalOffset = offset; + offsetAtt.setOffset(finalOffset, finalOffset); + return false; + } + } + dataLen = ioBuffer.getLength(); + bufferIndex = 0; + } + // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone + final int c = charUtils.codePointAt(ioBuffer.getBuffer(), bufferIndex); + bufferIndex += Character.charCount(c); + + if (isTokenChar(c)) { // if it's a token char + if (length == 0) // start of token + start = offset + bufferIndex - 1; + else if (length >= buffer.length-1) // check if a supplementary could run out of bounds + buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer + length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized + if (length >= MAX_WORD_LEN) // buffer overflow! 
make sure to check for >= surrogate pair could break == test + break; + } else if (length > 0) // at non-Letter w/ chars + break; // return 'em + } + + termAtt.setLength(length); + + // nocommit -- get charfilter working: + //offsetAtt.setOffset(correctOffset(start), correctOffset(start+length)); + offsetAtt.setOffset(start, start+length); + return true; + } + + protected abstract boolean isTokenChar(int c); + + protected int normalize(int c) { + return c; + } +} \ No newline at end of file Property changes on: lucene/src/java/org/apache/lucene/analysis/CharTokenizerStage.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/analysis/SynonymEngine.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/SynonymEngine.java (revision 0) +++ lucene/src/java/org/apache/lucene/analysis/SynonymEngine.java (revision 0) @@ -0,0 +1,23 @@ +package org.apache.lucene.analysis; + +/** + * Copyright Manning Publications Co. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
import java.io.IOException;

/**
 * Supplies the synonyms of a term.
 */
// From chapter 4
public interface SynonymEngine {

  /**
   * Looks up the synonyms of a term.
   *
   * @param s the term to look up
   * @return the synonyms of {@code s}; callers in this package treat
   *         {@code null} as "no synonyms"
   * @throws IOException if the lookup fails
   */
  String[] getSynonyms(String s) throws IOException;
}
+ */ + +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; + +import java.util.List; +import java.util.ArrayList; +import java.io.IOException; + +public abstract class Stage { + + private final Bindings readFrom; + private final Bindings writeTo; + + protected Stage(Stage prev) { + // I read addrs from previous stage's writeTo scope: + if (prev == null) { + readFrom = null; + } else { + readFrom = prev.writeTo; + readFrom.freeze(); + } + // I write to new [private] scope + writeTo = new Bindings(readFrom); + } + + public final A getInputAttr(Class attClass) { + return readFrom.get(attClass); + } + + public void addOutputAttr(Attribute impl) { + writeTo.add(impl); + } + + public static class Bindings { + private boolean frozen; + // nocommit - use map w/ sophisticated generics!! + private final List elems = new ArrayList(); + private final Bindings prev; + + public Bindings(Bindings prev) { + this.prev = prev; + } + + public A get(Class attClass) { + for(Attribute att : elems) { + if (attClass.isInstance(att)) { + return attClass.cast(att); + } + } + if (prev != null) { + return prev.get(attClass); + } + return null; + } + + public void add(Attribute att) { + if (frozen) { + throw new IllegalStateException("this stage is frozen"); + } + // nocommit -- throw exc if interfaces overlap + //if (map.containsKey(attClass)) { + //throw new IllegalArgumentException("class " + attClass + " has already been set"); + //} + elems.add(att); + } + + public void freeze() { + if (frozen) { + throw new IllegalStateException("this stage is frozen"); + } + frozen = true; + } + } + + public Bindings getFinalAttrs() { + writeTo.freeze(); + return writeTo; + } + + abstract public boolean next() throws IOException; +} Property changes on: lucene/src/java/org/apache/lucene/analysis/Stage.java ___________________________________________________________________ Added: svn:eol-style + native Index: 
lucene/src/java/org/apache/lucene/analysis/LowerCaseFilterStage.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/LowerCaseFilterStage.java (revision 0) +++ lucene/src/java/org/apache/lucene/analysis/LowerCaseFilterStage.java (revision 0) @@ -0,0 +1,61 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; +import org.apache.lucene.util.CharacterUtils; +import org.apache.lucene.util.Version; + +public final class LowerCaseFilterStage extends Stage { + + private final CharacterUtils charUtils; + private final CharTermAttribute termAttOut; + private final CharTermAttribute termAttIn; + private final Stage input; + + public LowerCaseFilterStage(Version version, Stage prev) { + super(prev); + this.input = prev; + charUtils = CharacterUtils.getInstance(version); + + termAttIn = getInputAttr(CharTermAttribute.class); + termAttOut = new CharTermAttributeImpl(); + addOutputAttr(termAttOut); + } + + @Override + public final boolean next() throws IOException { + if (input.next()) { + final char[] buffer = termAttIn.buffer(); + final int length = termAttIn.length(); + final char[] bufferOut = termAttOut.resizeBuffer(length); + for (int i = 0; i < length;) { + i += Character.toChars( + Character.toLowerCase( + charUtils.codePointAt(buffer, i)), bufferOut, i); + } + termAttOut.setLength(length); + return true; + } else { + return false; + } + } +} Property changes on: lucene/src/java/org/apache/lucene/analysis/LowerCaseFilterStage.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/ReusableStringReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/ReusableStringReader.java (revision 943430) +++ lucene/src/java/org/apache/lucene/index/ReusableStringReader.java (working copy) @@ -21,12 +21,14 @@ /** Used by DocumentsWriter to implemented a StringReader * that can be reset to a new string; we use this when - * tokenizing the string value from a Field. 
*/ -final class ReusableStringReader extends Reader { - int upto; - int left; - String s; - void init(String s) { + * tokenizing the string value from a Field. + * + * @lucene.internal */ +public final class ReusableStringReader extends Reader { + private int upto; + private int left; + private String s; + public void init(String s) { this.s = s; left = s.length(); this.upto = 0;