Index: src/test/org/apache/lucene/analysis/TestTurkishLowerCaseFilter.java =================================================================== --- src/test/org/apache/lucene/analysis/TestTurkishLowerCaseFilter.java (revision 0) +++ src/test/org/apache/lucene/analysis/TestTurkishLowerCaseFilter.java (revision 0) @@ -0,0 +1,42 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;
+
+import java.io.StringReader;
+
+/** Verifies that {@link TurkishLowerCaseFilter} lowercases dotted and dotless capital I. */
+public class TestTurkishLowerCaseFilter extends BaseTokenStreamTestCase {
+
+  /** Capital I with dot above must become "i"; plain capital I must become dotless i. */
+  public void testFilter() throws Exception {
+    final String[] expectedTerms = {"istanbul", "izmir", "\u0131sparta"};
+    final StringReader text = new StringReader("\u0130STANBUL \u0130ZM\u0130R ISPARTA");
+    final TurkishLowerCaseFilter filter =
+        new TurkishLowerCaseFilter(Version.LUCENE_30, new WhitespaceTokenizer(text));
+    final TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+
+    for (String expected : expectedTerms) {
+      assertTrue(filter.incrementToken());
+      assertEquals(expected, termAtt.term());
+    }
+    assertFalse(filter.incrementToken()); // the stream must be exhausted after three terms
+  }
+}
Index: src/java/org/apache/lucene/analysis/TurkishLowerCaseFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/TurkishLowerCaseFilter.java (revision 0)
+++ src/java/org/apache/lucene/analysis/TurkishLowerCaseFilter.java (revision 0)
@@ -0,0 +1,73 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.util.CharacterUtils; +import org.apache.lucene.util.Version; + +/** + * Normalizes Turkish token text to lower case. + * + *

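<p>You must specify the required {@link Version} + * compatibility when creating TurkishLowerCaseFilter; the version is passed to + * {@link CharacterUtils#getInstance} to obtain the helper that reads code points + * from the term buffer.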

+ */
+public final class TurkishLowerCaseFilter extends TokenFilter {
+  /** LATIN CAPITAL LETTER I (U+0049, decimal 73), the one letter this filter special-cases. */
+  private static final int LATIN_CAPITAL_LETTER_I = 'I';
+
+  /** LATIN SMALL LETTER DOTLESS I (U+0131, decimal 305), the lower case form of a capital I. */
+  private static final int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131';
+
+  /** Version-specific helper used to read code points from the term buffer. */
+  private final CharacterUtils charUtils;
+
+  /** Term attribute whose buffer is lowercased in place. */
+  private final TermAttribute termAtt;
+
+  /**
+   * Create a new TurkishLowerCaseFilter, that normalizes Turkish token text to lower case.
+   *
+   * @param matchVersion compatibility version; it is passed to
+   *        {@link CharacterUtils#getInstance} to obtain the helper that reads
+   *        code points from the term buffer
+   * @param in TokenStream to filter
+   */
+  public TurkishLowerCaseFilter(Version matchVersion, TokenStream in) {
+    super(in);
+    termAtt = addAttribute(TermAttribute.class);
+    charUtils = CharacterUtils.getInstance(matchVersion);
+  }
+
+  /**
+   * Create a new TurkishLowerCaseFilter with {@link Version#LUCENE_30} compatibility.
+   *
+   * @param in TokenStream to filter
+   * @deprecated Use {@link #TurkishLowerCaseFilter(Version, TokenStream)} instead.
+   */
+  @Deprecated
+  public TurkishLowerCaseFilter(TokenStream in) {
+    this(Version.LUCENE_30, in);
+  }
+
+  /**
+   * Advances the wrapped stream by one token and lowercases its term text in place.
+   *
+   * <p>A capital I is replaced by dotless i; every other code point goes through
+   * {@link Character#toLowerCase(int)}.
+   *
+   * @return {@code true} if the wrapped stream produced a token, {@code false} otherwise
+   * @throws IOException if the wrapped stream fails to advance
+   */
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+
+    final char[] buffer = termAtt.termBuffer();
+    final int length = termAtt.termLength();
+    for (int i = 0; i < length;) {
+      // Decode the code point once; it drives both the special case and the default mapping.
+      final int codePoint = charUtils.codePointAt(buffer, i);
+      // Capital I with dot above (U+0130) needs no special case: Character.toLowerCase
+      // already maps it to a plain "i".
+      // NOTE(review): a capital I followed by COMBINING DOT ABOVE (U+0307) is not
+      // treated specially here -- confirm whether decomposed input must be supported.
+      final int lowered = codePoint == LATIN_CAPITAL_LETTER_I
+          ? LATIN_SMALL_LETTER_DOTLESS_I
+          : Character.toLowerCase(codePoint);
+      // toChars writes the result back at i and returns how many chars it wrote,
+      // which advances i past the code point.
+      i += Character.toChars(lowered, buffer, i);
+    }
+    return true;
+  }
+}