Index: solr/core/src/java/org/apache/solr/analysis/KuromojiReadingFormFilterFactory.java
===================================================================
--- solr/core/src/java/org/apache/solr/analysis/KuromojiReadingFormFilterFactory.java (revision 0)
+++ solr/core/src/java/org/apache/solr/analysis/KuromojiReadingFormFilterFactory.java (revision 0)
@@ -0,0 +1,50 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.kuromoji.KuromojiReadingFormFilter;
+
+import java.util.Map;
+
+/**
+ * Factory for {@link KuromojiReadingFormFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ja" class="solr.TextField"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.KuromojiTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.KuromojiReadingFormFilterFactory"
+ *             useRomaji="false"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;
+ * </pre>
+ */
+public class KuromojiReadingFormFilterFactory extends BaseTokenFilterFactory {
+  private static final String ROMAJI_PARAM = "useRomaji";
+  private boolean useRomaji;
+  /** Reads the optional {@code useRomaji} argument; it defaults to {@code false} when absent. */
+  @Override
+  public void init(Map<String, String> args) {
+    super.init(args);
+    useRomaji = getBoolean(ROMAJI_PARAM, false);
+  }
+  /** Wraps {@code input} in a {@link KuromojiReadingFormFilter} using the configured flag. */
+  public TokenStream create(TokenStream input) {
+    return new KuromojiReadingFormFilter(input, useRomaji);
+  }
+}
Property changes on: solr/core/src/java/org/apache/solr/analysis/KuromojiReadingFormFilterFactory.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java
===================================================================
--- modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java (revision 0)
+++ modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java (revision 0)
@@ -0,0 +1,64 @@
+package org.apache.lucene.analysis.kuromoji;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * Tests for {@link KuromojiReadingFormFilter}
+ */
+public class TestKuromojiReadingFormFilter extends BaseTokenStreamTestCase {
+  /** Analyzer whose filter is built with {@code useRomaji = false}. */
+  private final Analyzer katakanaAnalyzer = newReadingAnalyzer(false);
+  /** Analyzer whose filter is built with {@code useRomaji = true}. */
+  private final Analyzer romajiAnalyzer = newReadingAnalyzer(true);
+
+  /** Chains a search-mode {@link KuromojiTokenizer} into a {@link KuromojiReadingFormFilter}. */
+  private static Analyzer newReadingAnalyzer(final boolean useRomaji) {
+    return new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new KuromojiTokenizer(reader, null, true, KuromojiTokenizer.Mode.SEARCH);
+        return new TokenStreamComponents(tokenizer, new KuromojiReadingFormFilter(tokenizer, useRomaji));
+      }
+    };
+  }
+
+  /** Expects each term to be replaced by its katakana reading. */
+  public void testKatakanaReadings() throws IOException {
+    assertAnalyzesTo(katakanaAnalyzer, "今夜はロバート先生と話した",
+        new String[] { "コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ" });
+  }
+
+  /** Expects each term to be replaced by the romanization of its reading. */
+  public void testRomajiReadings() throws IOException {
+    assertAnalyzesTo(romajiAnalyzer, "今夜はロバート先生と話した",
+        new String[] { "kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta" });
+  }
+
+  /** Passes both analyzers through {@code checkRandomData}. */
+  public void testRandomData() throws IOException {
+    checkRandomData(random, katakanaAnalyzer, 1000*RANDOM_MULTIPLIER);
+    checkRandomData(random, romajiAnalyzer, 1000*RANDOM_MULTIPLIER);
+  }
+}
Property changes on: modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiReadingFormFilter.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiReadingFormFilter.java
===================================================================
--- modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiReadingFormFilter.java (revision 0)
+++ modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiReadingFormFilter.java (revision 0)
@@ -0,0 +1,65 @@
+package org.apache.lucene.analysis.kuromoji;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.kuromoji.tokenattributes.ReadingAttribute;
+import org.apache.lucene.analysis.kuromoji.util.ToStringUtil;
+
+import java.io.IOException;
+
+/**
+ * A {@link org.apache.lucene.analysis.TokenFilter} that replaces the term
+ * attribute with the reading of a token in either katakana or romaji form.
+ * The default reading form is katakana. Tokens whose reading is null keep
+ * their original term text.
+ */
+public final class KuromojiReadingFormFilter extends TokenFilter {
+  private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class);
+  private final ReadingAttribute readingAttr = addAttribute(ReadingAttribute.class);
+
+  private final boolean useRomaji;
+
+  /** @param useRomaji true to emit the romanized reading, false to emit the reading as-is */
+  public KuromojiReadingFormFilter(TokenStream input, boolean useRomaji) {
+    super(input);
+    this.useRomaji = useRomaji;
+  }
+
+  /** Creates a filter that emits the unromanized (katakana) reading. */
+  public KuromojiReadingFormFilter(TokenStream input) {
+    this(input, false);
+  }
+
+  /** Advances the input and, when a reading is available, swaps it in as the term text. */
+  @Override
+  public boolean incrementToken() throws IOException {
+    // Upstream exhausted: nothing left to rewrite.
+    if (!input.incrementToken()) {
+      return false;
+    }
+    String reading = readingAttr.getReading();
+    if (reading != null) {
+      // Overwrite the term text in place; a null reading leaves it untouched.
+      termAttr.setEmpty().append(useRomaji ? ToStringUtil.getRomanization(reading) : reading);
+    }
+    return true;
+  }
+}
Property changes on: modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiReadingFormFilter.java
___________________________________________________________________
Added: svn:eol-style
+ native