Details
-
Improvement
-
Status: Closed
-
Minor
-
Resolution: Fixed
-
tools-1.5.3
-
Patch
Description
The list of end-of-sentence characters for Japanese was wrong.
Removed duplicate code.
Index: opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java
===================================================================
--- opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java (revision 1678426)
+++ opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java (local)
@@ -36,14 +36,12 @@
public static final char[] thEosCharacters = new char[]
{ ' ','\n' };
+ // TODO add more sentence enders
+ public static final char[] jpEosCharacters = new char[]
;
+
public EndOfSentenceScanner createEndOfSentenceScanner(String languageCode) {
- if ("th".equals(languageCode)) {
- return new DefaultEndOfSentenceScanner(new char[]
{' ','\n'}
);
- } else if("pt".equals(languageCode)) {
- return new DefaultEndOfSentenceScanner(ptEosCharacters);
- }
- return new DefaultEndOfSentenceScanner(defaultEosCharacters);
+ return new DefaultEndOfSentenceScanner(getEOSCharacters(languageCode));
}
public EndOfSentenceScanner createEndOfSentenceScanner(
@@ -76,6 +74,8 @@
return thEosCharacters;
} else if ("pt".equals(languageCode)) {
return ptEosCharacters;
+ } else if ("jp".equals(languageCode)) {
+ return jpEosCharacters;
}
return defaultEosCharacters;