Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java (revision 1029227) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java (working copy) @@ -143,9 +143,13 @@ public static HyphenationTree getHyphenationTree(Reader hyphenationReader) throws Exception { HyphenationTree tree = new HyphenationTree(); + final InputSource is = new InputSource(hyphenationReader); + // we need this to load the DTD in very old parsers (like the one in JDK 1.4). + // The DTD itself is provided via EntityResolver, so it should always load, but + // some parsers still want to have a base URL (Crimson). + is.setSystemId("dtd://non-null-dummy"); + tree.loadPatterns(is); - tree.loadPatterns(new InputSource(hyphenationReader)); - return tree; }