Index: lucene/benchmark/build.xml =================================================================== --- lucene/benchmark/build.xml (revision 1417689) +++ lucene/benchmark/build.xml (working copy) @@ -152,7 +152,7 @@ - + Index: lucene/benchmark/ivy.xml =================================================================== --- lucene/benchmark/ivy.xml (revision 1417689) +++ lucene/benchmark/ivy.xml (working copy) @@ -21,7 +21,7 @@ - + Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java (revision 1417689) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.Reader; import java.io.StringReader; +import java.util.Arrays; import java.util.Collections; import java.util.Date; import java.util.HashSet; @@ -65,10 +66,10 @@ @Override public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if (inHEAD > 0) { - if (equalsIgnoreTurkish("title", localName)) { + if ("title".equals(localName)) { inTITLE++; } else { - if (equalsIgnoreTurkish("meta", localName)) { + if ("meta".equals(localName)) { String name = atts.getValue("name"); if (name == null) { name = atts.getValue("http-equiv"); @@ -82,7 +83,7 @@ } else if (inBODY > 0) { if (SUPPRESS_ELEMENTS.contains(localName)) { suppressed++; - } else if (equalsIgnoreTurkish("img", localName)) { + } else if ("img".equals(localName)) { // the original javacc-based parser preserved ... // attribute as body text in [] parenthesis: final String alt = atts.getValue("alt"); @@ -90,11 +91,11 @@ body.append('[').append(alt).append(']'); } } - } else if (equalsIgnoreTurkish("body", localName)) { + } else if ("body".equals(localName)) { inBODY++; - } else if (equalsIgnoreTurkish("head", localName)) { + } else if ("head".equals(localName)) { inHEAD++; - } else if (equalsIgnoreTurkish("frameset", localName)) { + } else if ("frameset".equals(localName)) { throw new SAXException("This parser does not support HTML framesets."); } } @@ -102,7 +103,7 @@ @Override public void endElement(String namespaceURI, String localName, String qName) throws SAXException { if (inBODY > 0) { - if (equalsIgnoreTurkish("body", localName)) { + if ("body".equals(localName)) { inBODY--; } else if (ENDLINE_ELEMENTS.contains(localName)) { body.append('\n'); @@ -110,9 +111,9 @@ suppressed--; } } else if (inHEAD > 0) { - if (equalsIgnoreTurkish("head", localName)) { + if ("head".equals(localName)) { inHEAD--; - } else if (inTITLE > 0 && equalsIgnoreTurkish("title", localName)) { + } else if (inTITLE > 0 && "title".equals(localName)) { inTITLE--; } } @@ -145,38 +146,10 @@ this.body = body.toString(); } - // TODO: remove the Turkish workaround once this is fixed in NekoHTML: - // https://sourceforge.net/tracker/?func=detail&aid=3544334&group_id=195122&atid=952178 - - // BEGIN: workaround - static final String convertTurkish(String s) { - return s.replace('i', 'ı'); + private static final Set createElementNameSet(String... names) { + return Collections.unmodifiableSet(new HashSet(Arrays.asList(names))); } - static final boolean equalsIgnoreTurkish(String s1, String s2) { - final int len1 = s1.length(), len2 = s2.length(); - if (len1 != len2) - return false; - for (int i = 0; i < len1; i++) { - char ch1 = s1.charAt(i), ch2 = s2.charAt(i); - if (ch1 == 'ı') ch1 = 'i'; - if (ch2 == 'ı') ch2 = 'i'; - if (ch1 != ch2) - return false; - } - return true; - } - // END: workaround - - static final Set createElementNameSet(String... names) { - final HashSet set = new HashSet(); - for (final String name : names) { - set.add(name); - set.add(convertTurkish(name)); - } - return Collections.unmodifiableSet(set); - } - /** HTML elements that cause a line break (they are block-elements) */ static final Set ENDLINE_ELEMENTS = createElementNameSet( "p", "h1", "h2", "h3", "h4", "h5", "h6", "div", "ul", "ol", "dl", Index: lucene/licenses/nekohtml-1.9.15.jar.sha1 =================================================================== --- lucene/licenses/nekohtml-1.9.15.jar.sha1 (revision 1417689) +++ lucene/licenses/nekohtml-1.9.15.jar.sha1 (working copy) @@ -1 +0,0 @@ -a45cd7b7401d9c2264d4908182380452c03ebf8f Index: lucene/licenses/nekohtml-1.9.17.jar.sha1 =================================================================== --- lucene/licenses/nekohtml-1.9.17.jar.sha1 (revision 0) +++ lucene/licenses/nekohtml-1.9.17.jar.sha1 (working copy) @@ -0,0 +1 @@ +39a870b0ea4cb0d2a3015c1ab569d17d83122d55 Index: lucene/licenses/nekohtml-1.9.17.jar.sha1 =================================================================== --- lucene/licenses/nekohtml-1.9.17.jar.sha1 (revision 0) +++ lucene/licenses/nekohtml-1.9.17.jar.sha1 (working copy) Property changes on: lucene/licenses/nekohtml-1.9.17.jar.sha1 ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property