Index: lucene/benchmark/build.xml
===================================================================
--- lucene/benchmark/build.xml (revision 1417689)
+++ lucene/benchmark/build.xml (working copy)
@@ -152,7 +152,7 @@
-
+
Index: lucene/benchmark/ivy.xml
===================================================================
--- lucene/benchmark/ivy.xml (revision 1417689)
+++ lucene/benchmark/ivy.xml (working copy)
@@ -21,7 +21,7 @@
-
+
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java (revision 1417689)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java (working copy)
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
+import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
@@ -65,10 +66,10 @@
@Override
public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
if (inHEAD > 0) {
- if (equalsIgnoreTurkish("title", localName)) {
+ if ("title".equals(localName)) {
inTITLE++;
} else {
- if (equalsIgnoreTurkish("meta", localName)) {
+ if ("meta".equals(localName)) {
String name = atts.getValue("name");
if (name == null) {
name = atts.getValue("http-equiv");
@@ -82,7 +83,7 @@
} else if (inBODY > 0) {
if (SUPPRESS_ELEMENTS.contains(localName)) {
suppressed++;
- } else if (equalsIgnoreTurkish("img", localName)) {
+ } else if ("img".equals(localName)) {
// the original javacc-based parser preserved
// attribute as body text in [] parenthesis:
final String alt = atts.getValue("alt");
@@ -90,11 +91,11 @@
body.append('[').append(alt).append(']');
}
}
- } else if (equalsIgnoreTurkish("body", localName)) {
+ } else if ("body".equals(localName)) {
inBODY++;
- } else if (equalsIgnoreTurkish("head", localName)) {
+ } else if ("head".equals(localName)) {
inHEAD++;
- } else if (equalsIgnoreTurkish("frameset", localName)) {
+ } else if ("frameset".equals(localName)) {
throw new SAXException("This parser does not support HTML framesets.");
}
}
@@ -102,7 +103,7 @@
@Override
public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
if (inBODY > 0) {
- if (equalsIgnoreTurkish("body", localName)) {
+ if ("body".equals(localName)) {
inBODY--;
} else if (ENDLINE_ELEMENTS.contains(localName)) {
body.append('\n');
@@ -110,9 +111,9 @@
suppressed--;
}
} else if (inHEAD > 0) {
- if (equalsIgnoreTurkish("head", localName)) {
+ if ("head".equals(localName)) {
inHEAD--;
- } else if (inTITLE > 0 && equalsIgnoreTurkish("title", localName)) {
+ } else if (inTITLE > 0 && "title".equals(localName)) {
inTITLE--;
}
}
@@ -145,38 +146,10 @@
this.body = body.toString();
}
- // TODO: remove the Turkish workaround once this is fixed in NekoHTML:
- // https://sourceforge.net/tracker/?func=detail&aid=3544334&group_id=195122&atid=952178
-
- // BEGIN: workaround
- static final String convertTurkish(String s) {
- return s.replace('i', 'ı');
+ private static final Set createElementNameSet(String... names) {
+ return Collections.unmodifiableSet(new HashSet(Arrays.asList(names)));
}
- static final boolean equalsIgnoreTurkish(String s1, String s2) {
- final int len1 = s1.length(), len2 = s2.length();
- if (len1 != len2)
- return false;
- for (int i = 0; i < len1; i++) {
- char ch1 = s1.charAt(i), ch2 = s2.charAt(i);
- if (ch1 == 'ı') ch1 = 'i';
- if (ch2 == 'ı') ch2 = 'i';
- if (ch1 != ch2)
- return false;
- }
- return true;
- }
- // END: workaround
-
- static final Set createElementNameSet(String... names) {
- final HashSet set = new HashSet();
- for (final String name : names) {
- set.add(name);
- set.add(convertTurkish(name));
- }
- return Collections.unmodifiableSet(set);
- }
-
/** HTML elements that cause a line break (they are block-elements) */
static final Set ENDLINE_ELEMENTS = createElementNameSet(
"p", "h1", "h2", "h3", "h4", "h5", "h6", "div", "ul", "ol", "dl",
Index: lucene/licenses/nekohtml-1.9.15.jar.sha1
===================================================================
--- lucene/licenses/nekohtml-1.9.15.jar.sha1 (revision 1417689)
+++ lucene/licenses/nekohtml-1.9.15.jar.sha1 (working copy)
@@ -1 +0,0 @@
-a45cd7b7401d9c2264d4908182380452c03ebf8f
Index: lucene/licenses/nekohtml-1.9.17.jar.sha1
===================================================================
--- lucene/licenses/nekohtml-1.9.17.jar.sha1 (revision 0)
+++ lucene/licenses/nekohtml-1.9.17.jar.sha1 (working copy)
@@ -0,0 +1 @@
+39a870b0ea4cb0d2a3015c1ab569d17d83122d55
Index: lucene/licenses/nekohtml-1.9.17.jar.sha1
===================================================================
--- lucene/licenses/nekohtml-1.9.17.jar.sha1 (revision 0)
+++ lucene/licenses/nekohtml-1.9.17.jar.sha1 (working copy)
Property changes on: lucene/licenses/nekohtml-1.9.17.jar.sha1
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property