Description
The getURI() method does not resolve the URL correctly if a baseUri is provided without a trailing slash and the relative URL to be resolved does not start with "/". Under those circumstances it resolves to: http://example.comRelativeUrl.
Edit: the previous patch solved the problem only partially.
Modified methods (whole class attached):
// NOTE(review): the trailing @Override at the end of the constructor line annotates the
// startElement method that follows; the line break was lost when the code was pasted.
/**
 * Creates a link extractor for the given task.
 *
 * <p>Stores {@code base} and {@code elements}, then initialises the base URI by passing
 * {@code base.getURI()} to {@link #setBaseUri(URI)}.
 *
 * @param base     the task whose URI is used as the initial base URI
 * @param elements map whose keys are compared against element local names and whose values
 *                 are the attribute names looked up on matching elements (see startElement)
 */
public LinkExtractor(LinkTask base, Map<String, String> elements)
{ super(); this.base = base; this.elements = elements; this.setBaseUri(base.getURI()); } @Override
/**
 * Element-start callback: handles an optional base element and extracts links from the
 * configured elements.
 *
 * <p>If {@code checkBase} is set, the local name equals {@code BASE_ELEMENT} (ignoring case)
 * and the {@code BASE_ATTRIBUTE} attribute is present, the base-handling branch runs.
 * Afterwards every entry of {@code elements} is checked: when the entry's key equals the
 * local name (ignoring case) and the attribute named by the entry's value is present, that
 * attribute value is passed to {@code getURI} and the result is logged at debug level.
 *
 * @param uri the namespace URI (not read in the visible code)
 * @param loc the local element name, compared case-insensitively
 * @param raw the qualified name (not read in the visible code)
 * @param att the element's attributes
 * @throws SAXException declared by the signature; no visible statement throws it
 */
public void startElement(String uri, String loc, String raw, Attributes att) throws SAXException {
if (checkBase && BASE_ELEMENT.equalsIgnoreCase(loc) && att.getValue(BASE_ATTRIBUTE) != null) {
try
// NOTE(review): the try body is missing from this paste. Presumably it builds a URI from
// att.getValue(BASE_ATTRIBUTE) and installs it as the base URI -- confirm against the
// attached class.
catch (URISyntaxException e)
{ log.debug("Base URI not valid: " + att.getValue(BASE_ATTRIBUTE)); }}
// Walk every configured element-name -> attribute-name pair.
Iterator<String> it = elements.keySet().iterator();
String elem, linkAtt;
while (it.hasNext()) {
elem = it.next();
linkAtt = elements.get(elem);
if (elem.equalsIgnoreCase(loc) && att.getValue(linkAtt) != null) {
// `link` is not declared in this method; presumably a field of the class -- TODO confirm.
link = getURI(att.getValue(linkAtt));
log.debug("Found element: " + elem + " with link: " + link);
if (link != null)
// NOTE(review): the statement guarded by this null check is missing from this paste; as
// written the `if` has no body and the fragment does not compile.
}
}
}
/**
 * Sets the base URI of this extractor.
 *
 * <p>When the string form of {@code baseUri} ends with its host (for example
 * {@code http://example.com}, i.e. no path and no trailing slash) the "fix" branch runs;
 * otherwise {@code baseUri} is stored unchanged.
 *
 * <p>NOTE(review): {@code baseUri.getHost()} returns {@code null} for URIs without a host
 * component, and {@code String.endsWith(null)} throws a {@code NullPointerException} --
 * confirm that callers never pass such URIs. A URI with an explicit port
 * ({@code http://example.com:8080}) does not end with its host and therefore takes the
 * unchanged branch.
 *
 * @param baseUri the new base URI; must not be {@code null}
 */
public void setBaseUri(URI baseUri) {
if (baseUri.toString().endsWith(baseUri.getHost())) {
try
// NOTE(review): the try body is missing from this paste. Judging by the issue description
// and the error message below, it presumably stores a copy of baseUri with a trailing "/"
// appended -- confirm against the attached class.
catch (URISyntaxException e)
{ log.error("could not fix base URI", e); }} else
{ this.baseUri = baseUri; }}