Description
When running Nutch with the protocol-selenium plugin in deploy mode, I observe the following behaviour:
lmcgibbn@LMC-032857 /usr/local/nutch(master) $ ./runtime/deploy/bin/nutch parsechecker -dumpText "http://www.jpl.nasa.gov" 16/02/25 15:22:08 INFO parse.ParserChecker: fetching: http://www.jpl.nasa.gov 16/02/25 15:22:08 INFO plugin.PluginRepository: Plugins: looking in: /usr/local/hadoop-2.5.2/hd-tmp/hadoop-unjar6419843999522854503/classes/plugins 16/02/25 15:22:09 INFO plugin.PluginRepository: Plugin Auto-activation mode: [true] 16/02/25 15:22:09 INFO plugin.PluginRepository: Registered Plugins: 16/02/25 15:22:09 INFO plugin.PluginRepository: the nutch core extension points (nutch-extensionpoints) 16/02/25 15:22:09 INFO plugin.PluginRepository: Basic URL Normalizer (urlnormalizer-basic) 16/02/25 15:22:09 INFO plugin.PluginRepository: Html Parse Plug-in (parse-html) 16/02/25 15:22:09 INFO plugin.PluginRepository: Basic Indexing Filter (index-basic) 16/02/25 15:22:09 INFO plugin.PluginRepository: Http Protocol Plug-in (protocol-selenium) 16/02/25 15:22:09 INFO plugin.PluginRepository: SolrIndexWriter (indexer-solr) 16/02/25 15:22:09 INFO plugin.PluginRepository: HTTP Framework (lib-http) 16/02/25 15:22:09 INFO plugin.PluginRepository: Regex URL Filter (urlfilter-regex) 16/02/25 15:22:09 INFO plugin.PluginRepository: Pass-through URL Normalizer (urlnormalizer-pass) 16/02/25 15:22:09 INFO plugin.PluginRepository: Regex URL Normalizer (urlnormalizer-regex) 16/02/25 15:22:09 INFO plugin.PluginRepository: CyberNeko HTML Parser (lib-nekohtml) 16/02/25 15:22:09 INFO plugin.PluginRepository: Tika Parser Plug-in (parse-tika) 16/02/25 15:22:09 INFO plugin.PluginRepository: OPIC Scoring Plug-in (scoring-opic) 16/02/25 15:22:09 INFO plugin.PluginRepository: Anchor Indexing Filter (index-anchor) 16/02/25 15:22:09 INFO plugin.PluginRepository: HTTP Framework (lib-selenium) 16/02/25 15:22:09 INFO plugin.PluginRepository: Regex URL Filter Framework (lib-regex-filter) 16/02/25 15:22:09 INFO plugin.PluginRepository: Registered Extension-Points: 16/02/25 15:22:09 INFO 
plugin.PluginRepository: Nutch URL Normalizer (org.apache.nutch.net.URLNormalizer) 16/02/25 15:22:09 INFO plugin.PluginRepository: Nutch Protocol (org.apache.nutch.protocol.Protocol) 16/02/25 15:22:09 INFO plugin.PluginRepository: Nutch Segment Merge Filter (org.apache.nutch.segment.SegmentMergeFilter) 16/02/25 15:22:09 INFO plugin.PluginRepository: Nutch URL Filter (org.apache.nutch.net.URLFilter) 16/02/25 15:22:09 INFO plugin.PluginRepository: Nutch Index Writer (org.apache.nutch.indexer.IndexWriter) 16/02/25 15:22:09 INFO plugin.PluginRepository: Nutch Indexing Filter (org.apache.nutch.indexer.IndexingFilter) 16/02/25 15:22:09 INFO plugin.PluginRepository: HTML Parse Filter (org.apache.nutch.parse.HtmlParseFilter) 16/02/25 15:22:09 INFO plugin.PluginRepository: Nutch Content Parser (org.apache.nutch.parse.Parser) 16/02/25 15:22:09 INFO plugin.PluginRepository: Nutch Scoring (org.apache.nutch.scoring.ScoringFilter) 16/02/25 15:22:09 INFO protocol.RobotRulesParser: robots.txt whitelist not configured. 
16/02/25 15:22:09 INFO selenium.Http: http.proxy.host = null 16/02/25 15:22:09 INFO selenium.Http: http.proxy.port = 8080 16/02/25 15:22:09 INFO selenium.Http: http.proxy.exception.list = false 16/02/25 15:22:09 INFO selenium.Http: http.timeout = 10000 16/02/25 15:22:09 INFO selenium.Http: http.content.limit = -1 16/02/25 15:22:09 INFO selenium.Http: http.agent = nutch_test/Nutch-1.12-SNAPSHOT 16/02/25 15:22:09 INFO selenium.Http: http.accept.language = en-us,en-gb,en;q=0.7,*;q=0.3 16/02/25 15:22:09 INFO selenium.Http: http.accept = text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 16/02/25 15:22:09 ERROR selenium.Http: Failed to get protocol output java.lang.NoSuchFieldError: INSTANCE at org.apache.http.impl.io.DefaultHttpRequestWriterFactory.<init>(DefaultHttpRequestWriterFactory.java:52) at org.apache.http.impl.io.DefaultHttpRequestWriterFactory.<init>(DefaultHttpRequestWriterFactory.java:56) at org.apache.http.impl.io.DefaultHttpRequestWriterFactory.<clinit>(DefaultHttpRequestWriterFactory.java:46) at org.apache.http.impl.conn.ManagedHttpClientConnectionFactory.<init>(ManagedHttpClientConnectionFactory.java:72) at org.apache.http.impl.conn.ManagedHttpClientConnectionFactory.<init>(ManagedHttpClientConnectionFactory.java:84) at org.apache.http.impl.conn.ManagedHttpClientConnectionFactory.<clinit>(ManagedHttpClientConnectionFactory.java:59) at org.apache.http.impl.conn.PoolingHttpClientConnectionManager$InternalConnectionFactory.<init>(PoolingHttpClientConnectionManager.java:493) at org.apache.http.impl.conn.PoolingHttpClientConnectionManager.<init>(PoolingHttpClientConnectionManager.java:149) at org.apache.http.impl.conn.PoolingHttpClientConnectionManager.<init>(PoolingHttpClientConnectionManager.java:138) at org.apache.http.impl.conn.PoolingHttpClientConnectionManager.<init>(PoolingHttpClientConnectionManager.java:114) at org.openqa.selenium.remote.internal.HttpClientFactory.getClientConnectionManager(HttpClientFactory.java:74) at 
org.openqa.selenium.remote.internal.HttpClientFactory.<init>(HttpClientFactory.java:57) at org.openqa.selenium.remote.internal.HttpClientFactory.<init>(HttpClientFactory.java:60) at org.openqa.selenium.remote.internal.ApacheHttpClient$Factory.getDefaultHttpClientFactory(ApacheHttpClient.java:251) at org.openqa.selenium.remote.internal.ApacheHttpClient$Factory.<init>(ApacheHttpClient.java:228) at org.openqa.selenium.remote.HttpCommandExecutor.getDefaultClientFactory(HttpCommandExecutor.java:96) at org.openqa.selenium.remote.HttpCommandExecutor.<init>(HttpCommandExecutor.java:70) at org.openqa.selenium.remote.HttpCommandExecutor.<init>(HttpCommandExecutor.java:58) at org.openqa.selenium.firefox.internal.NewProfileExtensionConnection.start(NewProfileExtensionConnection.java:97) at org.openqa.selenium.firefox.FirefoxDriver.startClient(FirefoxDriver.java:271) at org.openqa.selenium.remote.RemoteWebDriver.<init>(RemoteWebDriver.java:117) at org.openqa.selenium.firefox.FirefoxDriver.<init>(FirefoxDriver.java:216) at org.openqa.selenium.firefox.FirefoxDriver.<init>(FirefoxDriver.java:211) at org.openqa.selenium.firefox.FirefoxDriver.<init>(FirefoxDriver.java:207) at org.openqa.selenium.firefox.FirefoxDriver.<init>(FirefoxDriver.java:120) at org.apache.nutch.protocol.selenium.HttpWebClient.getDriverForPage(HttpWebClient.java:75) at org.apache.nutch.protocol.selenium.HttpWebClient.getHtmlPage(HttpWebClient.java:155) at org.apache.nutch.protocol.selenium.HttpResponse.readPlainContent(HttpResponse.java:244) at org.apache.nutch.protocol.selenium.HttpResponse.<init>(HttpResponse.java:168) at org.apache.nutch.protocol.selenium.Http.getResponse(Http.java:56) at org.apache.nutch.protocol.http.api.HttpBase.getProtocolOutput(HttpBase.java:261) at org.apache.nutch.parse.ParserChecker.run(ParserChecker.java:136) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70) at org.apache.nutch.parse.ParserChecker.main(ParserChecker.java:265) at 
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.main(RunJar.java:212) Fetch failed with protocol status: exception(16), lastModified=0: java.lang.NoSuchFieldError: INSTANCE