Details
-
Bug
-
Status: Resolved
-
Major
-
Resolution: Duplicate
-
0.21.0
-
None
-
None
-
None
-
Java 1.6
Description
Hi,
I am using Hadoop version 0.21 and Java 1.6. Please help me fix this issue. Which version of the jar should I use?
I have included the sample code and the XML input here.
<?xml version="1.0"?>
<Company>
<Employee>
<id>100</id>
<ename>ranjini</ename>
<dept>IT</dept>
<sal>123456</sal>
<location>nextlevel</location>
</Employee>
</Company>
import java.io.IOException; import java.util.*; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.conf.*; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.io.*; import org.apache.hadoop.mapred.*; import org.apache.hadoop.util.*; import java.io.*; import org.apache.hadoop.mapred.lib.*; import java.io.Reader; import java.io.StringReader; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.input.SAXBuilder; public class ParseXml { public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> { public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { String s=""; FileSystem fs=null; Configuration conf=new Configuration(); conf.set("fs.default.name","hdfs://localhost:4440/"); Path srcpath=new Path("/user/hduser/Ran/"); try { String xmlString = value.toString(); SAXBuilder builder = new SAXBuilder(); Reader in = new StringReader(xmlString); Document doc = builder.build(in); Element root = doc.getRootElement(); s =root.getChild("Employee").getChild("id").getChild("ename").getChild("dept").getChild("sal").getChild("location").getTextTrim(); output.collect(new Text(""),new Text(s)); } catch (Exception e) { e.printStackTrace(); } } } public static void main(String[] args) throws Exception { String input="/user/hduser/Ran/"; String fileoutput="/user/task/Sales/"; JobConf conf = new JobConf(ParseXml.class); conf.setJobName("file"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setNumReduceTasks(1); conf.setMapperClass(Map.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf,input); Path outPath = new Path(fileoutput); FileOutputFormat.setOutputPath(conf, outPath); FileSystem dfs = FileSystem.get(outPath.toUri(), 
conf); if (dfs.exists(outPath)) { dfs.delete(outPath, true); } //conf.setOutputFormat(MultiFileOutput.class); JobClient.runJob(conf); } }
When processing an XML file as input via MapReduce, the following error occurs:
conf.Configuration: error parsing conf file: javax.xml.parsers.ParserConfigurationException: Feature 'http://apache.org/xml/features/xinclude' is not recognized. Exception in thread "main" java.lang.RuntimeException: javax.xml.parsers.ParserConfigurationException: Feature 'http://apache.org/xml/features/xinclude' is not recognized. at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:1171) at org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:1030) at org.apache.hadoop.conf.Configuration.getProps(Configuration.java:980) at org.apache.hadoop.conf.Configuration.get(Configuration.java:382) at org.apache.hadoop.util.RunJar.main(RunJar.java:109) Caused by: javax.xml.parsers.ParserConfigurationException: Feature 'http://apache.org/xml/features/xinclude' is not recognized. at org.apache.xerces.jaxp.DocumentBuilderFactoryImpl.newDocumentBuilder(Unknown Source) at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:1061) ... 4 more
Please help to fix the issue
Attachments
Attachments
Issue Links
- duplicates
-
HADOOP-5254 Xinclude setup results in a stack trace
- Resolved
- is duplicated by
-
MAPREDUCE-5667 Error in runtime in mapreduce code
- Resolved
- relates to
-
HADOOP-5254 Xinclude setup results in a stack trace
- Resolved