Details
-
Bug
-
Status: Open
-
Major
-
Resolution: Unresolved
-
0.11
-
None
-
None
-
None
-
Mac OSX Lion 10.7.2, Hadoop (0.20.205.0), Pig (Apache Pig version 0.11.0-SNAPSHOT (r1230844)).
Description
When I try to run illustrate while using a JavaScript UDF, I always get an NPE (NullPointerException), but if I do a dump/store, everything works fine.
dans-MacBook-Pro:pig danoyoung$ pig -x local
2012-01-12 22:16:57,131 [main] INFO org.apache.pig.Main - Logging error messages to: /Users/danoyoung/projects/pig/pig_1326431817128.log
2012-01-12 22:16:57,386 [main] INFO org.apache.pig.backend.hadoop.executionengine.HExecutionEngine - Connecting to hadoop file system at: file:///
2012-01-12 22:16:57.483 java[27809:1903] Unable to load realm info from SCDynamicStore
grunt> set io.sort.mb 500;
grunt> register '/Users/danoyoung/projects/pig/udf/udf.js' using org.apache.pig.scripting.js.JsScriptEngine as myfuncs;
2012-01-12 22:17:24,803 [main] INFO org.apache.pig.scripting.js.JsScriptEngine - Register scripting UDF: get_date_marker
2012-01-12 22:17:24,804 [main] INFO org.apache.pig.scripting.js.JsScriptEngine - Register scripting UDF: get_record
grunt> register '/usr/local/pig/piggybank.jar';
grunt>
grunt> a = LOAD '/Users/danoyoung/Downloads/adwords_KeywordDailyReport_1-1-2012_daily' USING org.apache.pig.piggybank.storage.XMLLoader('row') AS (doc:chararray);
grunt>
grunt> b = FOREACH a GENERATE FLATTEN(myfuncs.get_record(doc)) AS (dw_date_marker:int,ad_network_ad_group_key:long,ad_network_keyword_key:long,firstpagecpc:int,qualityscore:int,cost:float,position:float);
grunt> describe b;
b:
grunt>
grunt> illustrate b;
2012-01-12 22:17:25,642 [main] INFO org.apache.pig.backend.hadoop.executionengine.HExecutionEngine - Connecting to hadoop file system at: file:///
2012-01-12 22:17:25,846 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler - File concatenation threshold: 100 optimistic? false
2012-01-12 22:17:25,860 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size before optimization: 1
2012-01-12 22:17:25,860 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size after optimization: 1
2012-01-12 22:17:25,870 [main] INFO org.apache.pig.tools.pigstats.ScriptState - Pig script settings are added to the job
2012-01-12 22:17:25,887 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - mapred.job.reduce.markreset.buffer.percent is not set, set to default 0.3
2012-01-12 22:17:26,161 [main] INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat - Total input paths to process : 1
2012-01-12 22:17:26,721 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler - File concatenation threshold: 100 optimistic? false
2012-01-12 22:17:26,723 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size before optimization: 1
2012-01-12 22:17:26,723 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size after optimization: 1
2012-01-12 22:17:26,724 [main] INFO org.apache.pig.tools.pigstats.ScriptState - Pig script settings are added to the job
2012-01-12 22:17:26,725 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler - mapred.job.reduce.markreset.buffer.percent is not set, set to default 0.3
java.lang.NullPointerException
at org.apache.pig.scripting.js.JsFunction.exec(JsFunction.java:200)
at org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.getNext(POUserFunc.java:225)
at org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc.getNext(POUserFunc.java:262)
at org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator.getNext(PhysicalOperator.java:334)
at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach.processPlan(POForEach.java:332)
at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach.getNext(POForEach.java:284)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.runPipeline(PigGenericMapBase.java:271)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:266)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:64)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at org.apache.pig.pen.LocalMapReduceSimulator.launchPig(LocalMapReduceSimulator.java:194)
at org.apache.pig.pen.ExampleGenerator.getData(ExampleGenerator.java:257)
at org.apache.pig.pen.ExampleGenerator.getData(ExampleGenerator.java:238)
at org.apache.pig.pen.LineageTrimmingVisitor.init(LineageTrimmingVisitor.java:103)
at org.apache.pig.pen.LineageTrimmingVisitor.<init>(LineageTrimmingVisitor.java:98)
at org.apache.pig.pen.ExampleGenerator.getExamples(ExampleGenerator.java:166)
at org.apache.pig.PigServer.getExamples(PigServer.java:1202)
at org.apache.pig.tools.grunt.GruntParser.processIllustrate(GruntParser.java:698)
at org.apache.pig.tools.pigscript.parser.PigScriptParser.Illustrate(PigScriptParser.java:591)
at org.apache.pig.tools.pigscript.parser.PigScriptParser.parse(PigScriptParser.java:306)
at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:188)
at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:164)
at org.apache.pig.tools.grunt.Grunt.run(Grunt.java:69)
at org.apache.pig.Main.run(Main.java:523)
at org.apache.pig.Main.main(Main.java:148)
2012-01-12 22:17:26,767 [main] ERROR org.apache.pig.tools.grunt.Grunt - ERROR 2997: Encountered IOException. Exception : null
Details at logfile: /Users/danoyoung/projects/pig/pig_1326431817128.log
grunt>
Here are the log details:
Pig Stack Trace
---------------
ERROR 2997: Encountered IOException. Exception : null
java.io.IOException: Exception : null
at org.apache.pig.PigServer.getExamples(PigServer.java:1208)
at org.apache.pig.tools.grunt.GruntParser.processIllustrate(GruntParser.java:698)
at org.apache.pig.tools.pigscript.parser.PigScriptParser.Illustrate(PigScriptParser.java:591)
at org.apache.pig.tools.pigscript.parser.PigScriptParser.parse(PigScriptParser.java:306)
at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:188)
at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:164)
at org.apache.pig.tools.grunt.Grunt.run(Grunt.java:69)
at org.apache.pig.Main.run(Main.java:523)
at org.apache.pig.Main.main(Main.java:148)
================================================================================
Here's the Javascript UDF:
// Schema string attached to the UDF; the field names match the keys of the
// object returned by get_record below.
get_record.outputSchema = "dw_date_marker:int,ad_network_ad_group_key:long,ad_network_keyword_key:long,firstpagecpc:int,qualityscore:int,cost:float,position:float";

/**
 * Extracts keyword-report fields from one <row .../> element given as text.
 *
 * Each value is cut out of `doc` by locating the attribute name with a
 * case-insensitive regex and taking the text up to the start of the next
 * attribute (or the closing `"></row`).
 *
 * @param {string} doc  Text of a single row element.
 * @returns {Object} Object with the keys dw_date_marker,
 *     ad_network_ad_group_key, ad_network_keyword_key, firstpagecpc,
 *     qualityscore, cost and position. firstpagecpc and cost are divided by
 *     1000000; the other extracted values are left as substrings.
 */
function get_record(doc) {
    // Text between the match of startRe (skipping `skip` characters: the
    // `name=` prefix plus the opening quote) and the match of endRe.
    // NOTE(review): String.search returns -1 when a pattern is absent, which
    // is not checked here; the row is assumed to contain every attribute.
    function between(startRe, skip, endRe) {
        return doc.substring(doc.search(startRe) + skip, doc.search(endRe));
    }

    var dt = between(/date=/ig, 6, /\" campaignid=/ig);
    //var campaignId = between(/campaignid=/ig, 12, /\" adgroupid=/ig);
    var adGroupKey = between(/adgroupid=/ig, 11, /\" keywordid=/ig);
    var keywordKey = between(/keywordid=/ig, 11, /\" keyword=/ig);
    var firstPageCpc = between(/firstpagecpc=/ig, 14, /\" qualityscore=/ig) / 1000000;
    var qualityScore = between(/qualityscore=/ig, 14, /\" imps=/ig);
    var cost = between(/cost=/ig, 6, /\" pos=/ig) / 1000000;
    var position = between(/pos=/ig, 5, /\"><\/row/ig);
    var dateMarker = get_date_marker(dt);

    // The object literal must open on the same line as `return`: with a line
    // break after `return`, automatic semicolon insertion ends the statement
    // there and the literal is never returned.
    return {
        dw_date_marker: dateMarker,
        ad_network_ad_group_key: adGroupKey,
        ad_network_keyword_key: keywordKey,
        firstpagecpc: firstPageCpc,
        qualityscore: qualityScore,
        cost: cost,
        position: position
    };
}
/**
 * Looks up the marker stored for a date string.
 *
 * The lookup key is "_" followed by `dt` with every "-" replaced by "_".
 *
 * @param {string} dt  Date text as extracted from the row's date attribute.
 * @returns {*} The table entry for that key, or undefined when the table has
 *     no such key.
 */
function get_date_marker(dt) {
    // NOTE(review): the table literal is missing from the script as posted
    // (`var dateMarkers =` followed directly by `;` does not parse). An empty
    // table keeps the script loadable; restore the original key -> marker
    // entries here before relying on the result.
    var dateMarkers = {};
    var key = "_" + dt.replace(/-/g, "_");
    return dateMarkers[key];
}