Details
Type: Bug
Status: Closed
Priority: Major
Resolution: Fixed
Affects Version/s: 1.6.1
Fix Version/s: None
Environment: Spark 2.3.2
Description
Creating a bloom datamap fails with a NullPointerException.

Steps to reproduce:
create table brinjal_bloom (imei string, AMSize string, channelsId string,
  ActiveCountry string, Activecity string, gamePointId double,
  deviceInformationId double, productionDate timestamp,
  deliveryDate timestamp, deliverycharge double)
STORED BY 'carbondata' TBLPROPERTIES ('table_blocksize'='1');

LOAD DATA INPATH 'hdfs://hacluster/chetan/vardhandaterestruct.csv'
INTO TABLE brinjal_bloom
OPTIONS ('DELIMITER'=',', 'QUOTECHAR'='"', 'BAD_RECORDS_ACTION'='FORCE',
  'FILEHEADER'='imei,deviceInformationId,AMSize,channelsId,ActiveCountry,Activecity,gamePointId,productionDate,deliveryDate,deliverycharge');

0: jdbc:hive2://10.20.255.171:23040/default> CREATE DATAMAP dm_brinjal4 ON TABLE brinjal_bloom
USING 'bloomfilter'
DMPROPERTIES ('INDEX_COLUMNS'='AMSize', 'BLOOM_SIZE'='640000', 'BLOOM_FPP'='0.00001');
Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 210.0 failed 4 times, most recent failure: Lost task 0.3 in stage 210.0 (TID 1477, vm2, executor 2): java.lang.NullPointerException
at org.apache.carbondata.core.datamap.Segment.getCommittedIndexFile(Segment.java:150)
at org.apache.carbondata.core.util.BlockletDataMapUtil.getTableBlockUniqueIdentifiers(BlockletDataMapUtil.java:198)
at org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory.getTableBlockIndexUniqueIdentifiers(BlockletDataMapFactory.java:176)
at org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory.getDataMaps(BlockletDataMapFactory.java:154)
at org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory.getSegmentProperties(BlockletDataMapFactory.java:425)
at org.apache.carbondata.datamap.IndexDataMapRebuildRDD.internalCompute(IndexDataMapRebuildRDD.scala:359)
at org.apache.carbondata.spark.rdd.CarbonRDD.compute(CarbonRDD.scala:84)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace: (state=,code=0)
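The trace shows the NPE raised inside Segment.getCommittedIndexFile while the bloom datamap rebuild job resolves the segment's committed index files. The following is a hypothetical, simplified sketch of that call path, not CarbonData's actual source or the committed fix; the readCommitScope field and the shape of ReadCommittedScope are assumptions. It only illustrates the kind of guard that would replace the bare NullPointerException with a descriptive error when a Segment is used without a read-committed scope.

import java.util.Map;

// Hypothetical, simplified model of the failing call path; names and
// shapes here are assumptions, not CarbonData's actual source.
public class Segment {
  private final String segmentNo;
  // Assumed to be left unset for Segment instances built during the
  // datamap rebuild, which would reproduce the NPE at Segment.java:150.
  private ReadCommittedScope readCommitScope;

  public Segment(String segmentNo) {
    this.segmentNo = segmentNo;
  }

  // Returns the segment's committed index files, keyed by file path.
  public Map<String, String> getCommittedIndexFile() {
    // Defensive guard: fail fast with context instead of an NPE.
    if (readCommitScope == null) {
      throw new IllegalStateException(
          "ReadCommittedScope is not set for segment " + segmentNo);
    }
    return readCommitScope.getCommittedIndexFile(this);
  }

  // Minimal stand-in for the scope that resolves committed index files.
  interface ReadCommittedScope {
    Map<String, String> getCommittedIndexFile(Segment segment);
  }
}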