Description
This is an ORC fix for a regression introduced by ORC-540. It has also been tracked on HIVE-22348, but it turned out an ORC fix is needed.
With ORC 1.5.6 plus ORC-540 cherry-picked, the following issue can be reproduced (I will provide a unit test here for confirmation):
Running the test with -Dtest=TestMiniLlapLocalCliDriver -Dqfile=orc_ppd_schema_evol_3a.q can fail if schema evolution and force positional evolution are both switched on:
use case
SET hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
SET hive.fetch.task.conversion=none;
SET hive.cbo.enable=false;
SET hive.map.aggr=false;
CREATE TABLE orc_ppd_n3(t tinyint, si smallint, i int, b bigint, f float, d double, bo boolean, s string, c char(50), v varchar(50), da date, `dec` decimal(4,2), bin binary) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");
insert overwrite table orc_ppd_n3 select t, si, i, b, f, d, bo, s, cast(s as char(50)) as c, cast(s as varchar(50)) as v, da, `dec`, bin from orc_ppd_staging_n2 order by t, si, i, b, f, d, bo, s, c, v, da, `dec`, bin;
...
alter table orc_ppd_n3 add columns (boo boolean);
SET hive.optimize.index.filter=true;
-- ppd on newly added column
select count(*) from orc_ppd_n3 where si = 442 or boo is not null or boo = false;
2019-10-15T07:19:07,130 INFO [TezTR-94589_1_87_0_0_0] orc.OrcInputFormat: ORC pushdown predicate: leaf-0 = (EQUALS si 442), leaf-1 = (IS_NULL boo), leaf-2 = (EQUALS boo false), expr = (or leaf-0 (not leaf-1) leaf-2) 2019-10-15T07:19:07,130 INFO [TezTR-94589_1_87_0_0_0] orc.ReaderImpl: Reading ORC rows from file:/Users/lbodor/CDH/hive/itests/qtest/target/localfs/warehouse/orc_ppd_n3/000000_0 with {include: [true, false, true, false, false, false, false, false, false, false, false, false, false, false, true], offset: 3, length: 54928, sarg: leaf-0 = (EQUALS si 442), leaf-1 = (IS_NULL boo), leaf-2 = (EQUALS boo false), expr = (or leaf-0 (not leaf-1) leaf-2), columns: ['null', 'null', 'si', 'null', 'null', 'null', 'null', 'null', 'null', 'null', 'null', 'null', 'null', 'null', 'boo'], schema: struct<t:string,si:smallint,i:int,b:bigint,f:string,d:double,bo:boolean,s:string,c:char(50),v:varchar(50),da:date,dec:decimal(4,2),bin:binary,boo:boolean>, includeAcidColumns: true} 2019-10-15T07:19:07,133 ERROR [TezTR-94589_1_87_0_0_0] tez.TezProcessor: java.lang.RuntimeException: java.io.IOException: java.io.IOException: java.lang.ArrayIndexOutOfBoundsException: 14 at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:206) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:145) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:111) at org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:157) at org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83) at org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:703) at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:662) at 
org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150) at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:532) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:178) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:266) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1876) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool$WrappedCallable.call(StatsRecordingThreadPool.java:118) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.io.IOException: java.io.IOException: java.lang.ArrayIndexOutOfBoundsException: 14 at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97) at 
org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57) at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:422) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:203) ... 26 more Caused by: java.io.IOException: java.lang.ArrayIndexOutOfBoundsException: 14 at org.apache.hadoop.hive.llap.io.api.impl.LlapInputFormat.getRecordReader(LlapInputFormat.java:131) at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:419) ... 27 more Caused by: java.lang.ArrayIndexOutOfBoundsException: 14 at org.apache.orc.impl.RecordReaderImpl$SargApplier.<init>(RecordReaderImpl.java:894) at org.apache.orc.impl.RecordReaderImpl.<init>(RecordReaderImpl.java:221) at org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.<init>(RecordReaderImpl.java:67) at org.apache.hadoop.hive.ql.io.orc.ReaderImpl.rowsOptions(ReaderImpl.java:83) at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.createReaderFromFile(OrcInputFormat.java:336) at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$OrcRecordReader.<init>(OrcInputFormat.java:246) at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getRecordReader(OrcInputFormat.java:2033) at org.apache.hadoop.hive.llap.io.api.impl.LlapInputFormat.getRecordReader(LlapInputFormat.java:113) ... 28 more
Attachments
Issue Links
- is related to
-
ORC-571 ArrayIndexOutOfBoundsException in StripePlanner.readRowIndex
- Closed
- links to