diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 98280c52fe9..f030d230624 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -633,6 +633,7 @@ minillaplocal.query.files=\ mm_conversions.q,\ mm_exim.q,\ mm_loaddata.q,\ + mm_loaddata_split_change.q,\ mrr.q,\ multiMapJoin1.q,\ multiMapJoin2.q,\ diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java index 462b25fa234..d414b1405b7 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java @@ -820,7 +820,8 @@ public Boolean readFileWithCache(long startTime) throws IOException, Interrupted List slices = cachedData.getData(); if (slices.isEmpty()) return false; long uncachedPrefixEnd = slices.get(0).getKnownTornStart(), - uncachedSuffixStart = slices.get(slices.size() - 1).getLastEnd(); + uncachedSuffixStart = slices.get(slices.size() - 1).getLastEnd(), + lastStripeLastStart = slices.get(slices.size() - 1).getLastStart(); Ref stripeIx = Ref.from(0); if (uncachedPrefixEnd > split.getStart()) { // TODO: can we merge neighboring splits? So we don't init so many readers. @@ -856,8 +857,9 @@ public Boolean readFileWithCache(long startTime) throws IOException, Interrupted if (uncachedSuffixStart < endOfSplit || isUnfortunate) { // Note: we assume 0-length split is correct given now LRR interprets offsets (reading an // extra row). Should we instead assume 1+ chars and add 1 for isUnfortunate? - FileSplit splitPart = new FileSplit(split.getPath(), uncachedSuffixStart, - endOfSplit - uncachedSuffixStart, hosts, inMemoryHosts); + // Do not read from uncachedSuffixStart as LineRecordReader skips first row + FileSplit splitPart = new FileSplit(split.getPath(), lastStripeLastStart, + endOfSplit - lastStripeLastStart, hosts, inMemoryHosts); if (!processOneFileSplit(splitPart, startTime, stripeIx, null)) return null; } return true; diff --git a/ql/src/test/queries/clientpositive/mm_loaddata_split_change.q b/ql/src/test/queries/clientpositive/mm_loaddata_split_change.q new file mode 100644 index 00000000000..67e4fd3f4b7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/mm_loaddata_split_change.q @@ -0,0 +1,37 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set tez.grouping.min-size=1; +set tez.grouping.max-size=2; +set mapreduce.map.memory.mb=1024; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.vectorized.execution.enabled=true; +set hive.vectorized.use.vector.serde.deserialize=true; +set hive.vectorized.use.row.serde.deserialize=true; + +drop table load0_ss; +create table load0_ss (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only"); + +load data local inpath '../../data/files/kv1.txt' into table load0_ss; +select count(1) from load0_ss; +load data local inpath '../../data/files/kv2.txt' into table load0_ss; +select count(1) from load0_ss; +load data local inpath '../../data/files/kv1.txt' into table load0_ss; +select count(1) from load0_ss; +load data local inpath '../../data/files/kv2.txt' into table load0_ss; +select count(1) from load0_ss; +load data local inpath '../../data/files/kv1.txt' into table load0_ss; +select count(1) from load0_ss; +load data local inpath '../../data/files/kv2.txt' into table load0_ss; +select count(1) from load0_ss; +load data local inpath '../../data/files/kv1.txt' into table load0_ss; +select count(1) from load0_ss; +load data local inpath '../../data/files/kv2.txt' into table load0_ss; +select count(1) from load0_ss; +load data local inpath '../../data/files/kv1.txt' into table load0_ss; +select count(1) from load0_ss; +load data local inpath '../../data/files/kv2.txt' into table load0_ss; +select count(1) from load0_ss; + +drop table load0_ss; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/mm_loaddata_split_change.q.out b/ql/src/test/results/clientpositive/llap/mm_loaddata_split_change.q.out new file mode 100644 index 00000000000..d7ecfd620dc --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/mm_loaddata_split_change.q.out @@ -0,0 +1,190 @@ +PREHOOK: query: drop table load0_ss +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table load0_ss +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table load0_ss (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@load0_ss +POSTHOOK: query: create table load0_ss (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@load0_ss +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_ss +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_ss +PREHOOK: query: select count(1) from load0_ss +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_ss +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_ss +#### A masked pattern was here #### +500 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_ss +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_ss +PREHOOK: query: select count(1) from load0_ss +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_ss +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_ss +#### A masked pattern was here #### +1000 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_ss +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_ss +PREHOOK: query: select count(1) from load0_ss +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_ss +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_ss +#### A masked pattern was here #### +1500 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_ss +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_ss +PREHOOK: query: select count(1) from load0_ss +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_ss +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_ss +#### A masked pattern was here #### +2000 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_ss +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_ss +PREHOOK: query: select count(1) from load0_ss +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_ss +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_ss +#### A masked pattern was here #### +2500 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_ss +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_ss +PREHOOK: query: select count(1) from load0_ss +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_ss +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_ss +#### A masked pattern was here #### +3000 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_ss +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_ss +PREHOOK: query: select count(1) from load0_ss +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_ss +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_ss +#### A masked pattern was here #### +3500 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_ss +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_ss +PREHOOK: query: select count(1) from load0_ss +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_ss +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_ss +#### A masked pattern was here #### +4000 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_ss +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_ss +PREHOOK: query: select count(1) from load0_ss +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_ss +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_ss +#### A masked pattern was here #### +4500 +PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load0_ss +POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_ss +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load0_ss +PREHOOK: query: select count(1) from load0_ss +PREHOOK: type: QUERY +PREHOOK: Input: default@load0_ss +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from load0_ss +POSTHOOK: type: QUERY +POSTHOOK: Input: default@load0_ss +#### A masked pattern was here #### +5000 +PREHOOK: query: drop table load0_ss +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@load0_ss +PREHOOK: Output: default@load0_ss +POSTHOOK: query: drop table load0_ss +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@load0_ss +POSTHOOK: Output: default@load0_ss