diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java index 9b1a905..748f095 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java @@ -169,8 +169,11 @@ public boolean init() { private boolean checkOrcSchemaEvolution() { for (int i = 0; i < columnCount; ++i) { - int colId = columnIds == null ? i : columnIds.get(i); - if (!evolution.isPPDSafeConversion(colId)) { + int projectedColId = columnIds == null ? i : columnIds.get(i); + // For adjusting index for ORC struct + // TODO: LLAP IO does not support ACID. Add OrcInputFormat.getRootColumn(!isAcidScan) with fileColId later + int fileColId = projectedColId + 1; + if (!evolution.isPPDSafeConversion(fileColId)) { LlapIoImpl.LOG.warn("Unsupported schema evolution! Disabling Llap IO for {}", split); return false; } diff --git ql/src/test/queries/clientpositive/llap_reader.q ql/src/test/queries/clientpositive/llap_reader.q new file mode 100644 index 0000000..8a7853c --- /dev/null +++ ql/src/test/queries/clientpositive/llap_reader.q @@ -0,0 +1,22 @@ +SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; +SET hive.vectorized.execution.enabled=true; +SET hive.llap.io.enabled=true; + +CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc; +INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4); + +ALTER TABLE test CHANGE f1 f1 bigint; +ALTER TABLE test CHANGE f2 f2 bigint; +ALTER TABLE test CHANGE f3 f3 bigint; + +-- llap counters +SELECT count(f1) FROM test GROUP BY f1; + +DROP TABLE test; + +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.support.concurrency=true; +CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); +INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4'); + +SELECT count(f1) FROM test_acid GROUP BY f1; diff --git ql/src/test/results/clientpositive/llap/llap_reader.q.out ql/src/test/results/clientpositive/llap/llap_reader.q.out new file mode 100644 index 0000000..6b6b5b8 --- /dev/null +++ ql/src/test/results/clientpositive/llap/llap_reader.q.out @@ -0,0 +1,112 @@ +PREHOOK: query: CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +PREHOOK: query: INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4) +PREHOOK: type: QUERY +PREHOOK: Output: default@test +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 24 + HDFS_BYTES_WRITTEN: 340 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 3 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_1_default.test: 4 +Stage-1 LLAP IO COUNTERS: + CACHE_MISS_BYTES: 24 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 4 +PREHOOK: query: ALTER TABLE test CHANGE f1 f1 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: ALTER TABLE test CHANGE f2 f2 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: ALTER TABLE test CHANGE f3 f3 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 358 + HDFS_BYTES_WRITTEN: 143 + HDFS_READ_OPS: 6 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_0: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 4 +Stage-1 LLAP IO COUNTERS: + ALLOCATED_BYTES: 262144 + ALLOCATED_USED_BYTES: 4 + CACHE_MISS_BYTES: 7 + METADATA_CACHE_MISS: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 4 + SELECTED_ROWGROUPS: 1 +1 +1 +1 +1 +PREHOOK: query: DROP TABLE test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_acid +PREHOOK: query: INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4') +PREHOOK: type: QUERY +PREHOOK: Output: default@test_acid +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 28 + HDFS_BYTES_WRITTEN: 1501 + HDFS_READ_OPS: 9 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 8 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 2 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_1_default.test_acid: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 4 +Stage-1 LLAP IO COUNTERS: + CACHE_MISS_BYTES: 28 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 4 +PREHOOK: query: SELECT count(f1) FROM test_acid GROUP BY f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_acid +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 1567 + HDFS_BYTES_WRITTEN: 143 + HDFS_READ_OPS: 12 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_0: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 4 +1 +1 +1 +1 diff --git ql/src/test/results/clientpositive/llap_reader.q.out ql/src/test/results/clientpositive/llap_reader.q.out new file mode 100644 index 0000000..6c49214 --- /dev/null +++ ql/src/test/results/clientpositive/llap_reader.q.out @@ -0,0 +1,46 @@ +PREHOOK: query: CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +PREHOOK: query: INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4) +PREHOOK: type: QUERY +PREHOOK: Output: default@test +PREHOOK: query: ALTER TABLE test CHANGE f1 f1 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: ALTER TABLE test CHANGE f2 f2 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: ALTER TABLE test CHANGE f3 f3 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +1 +1 +1 +1 +PREHOOK: query: DROP TABLE test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_acid +PREHOOK: query: INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4') +PREHOOK: type: QUERY +PREHOOK: Output: default@test_acid +PREHOOK: query: SELECT count(f1) FROM test_acid GROUP BY f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_acid +#### A masked pattern was here #### +1 +1 +1 +1