diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java index 9b1a905..d4e14a8 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java @@ -93,6 +93,8 @@ private SchemaEvolution evolution; + private final boolean isAcidScan; + public LlapRecordReader(JobConf job, FileSplit split, List includedCols, String hostName, ColumnVectorProducer cvp, ExecutorService executor, InputFormat sourceInputFormat, Deserializer sourceSerDe, Reporter reporter) @@ -139,7 +141,7 @@ public LlapRecordReader(JobConf job, FileSplit split, List includedCols partitionValues = null; } - boolean isAcidScan = HiveConf.getBoolVar(jobConf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); + isAcidScan = HiveConf.getBoolVar(jobConf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr( job, isAcidScan, Integer.MAX_VALUE); @@ -169,8 +171,11 @@ public boolean init() { private boolean checkOrcSchemaEvolution() { for (int i = 0; i < columnCount; ++i) { - int colId = columnIds == null ? i : columnIds.get(i); - if (!evolution.isPPDSafeConversion(colId)) { + int projectedColId = columnIds == null ? i : columnIds.get(i); + // Adjust file column index for ORC struct. + // LLAP IO does not support ACID. When it supports, this would be auto adjusted. + int fileColId = OrcInputFormat.getRootColumn(!isAcidScan) + projectedColId + 1; + if (!evolution.isPPDSafeConversion(fileColId)) { LlapIoImpl.LOG.warn("Unsupported schema evolution! Disabling Llap IO for {}", split); return false; } diff --git ql/src/test/queries/clientpositive/llap_reader.q ql/src/test/queries/clientpositive/llap_reader.q new file mode 100644 index 0000000..aa2f5f0 --- /dev/null +++ ql/src/test/queries/clientpositive/llap_reader.q @@ -0,0 +1,34 @@ +SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; +SET hive.vectorized.execution.enabled=true; +SET hive.llap.io.enabled=true; + +CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc; +INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4); + +ALTER TABLE test CHANGE f1 f1 bigint; +ALTER TABLE test CHANGE f2 f2 bigint; +ALTER TABLE test CHANGE f3 f3 bigint; + +-- llap counters with data and meta cache +SELECT count(f1) FROM test GROUP BY f1; +SELECT count(f1) FROM test GROUP BY f1; + +CREATE TABLE test_bigint(f1 bigint, f2 bigint, f3 bigint) stored as orc; +INSERT OVERWRITE TABLE test_bigint select * from test; +ALTER TABLE test_bigint CHANGE f1 f1 double; +ALTER TABLE test_bigint CHANGE f2 f2 double; +ALTER TABLE test_bigint CHANGE f3 f3 double; + +-- llap counters with meta cache alone +select count(f1) from test_bigint group by f1; +select count(f1) from test_bigint group by f1; + + +-- Check with ACID table +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.support.concurrency=true; +CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); +INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4'); + +-- should not have llap counters +SELECT count(f1) FROM test_acid GROUP BY f1; diff --git ql/src/test/results/clientpositive/llap/llap_reader.q.out ql/src/test/results/clientpositive/llap/llap_reader.q.out new file mode 100644 index 0000000..edbd48f --- /dev/null +++ ql/src/test/results/clientpositive/llap/llap_reader.q.out @@ -0,0 +1,222 @@ +PREHOOK: query: CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +PREHOOK: query: INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4) +PREHOOK: type: QUERY +PREHOOK: Output: default@test +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 24 + HDFS_BYTES_WRITTEN: 340 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 3 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_1_default.test: 4 +Stage-1 LLAP IO COUNTERS: + CACHE_MISS_BYTES: 24 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 4 +PREHOOK: query: ALTER TABLE test CHANGE f1 f1 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: ALTER TABLE test CHANGE f2 f2 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: ALTER TABLE test CHANGE f3 f3 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 358 + HDFS_BYTES_WRITTEN: 143 + HDFS_READ_OPS: 6 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_0: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 4 +Stage-1 LLAP IO COUNTERS: + ALLOCATED_BYTES: 262144 + ALLOCATED_USED_BYTES: 4 + CACHE_MISS_BYTES: 7 + METADATA_CACHE_MISS: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 4 + SELECTED_ROWGROUPS: 1 +1 +1 +1 +1 +PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 143 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_0: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 4 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 7 + CACHE_MISS_BYTES: 0 + METADATA_CACHE_HIT: 2 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 4 + SELECTED_ROWGROUPS: 1 +1 +1 +1 +1 +PREHOOK: query: CREATE TABLE test_bigint(f1 bigint, f2 bigint, f3 bigint) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_bigint +PREHOOK: query: INSERT OVERWRITE TABLE test_bigint select * from test +PREHOOK: type: QUERY +PREHOOK: Input: default@test +PREHOOK: Output: default@test_bigint +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 141 + HDFS_BYTES_WRITTEN: 347 + HDFS_READ_OPS: 5 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 3 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_1_default.test_bigint: 4 +Stage-1 LLAP IO COUNTERS: + ALLOCATED_BYTES: 524288 + ALLOCATED_USED_BYTES: 8 + CACHE_HIT_BYTES: 7 + CACHE_MISS_BYTES: 14 + METADATA_CACHE_HIT: 1 + METADATA_CACHE_MISS: 1 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 4 + SELECTED_ROWGROUPS: 1 +PREHOOK: query: ALTER TABLE test_bigint CHANGE f1 f1 double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test_bigint +PREHOOK: Output: default@test_bigint +PREHOOK: query: ALTER TABLE test_bigint CHANGE f2 f2 double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test_bigint +PREHOOK: Output: default@test_bigint +PREHOOK: query: ALTER TABLE test_bigint CHANGE f3 f3 double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test_bigint +PREHOOK: Output: default@test_bigint +PREHOOK: query: select count(f1) from test_bigint group by f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_bigint +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 595 + HDFS_BYTES_WRITTEN: 143 + HDFS_READ_OPS: 6 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_0: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 4 +Stage-1 LLAP IO COUNTERS: + METADATA_CACHE_MISS: 1 +1 +1 +1 +1 +PREHOOK: query: select count(f1) from test_bigint group by f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_bigint +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 323 + HDFS_BYTES_WRITTEN: 143 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_0: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 4 +Stage-1 LLAP IO COUNTERS: + METADATA_CACHE_HIT: 1 +1 +1 +1 +1 +PREHOOK: query: CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_acid +PREHOOK: query: INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4') +PREHOOK: type: QUERY +PREHOOK: Output: default@test_acid +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 28 + HDFS_BYTES_WRITTEN: 1501 + HDFS_READ_OPS: 9 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 8 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 2 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_1_default.test_acid: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 4 +Stage-1 LLAP IO COUNTERS: + CACHE_MISS_BYTES: 28 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 4 +PREHOOK: query: SELECT count(f1) FROM test_acid GROUP BY f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_acid +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 1567 + HDFS_BYTES_WRITTEN: 143 + HDFS_READ_OPS: 12 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 4 + RECORDS_OUT_0: 4 + RECORDS_OUT_INTERMEDIATE_Map_1: 4 +1 +1 +1 +1 diff --git ql/src/test/results/clientpositive/llap_reader.q.out ql/src/test/results/clientpositive/llap_reader.q.out new file mode 100644 index 0000000..dcbd3aa --- /dev/null +++ ql/src/test/results/clientpositive/llap_reader.q.out @@ -0,0 +1,86 @@ +PREHOOK: query: CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +PREHOOK: query: INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4) +PREHOOK: type: QUERY +PREHOOK: Output: default@test +PREHOOK: query: ALTER TABLE test CHANGE f1 f1 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: ALTER TABLE test CHANGE f2 f2 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: ALTER TABLE test CHANGE f3 f3 bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test +PREHOOK: Output: default@test +PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +1 +1 +1 +1 +PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +1 +1 +1 +1 +PREHOOK: query: CREATE TABLE test_bigint(f1 bigint, f2 bigint, f3 bigint) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_bigint +PREHOOK: query: INSERT OVERWRITE TABLE test_bigint select * from test +PREHOOK: type: QUERY +PREHOOK: Input: default@test +PREHOOK: Output: default@test_bigint +PREHOOK: query: ALTER TABLE test_bigint CHANGE f1 f1 double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test_bigint +PREHOOK: Output: default@test_bigint +PREHOOK: query: ALTER TABLE test_bigint CHANGE f2 f2 double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test_bigint +PREHOOK: Output: default@test_bigint +PREHOOK: query: ALTER TABLE test_bigint CHANGE f3 f3 double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@test_bigint +PREHOOK: Output: default@test_bigint +PREHOOK: query: select count(f1) from test_bigint group by f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_bigint +#### A masked pattern was here #### +1 +1 +1 +1 +PREHOOK: query: select count(f1) from test_bigint group by f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_bigint +#### A masked pattern was here #### +1 +1 +1 +1 +PREHOOK: query: CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_acid +PREHOOK: query: INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4') +PREHOOK: type: QUERY +PREHOOK: Output: default@test_acid +PREHOOK: query: SELECT count(f1) FROM test_acid GROUP BY f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_acid +#### A masked pattern was here #### +1 +1 +1 +1