diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 0c783e144d..4fd0da2ff1 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4112,6 +4112,10 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal LLAP_EXECUTION_MODE("hive.llap.execution.mode", "none", new StringSet("auto", "none", "all", "map", "only"), "Chooses whether query fragments will run in container or in llap"), + LLAP_IO_ETL_SKIP_FORMAT("hive.llap.io.etl.skip.format", "encode", new StringSet("none", "encode", "all"), + "For ETL queries, determines whether to skip llap io cache. By default, hive.llap.io.encode.enabled " + + "will be set to false which disables LLAP IO for text formats. Setting it to 'all' will disable LLAP IO for all" + + " formats. 'none' will not disable LLAP IO for any formats."), LLAP_OBJECT_CACHE_ENABLED("hive.llap.object.cache.enabled", true, "Cache objects (plans, hashtables, etc) in llap"), LLAP_IO_DECODING_METRICS_PERCENTILE_INTERVALS("hive.llap.io.decoding.metrics.percentiles.intervals", "30", diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 8c4d9b7de7..972f8a0039 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -391,6 +391,7 @@ minillap.query.files=acid_bucket_pruning.q,\ intersect_merge.q,\ llap_udf.q,\ llapdecider.q,\ + llap_io_etl.q,\ mm_dp.q,\ reduce_deduplicate.q,\ reduce_deduplicate_distinct.q, \ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 6252013335..1e79f325f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ 
-12334,6 +12334,17 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticExce } } + final String llapIOETLSkipFormat = HiveConf.getVar(conf, ConfVars.LLAP_IO_ETL_SKIP_FORMAT); + if (qb.getParseInfo().hasInsertTables() || qb.isCTAS()) { + if (llapIOETLSkipFormat.equalsIgnoreCase("encode")) { + conf.setBoolean(ConfVars.LLAP_IO_ENCODE_ENABLED.varname, false); + LOG.info("Disabling LLAP IO encode as ETL query is detected"); + } else if (llapIOETLSkipFormat.equalsIgnoreCase("all")) { + conf.setBoolean(ConfVars.LLAP_IO_ENABLED.varname, false); + LOG.info("Disabling LLAP IO as ETL query is detected"); + } + } + // Check query results cache. // If no masking/filtering required, then we can check the cache now, before // generating the operator tree and going through CBO. diff --git a/ql/src/test/queries/clientpositive/llap_io_etl.q b/ql/src/test/queries/clientpositive/llap_io_etl.q new file mode 100644 index 0000000000..1e85320abf --- /dev/null +++ b/ql/src/test/queries/clientpositive/llap_io_etl.q @@ -0,0 +1,49 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.fetch.task.conversion=none; + +SET hive.llap.io.enabled=true; +set hive.llap.cache.allow.synthetic.fileid=true; + +create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + vc1 varchar(5), + m1 map<string, string>, + l1 array<int>, + st1 struct<c1:int, c2:string> +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile; + +load data local inpath '../../data/files/alltypes.txt' overwrite into table alltypes; + +SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; +-- disables llap io for all etl (should not see LLAP IO COUNTERS) +set hive.llap.io.etl.skip.format=all; +create table alltypes_orc stored as orc as select
* from alltypes; +insert into alltypes_orc select * from alltypes; + +-- disables llap io for all etl + text (should not see LLAP IO COUNTERS) +set hive.llap.io.etl.skip.format=encode; +create table alltypes_text1 stored as textfile as select * from alltypes; +insert into alltypes_text1 select * from alltypes; + +-- does not disable llap io for etl (should see LLAP IO COUNTERS) +set hive.llap.io.etl.skip.format=none; +create table alltypes_text2 stored as textfile as select * from alltypes; +insert into alltypes_text2 select * from alltypes; + +drop table alltypes_text1; +drop table alltypes_text2; +drop table alltypes_orc; diff --git a/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out b/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out new file mode 100644 index 0000000000..1a967fafef --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out @@ -0,0 +1,203 @@ +PREHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + vc1 varchar(5), + m1 map<string, string>, + l1 array<int>, + st1 struct<c1:int, c2:string> +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypes +POSTHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + vc1 varchar(5), + m1 map<string, string>, + l1 array<int>, + st1 struct<c1:int, c2:string> +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypes +PREHOOK: query: load data local inpath '../../data/files/alltypes.txt' overwrite into table
alltypes +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@alltypes +POSTHOOK: query: load data local inpath '../../data/files/alltypes.txt' overwrite into table alltypes +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@alltypes +PREHOOK: query: create table alltypes_orc stored as orc as select * from alltypes +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypes +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypes_orc +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_orc: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: insert into alltypes_orc select * from alltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes_orc +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_orc: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: create table alltypes_text1 stored as textfile as select * from alltypes +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypes +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypes_text1 +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + 
DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_text1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: insert into alltypes_text1 select * from alltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes_text1 +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_text1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: create table alltypes_text2 stored as textfile as select * from alltypes +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypes +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypes_text2 +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_text2: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 LLAP IO COUNTERS: + CACHE_MISS_BYTES: 244 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: insert into alltypes_text2 select * from alltypes +PREHOOK: type: 
QUERY +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes_text2 +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_text2: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 LLAP IO COUNTERS: + CACHE_MISS_BYTES: 244 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: drop table alltypes_text1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypes_text1 +PREHOOK: Output: default@alltypes_text1 +PREHOOK: query: drop table alltypes_text2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypes_text2 +PREHOOK: Output: default@alltypes_text2 +PREHOOK: query: drop table alltypes_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc diff --git a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out index ce63ba512b..35699bf273 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out @@ -166,12 +166,12 @@ POSTHOOK: Lineage: orcfile_merge1_n1 PARTITION(ds=1,part=0).value SIMPLE [(src)s POSTHOOK: Lineage: orcfile_merge1_n1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge1_n1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] Found 6 items --rw-r--r-- 3 ### USER ### ### GROUP ### 555 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-r--r-- 3 ### USER ### ### GROUP ### 562 ### HDFS DATE ### hdfs://### HDFS PATH ### 
--rw-r--r-- 3 ### USER ### ### GROUP ### 561 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-r--r-- 3 ### USER ### ### GROUP ### 496 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-r--r-- 3 ### USER ### ### GROUP ### 554 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-r--r-- 3 ### USER ### ### GROUP ### 478 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 555 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 562 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 561 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 496 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 554 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 478 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE orcfile_merge1b_n1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part diff --git a/ql/src/test/results/clientpositive/llap/orc_merge3.q.out b/ql/src/test/results/clientpositive/llap/orc_merge3.q.out index 659d862fbe..0513bac9c0 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge3.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge3.q.out @@ -163,7 +163,7 @@ POSTHOOK: Output: default@orcfile_merge3b_n0 POSTHOOK: Lineage: orcfile_merge3b_n0.key SIMPLE [(orcfile_merge3a_n0)orcfile_merge3a_n0.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: orcfile_merge3b_n0.value SIMPLE [(orcfile_merge3a_n0)orcfile_merge3a_n0.FieldSchema(name:value, type:string, comment:null), ] Found 1 items --rw-r--r-- 3 ### USER ### ### GROUP ### 2572 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 2572 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c) FROM orcfile_merge3a_n0 diff --git 
a/ql/src/test/results/clientpositive/llap/orc_merge4.q.out b/ql/src/test/results/clientpositive/llap/orc_merge4.q.out index 81b47d3407..42df4448b7 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge4.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge4.q.out @@ -37,7 +37,7 @@ POSTHOOK: Output: default@orcfile_merge3a@ds=1 POSTHOOK: Lineage: orcfile_merge3a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge3a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] Found 1 items --rw-r--r-- 3 ### USER ### ### GROUP ### 2530 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 2530 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge3a PARTITION (ds='1') SELECT * FROM src PREHOOK: type: QUERY @@ -63,9 +63,9 @@ POSTHOOK: Output: default@orcfile_merge3a@ds=2 POSTHOOK: Lineage: orcfile_merge3a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcfile_merge3a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] Found 1 items --rw-r--r-- 3 ### USER ### ### GROUP ### 2530 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 2530 ### HDFS DATE ### hdfs://### HDFS PATH ### Found 1 items --rw-r--r-- 3 ### USER ### ### GROUP ### 2530 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 2530 ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE orcfile_merge3b SELECT key, value FROM orcfile_merge3a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out index 53c6cfdfaa..b49fce447a 100644 --- a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out +++ 
b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out @@ -2124,7 +2124,6 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@staging_n7 PREHOOK: Output: database:default PREHOOK: Output: default@tmp_orcppd -Stage-1 LLAP IO COUNTERS: PREHOOK: query: insert into table tmp_orcppd values(null, null) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/tez_input_counters.q.out b/ql/src/test/results/clientpositive/llap/tez_input_counters.q.out index 16a45feb8b..9aa6a211d8 100644 --- a/ql/src/test/results/clientpositive/llap/tez_input_counters.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_input_counters.q.out @@ -1815,7 +1815,7 @@ Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 74 CREATED_FILES: 76 DESERIALIZE_ERRORS: 0 - RECORDS_IN_Map_1: 240 + RECORDS_IN_Map_1: 1 RECORDS_OUT_0: 74 RECORDS_OUT_1_default.testpart1: 240 RECORDS_OUT_INTERMEDIATE_Map_1: 240 @@ -1830,11 +1830,6 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_SEL_8: 74 RECORDS_OUT_OPERATOR_TS_0: 240 TOTAL_TABLE_ROWS_WRITTEN: 240 -Stage-1 LLAP IO COUNTERS: - CACHE_HIT_BYTES: 922 - NUM_DECODED_BATCHES: 148 - NUM_VECTOR_BATCHES: 148 - ROWS_EMITTED: 240 Stage-1 INPUT COUNTERS: GROUPED_INPUT_SPLITS_Map_1: 1 INPUT_DIRECTORIES_Map_1: 74