diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 0c783e144d..4fd0da2ff1 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4112,6 +4112,10 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal LLAP_EXECUTION_MODE("hive.llap.execution.mode", "none", new StringSet("auto", "none", "all", "map", "only"), "Chooses whether query fragments will run in container or in llap"), + LLAP_IO_ETL_SKIP_FORMAT("hive.llap.io.etl.skip.format", "encode", new StringSet("none", "encode", "all"), + "For ETL queries, determines whether to skip llap io cache. By default, hive.llap.io.encode.enabled " + + "will be set to false which disables LLAP IO for text formats. Setting it to 'all' will disable LLAP IO for all" + + " formats. 'none' will not disable LLAP IO for any formats."), LLAP_OBJECT_CACHE_ENABLED("hive.llap.object.cache.enabled", true, "Cache objects (plans, hashtables, etc) in llap"), LLAP_IO_DECODING_METRICS_PERCENTILE_INTERVALS("hive.llap.io.decoding.metrics.percentiles.intervals", "30", diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 8c4d9b7de7..18ef720801 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -165,6 +165,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ kill_query.q,\ leftsemijoin.q,\ limit_pushdown.q,\ + llap_io_etl.q,\ load_dyn_part1.q,\ load_dyn_part2.q,\ load_dyn_part3.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 6252013335..1e79f325f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
@@ -12334,6 +12334,17 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticExce } } + final String llapIOETLSkipFormat = HiveConf.getVar(conf, ConfVars.LLAP_IO_ETL_SKIP_FORMAT); + if (qb.getParseInfo().hasInsertTables() || qb.isCTAS()) { + if (llapIOETLSkipFormat.equalsIgnoreCase("encode")) { + conf.setBoolean(ConfVars.LLAP_IO_ENCODE_ENABLED.varname, false); + LOG.info("Disabling LLAP IO encode as ETL query is detected"); + } else if (llapIOETLSkipFormat.equalsIgnoreCase("all")) { + conf.setBoolean(ConfVars.LLAP_IO_ENABLED.varname, false); + LOG.info("Disabling LLAP IO as ETL query is detected"); + } + } + // Check query results cache. // If no masking/filtering required, then we can check the cache now, before // generating the operator tree and going through CBO. diff --git a/ql/src/test/queries/clientpositive/llap_io_etl.q b/ql/src/test/queries/clientpositive/llap_io_etl.q new file mode 100644 index 0000000000..1e85320abf --- /dev/null +++ b/ql/src/test/queries/clientpositive/llap_io_etl.q @@ -0,0 +1,49 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.fetch.task.conversion=none; + +SET hive.llap.io.enabled=true; +set hive.llap.cache.allow.synthetic.fileid=true; + +create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + vc1 varchar(5), + m1 map, + l1 array, + st1 struct +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile; + +load data local inpath '../../data/files/alltypes.txt' overwrite into table alltypes; + +SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; +-- disables llap io for all etl (should not see LLAP IO COUNTERS) +set hive.llap.io.etl.skip.format=all; +create table alltypes_orc stored as orc as 
select * from alltypes; +insert into alltypes_orc select * from alltypes; + +-- disables llap io encode for etl, i.e. llap io is skipped for text formats only (should not see LLAP IO COUNTERS) +set hive.llap.io.etl.skip.format=encode; +create table alltypes_text1 stored as textfile as select * from alltypes; +insert into alltypes_text1 select * from alltypes; + +-- does not disable llap io for etl (should see LLAP IO COUNTERS) +set hive.llap.io.etl.skip.format=none; +create table alltypes_text2 stored as textfile as select * from alltypes; +insert into alltypes_text2 select * from alltypes; + +drop table alltypes_text1; +drop table alltypes_text2; +drop table alltypes_orc; diff --git a/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out b/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out new file mode 100644 index 0000000000..1a967fafef --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out @@ -0,0 +1,203 @@ +PREHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + vc1 varchar(5), + m1 map, + l1 array, + st1 struct +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypes +POSTHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + vc1 varchar(5), + m1 map, + l1 array, + st1 struct +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypes +PREHOOK: query: load data local inpath '../../data/files/alltypes.txt' overwrite into 
table alltypes +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@alltypes +POSTHOOK: query: load data local inpath '../../data/files/alltypes.txt' overwrite into table alltypes +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@alltypes +PREHOOK: query: create table alltypes_orc stored as orc as select * from alltypes +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypes +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypes_orc +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_orc: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: insert into alltypes_orc select * from alltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes_orc +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_orc: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: create table alltypes_text1 stored as textfile as select * from alltypes +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypes +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypes_text1 +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + 
DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_text1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: insert into alltypes_text1 select * from alltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes_text1 +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_text1: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: create table alltypes_text2 stored as textfile as select * from alltypes +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypes +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypes_text2 +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_text2: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 LLAP IO COUNTERS: + CACHE_MISS_BYTES: 244 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: insert into alltypes_text2 select * from alltypes +PREHOOK: type: 
QUERY +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes_text2 +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2 + RECORDS_OUT_1_default.alltypes_text2: 2 + RECORDS_OUT_INTERMEDIATE_Map_1: 0 + RECORDS_OUT_OPERATOR_FS_4: 2 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_SEL_3: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 + TOTAL_TABLE_ROWS_WRITTEN: 2 +Stage-1 LLAP IO COUNTERS: + CACHE_MISS_BYTES: 244 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 2 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +PREHOOK: query: drop table alltypes_text1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypes_text1 +PREHOOK: Output: default@alltypes_text1 +PREHOOK: query: drop table alltypes_text2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypes_text2 +PREHOOK: Output: default@alltypes_text2 +PREHOOK: query: drop table alltypes_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc