diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 18696adbfc..da7150a08d 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4428,6 +4428,10 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "If the query results cache is enabled for queries involving non-transactional tables." + "Users who enable this setting should be willing to tolerate some amount of stale results in the cache."), + HIVE_QUERY_RESULTS_CACHE_EXTERNAL_TABLES_ENABLED("hive.query.results.cache.external.tables.enabled", false, + "If the query results cache is enabled for queries involving external tables. " + + "Users who enable this setting should be willing to tolerate some amount of stale results in the cache."), + HIVE_QUERY_RESULTS_CACHE_WAIT_FOR_PENDING_RESULTS("hive.query.results.cache.wait.for.pending.results", true, "Should a query wait for the pending results of an already running query, " + "in order to use the cached result when it becomes ready"), diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 50bfe6a06a..b76dae0802 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -611,6 +611,7 @@ minillaplocal.query.files=\ results_cache_2.q,\ results_cache_capacity.q,\ results_cache_empty_result.q,\ + results_cache_externaltable.q,\ results_cache_invalidation.q,\ results_cache_invalidation2.q,\ results_cache_lifetime.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOpMaterializationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOpMaterializationValidator.java index df216e7555..1eebb3ed03 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOpMaterializationValidator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOpMaterializationValidator.java @@ -68,6 +68,11 @@ static final Logger LOG = LoggerFactory.getLogger(HiveRelOpMaterializationValidator.class); protected String invalidMaterializationReason; + protected final boolean allowExternalTables; + + public HiveRelOpMaterializationValidator(boolean allowExternalTables) { + this.allowExternalTables = allowExternalTables; + } public void validateQueryMaterialization(RelNode relNode) { try { @@ -86,8 +91,7 @@ public RelNode visit(TableScan scan) { if (tab.isTemporary()) { fail(tab.getTableName() + " is a temporary table"); } - TableType tt = tab.getTableType(); - if (tab.getTableType() == TableType.EXTERNAL_TABLE) { + if (!allowExternalTables && tab.getTableType() == TableType.EXTERNAL_TABLE) { fail(tab.getFullyQualifiedName() + " is an external table"); } return scan; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f008c4dfae..0f59b3cd4d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1752,7 +1752,10 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // Validate query materialization (materialized views, query results caching. // This check needs to occur before constant folding, which may remove some // function calls from the query plan. 
- HiveRelOpMaterializationValidator matValidator = new HiveRelOpMaterializationValidator(); + boolean allowCachedExternalTables = + HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_EXTERNAL_TABLES_ENABLED); + HiveRelOpMaterializationValidator matValidator = + new HiveRelOpMaterializationValidator(allowCachedExternalTables); matValidator.validateQueryMaterialization(calciteGenPlan); if (!matValidator.isValidMaterialization()) { String reason = matValidator.getInvalidMaterializationReason(); diff --git a/ql/src/test/queries/clientpositive/results_cache_externaltable.q b/ql/src/test/queries/clientpositive/results_cache_externaltable.q new file mode 100644 index 0000000000..d98aa52cdd --- /dev/null +++ b/ql/src/test/queries/clientpositive/results_cache_externaltable.q @@ -0,0 +1,25 @@ +set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; +set hive.query.results.cache.external.tables.enabled=false; + +create external table rcet (key string, value string); +load data local inpath '../../data/files/kv1.txt' overwrite into table rcet; + +select count(*) from rcet where key = 0; + +set test.comment="external table not enabled for cache, no cached result"; +set test.comment; + +explain +select count(*) from rcet where key = 0; + +set hive.query.results.cache.external.tables.enabled=true; + +select count(*) from rcet where key = 0; + +set test.comment="external table enabled, cached should be used"; +set test.comment; +explain +select count(*) from rcet where key = 0; +select count(*) from rcet where key = 0; + diff --git a/ql/src/test/results/clientpositive/llap/results_cache_externaltable.q.out b/ql/src/test/results/clientpositive/llap/results_cache_externaltable.q.out new file mode 100644 index 0000000000..5d79bf5b88 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/results_cache_externaltable.q.out @@ -0,0 +1,122 @@ +PREHOOK: query: create external table rcet (key string, value string) 
+PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@rcet +POSTHOOK: query: create external table rcet (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@rcet +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table rcet +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@rcet +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table rcet +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@rcet +PREHOOK: query: select count(*) from rcet where key = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@rcet +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from rcet where key = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@rcet +#### A masked pattern was here #### +3 +test.comment="external table not enabled for cache, no cached result" +PREHOOK: query: explain +select count(*) from rcet where key = 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from rcet where key = 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: rcet + filterExpr: (UDFToDouble(key) = 0.0D) (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 0.0D) (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from rcet where key = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@rcet +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from rcet where key = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@rcet +#### A masked pattern was here #### +3 +test.comment="external table enabled, cached should be used" +PREHOOK: query: explain +select count(*) from rcet where key = 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from rcet where key = 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from rcet where key = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@rcet +POSTHOOK: query: select count(*) from rcet where key = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@rcet +3