diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index a3a70ecd49..7497af1211 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -408,6 +408,7 @@ minillap.query.files=acid_bucket_pruning.q,\ orc_ppd_schema_evol_3a.q,\ global_limit.q,\ dynamic_partition_pruning_2.q,\ + results_cache_diff_fs.q,\ tez_union_dynamic_partition.q,\ tez_union_dynamic_partition_2.q,\ unionDistinct_1.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java index 1ca7c11b43..66f3b78f14 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java @@ -65,6 +65,7 @@ import org.apache.hadoop.hive.ql.hooks.Entity.Type; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.events.EventConsumer; @@ -790,7 +791,16 @@ private Path moveResultsToCacheDirectory(Path queryResultsPath) throws IOExcepti String dirName = UUID.randomUUID().toString(); Path cachedResultsPath = new Path(cacheDirPath, dirName); FileSystem fs = cachedResultsPath.getFileSystem(conf); - fs.rename(queryResultsPath, cachedResultsPath); + try { + boolean resultsMoved = Hive.moveFile(conf, queryResultsPath, cachedResultsPath, false, false, false); + if (!resultsMoved) { + throw new IOException("Failed to move " + queryResultsPath + " to " + cachedResultsPath); + } + } catch (IOException err) { + throw err; + } catch (Exception err) { + throw new IOException("Error moving " + queryResultsPath + " to " + cachedResultsPath, err); + } return cachedResultsPath; } diff --git a/ql/src/test/queries/clientpositive/results_cache_diff_fs.q b/ql/src/test/queries/clientpositive/results_cache_diff_fs.q new file mode 100644 index 0000000000..0e15b06310 --- /dev/null +++ b/ql/src/test/queries/clientpositive/results_cache_diff_fs.q @@ -0,0 +1,18 @@ +--! qt:dataset:src + +set hive.query.results.cache.enabled=true; +set hive.query.results.cache.nontransactional.tables.enabled=true; +set hive.query.results.cache.directory=pfile://${system:test.tmp.dir}/results_cache_diff_fs; +set test.comment=hive.exec.scratchdir is; +set hive.exec.scratchdir; + +explain +select count(*) from src a join src b on (a.key = b.key); +select count(*) from src a join src b on (a.key = b.key); + +set test.comment="Cache should be used for this query"; +set test.comment; +explain +select count(*) from src a join src b on (a.key = b.key); +select count(*) from src a join src b on (a.key = b.key); + diff --git a/ql/src/test/results/clientpositive/llap/results_cache_diff_fs.q.out b/ql/src/test/results/clientpositive/llap/results_cache_diff_fs.q.out new file mode 100644 index 0000000000..d4e7db8653 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/results_cache_diff_fs.q.out @@ -0,0 +1,135 @@ +#### A masked pattern was here #### +PREHOOK: query: explain +select count(*) from src a join src b on (a.key = b.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from src a join src b on (a.key = b.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 791 Data size: 6328 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from src a join src b on (a.key = b.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from src a join src b on (a.key = b.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +1028 +test.comment="Cache should be used for this query" +PREHOOK: query: explain +select count(*) from src a join src b on (a.key = b.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from src a join src b on (a.key = b.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Cached Query Result: true + +PREHOOK: query: select count(*) from src a join src b on (a.key = b.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +POSTHOOK: query: select count(*) from src a join src b on (a.key = b.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +1028