diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 16d9b03..f855221 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2897,6 +2897,11 @@ public static boolean moveFile(final HiveConf conf, Path srcf, final Path destf,
         /* Move files one by one because source is a subdirectory of destination */
         for (final FileStatus srcStatus : srcs) {
+          final Path destPath = new Path(destf, srcStatus.getPath().getName());
+          if (replace && destFs.exists(destPath)) {
+            destFs.delete(destPath, false);
+          }
+
           if (null == pool) {
             if(!destFs.rename(srcStatus.getPath(), destf)) {
               throw new IOException("rename for src path: " + srcStatus.getPath() + " to dest:"
@@ -2907,7 +2912,6 @@ public static boolean moveFile(final HiveConf conf, Path srcf, final Path destf,
               @Override
               public Void call() throws Exception {
                 SessionState.setCurrentSessionState(parentSession);
-                final Path destPath = new Path(destf, srcStatus.getPath().getName());
                 final String group = srcStatus.getGroup();
                 if(destFs.rename(srcStatus.getPath(), destf)) {
                   if (inheritPerms) {
diff --git a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
index 367dab9..69b2ba9 100644
--- a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
@@ -1,10 +1,11 @@
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: EXPLAIN SELECT src1.key, src1.key + 1, src2.value
-  FROM src src1 join src src2 ON src1.key = src2.key AND src1.key = 100
+  FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND src1.key = 100
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN SELECT src1.key, src1.key + 1, src2.value
-  FROM src src1 join src src2 ON src1.key = src2.key AND src1.key = 100
+  FROM srcbucket src1 join srcbucket src2 ON src1.key = src2.key AND src1.key = 100
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -14,41 +15,37 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: src1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (UDFToDouble(key) = 100.0) (type: boolean)
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (key = 100) (type: boolean)
+                    Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: '100' (type: string)
-                        sort order: +
-                        Map-reduce partition columns: '100' (type: string)
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        sort order: 
+                        Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
         Map 3 
            Map Operator Tree:
                 TableScan
                   alias: src1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (UDFToDouble(key) = 100.0) (type: boolean)
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (key = 100) (type: boolean)
+                    Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: value (type: string)
                       outputColumnNames: _col1
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: '100' (type: string)
-                        sort order: +
-                        Map-reduce partition columns: '100' (type: string)
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        sort order: 
+                        Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string)
         Reducer 2 
             Reduce Operator Tree:
@@ -56,17 +53,17 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
+                  0 
+                  1 
                 outputColumnNames: _col2
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: '100' (type: string), 101.0 (type: double), _col2 (type: string)
+                  expressions: 100 (type: int), 101 (type: int), _col2 (type: string)
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 250000 Data size: 5551000 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -78,20 +75,6 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: SELECT src1.key, src1.key + 1, src2.value
-  FROM src src1 join src src2 ON src1.key = src2.key AND src1.key = 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT src1.key, src1.key + 1, src2.value
-  FROM src src1 join src src2 ON src1.key = src2.key AND src1.key = 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-#### A masked pattern was here ####
-100	101.0	val_100
-100	101.0	val_100
-100	101.0	val_100
-100	101.0	val_100
 PREHOOK: query: EXPLAIN
 SELECT l_partkey, l_suppkey
 FROM lineitem li
@@ -139,21 +122,21 @@ STAGE PLANS:
                   alias: li
                   Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
+                    predicate: ((l_shipmode = 'AIR') and (l_linenumber = 1) and l_orderkey is not null) (type: boolean)
                     Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: l_orderkey (type: int)
-                      outputColumnNames: _col0
+                      expressions: l_orderkey (type: int), 1 (type: int)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        keys: _col0 (type: int), 1 (type: int)
+                        keys: _col0 (type: int), _col1 (type: int)
                         mode: hash
                         outputColumnNames: _col0, _col1
                         Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
-                          key expressions: _col0 (type: int), 1 (type: int)
+                          key expressions: _col0 (type: int), _col1 (type: int)
                           sort order: ++
-                          Map-reduce partition columns: _col0 (type: int), 1 (type: int)
+                          Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
                           Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
@@ -161,7 +144,7 @@ STAGE PLANS:
                 condition map:
                      Left Semi Join 0 to 1
                 keys:
-                  0 _col0 (type: int), _col3 (type: int)
+                  0 _col0 (type: int), 1 (type: int)
                   1 _col0 (type: int), _col1 (type: int)
                 outputColumnNames: _col1, _col2
                 Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
@@ -183,19 +166,3 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: SELECT l_partkey, l_suppkey
-FROM lineitem li
-WHERE li.l_linenumber = 1 AND
-  li.l_orderkey IN (SELECT l_orderkey FROM lineitem WHERE l_shipmode = 'AIR' AND l_linenumber = li.l_linenumber)
-PREHOOK: type: QUERY
-PREHOOK: Input: default@lineitem
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT l_partkey, l_suppkey
-FROM lineitem li
-WHERE li.l_linenumber = 1 AND
-  li.l_orderkey IN (SELECT l_orderkey FROM lineitem WHERE l_shipmode = 'AIR' AND l_linenumber = li.l_linenumber)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@lineitem
-#### A masked pattern was here ####
-108570	8571
-4297	1798
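
For context on the Hive.java hunk above, the sketch below illustrates the replace-then-rename pattern the patch introduces in moveFile(): when files are moved one by one because the source directory is a subdirectory of the destination, a same-named file already present at the destination is deleted first (when replace semantics are requested) so the per-file rename cannot fail or leave stale data behind. This is a simplified, self-contained sketch against the Hadoop FileSystem API, not the actual Hive.moveFile() code; the class and method names here are hypothetical.

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustrative sketch only; not part of the patch.
public class ReplaceThenRenameSketch {

  // Move each source file under destf, clearing a same-named destination
  // file first when 'replace' semantics (e.g. INSERT OVERWRITE) are wanted.
  static void moveFilesIntoDest(FileSystem destFs, FileStatus[] srcs,
      Path destf, boolean replace) throws IOException {
    for (FileStatus srcStatus : srcs) {
      // Destination path keeps the source file's name under destf.
      Path destPath = new Path(destf, srcStatus.getPath().getName());

      // Without this pre-delete, rename() can return false or leave the old
      // file in place when a file of the same name already exists at destPath.
      if (replace && destFs.exists(destPath)) {
        destFs.delete(destPath, false); // single file, non-recursive delete
      }

      // Rename into the destination directory, mirroring the patched loop.
      if (!destFs.rename(srcStatus.getPath(), destf)) {
        throw new IOException("rename for src path: " + srcStatus.getPath()
            + " to dest: " + destf + " returned false");
      }
    }
  }
}

In the patch itself, destPath and the pre-delete are hoisted above the `if (null == pool)` branch, so both the serial path and the thread-pool path see the destination cleared before renaming; only the serial branch is sketched here.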