diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index ca86301..cb678df 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec; -import static com.google.common.base.Preconditions.checkNotNull; - import java.beans.DefaultPersistenceDelegate; import java.beans.Encoder; import java.beans.ExceptionListener; @@ -82,7 +80,6 @@ import java.util.zip.InflaterInputStream; import org.antlr.runtime.CommonToken; -import org.apache.calcite.util.ChunkList; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.WordUtils; @@ -2844,11 +2841,7 @@ private static void getMRTasks(List> tasks, List fullPartSpec = new LinkedHashMap(partSpec); - Warehouse.makeSpecFromName(fullPartSpec, partPath); - fullPartSpecs.add(fullPartSpec); + fullPartSpecs.add(extractPartSpecFromPath(partSpec, partPath)); } return fullPartSpecs; } catch (IOException e) { @@ -2856,6 +2849,33 @@ private static void getMRTasks(List> tasks, List extractPartSpecFromPath( + Map partSpec, Path partPath) { + // generate a full partition specification + LinkedHashMap fullPartSpec = new LinkedHashMap(); + Warehouse.makeSpecFromName(fullPartSpec, partPath); + + // retain partSpecs only.. we believe the path is created as part-spec + List nonPartSpecs = new ArrayList(); + for (Map.Entry entry : fullPartSpec.entrySet()) { + String key = entry.getKey(); + String value = entry.getValue(); + if (!partSpec.containsKey(key)) { + nonPartSpecs.add(key); + continue; + } + String expected = partSpec.get(key); + if (StringUtils.isNotEmpty(expected) && !expected.equals(value)) { + // should not happen + LOG.warn("Path " + entry + " is not consistent with expected " + key + "=" + expected); + } + } + for (String nonPartSpec : nonPartSpecs) { + fullPartSpec.remove(nonPartSpec); + } + return fullPartSpec; + } + public static StatsPublisher getStatsPublisher(JobConf jc) { StatsFactory factory = StatsFactory.newFactory(jc); return factory == null ? null : factory.getStatsPublisher(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 396c070..bc1fd94 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1590,8 +1590,8 @@ private void constructOneLBLocationMap(FileStatus fSta, "partitions " + partPath + " is not a directory !"; // generate a full partition specification - LinkedHashMap fullPartSpec = new LinkedHashMap(partSpec); - Warehouse.makeSpecFromName(fullPartSpec, partPath); + LinkedHashMap fullPartSpec = + Utilities.extractPartSpecFromPath(partSpec, partPath); Partition newPartition = loadPartition(partPath, tbl, fullPartSpec, replace, holdDDLTime, true, listBucketingEnabled, false, isAcid); partitionsMap.put(fullPartSpec, newPartition); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java index 3f07ea7..857c45b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -30,8 +29,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.Utilities; /** * Conditional task resolution interface. This is invoked at run time to get the @@ -317,11 +316,8 @@ private void generateActualTasks(HiveConf conf, List fullPartSpec = new LinkedHashMap( - dpCtx.getPartSpec()); - Warehouse.makeSpecFromName(fullPartSpec, status[i].getPath()); - PartitionDesc pDesc = new PartitionDesc(tblDesc, (LinkedHashMap) fullPartSpec); - return pDesc; + return new PartitionDesc(tblDesc, + Utilities.extractPartSpecFromPath(dpCtx.getPartSpec(), status[i].getPath())); } private void setupMapRedWork(HiveConf conf, MapWork mWork, long targetSize, long totalSize) { diff --git a/ql/src/test/queries/clientpositive/dynamic_partition_insert_external.q b/ql/src/test/queries/clientpositive/dynamic_partition_insert_external.q new file mode 100644 index 0000000..be29cb2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/dynamic_partition_insert_external.q @@ -0,0 +1,23 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/HIVE-11662/somekey=somevalue; + +CREATE TABLE t1 (c1 BIGINT, c2 STRING); + +CREATE EXTERNAL TABLE t2 (c1 INT, c2 STRING) +PARTITIONED BY (p1 STRING) +LOCATION '${system:test.tmp.dir}/HIVE-11662/somekey=somevalue'; + +LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1; +LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1; +LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1; +LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1; +LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1; + +SET hive.exec.dynamic.partition.mode=nonstrict; +INSERT OVERWRITE TABLE t2 partition(p1) SELECT *,c1 AS p1 FROM t1 DISTRIBUTE BY p1; + +SELECT * FROM t2; + +DROP TABLE t1; +DROP TABLE t2; + +dfs -rmr ${system:test.tmp.dir}/HIVE-11662 \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/dynamic_partition_insert_external.q.out b/ql/src/test/results/clientpositive/dynamic_partition_insert_external.q.out new file mode 100644 index 0000000..bfb6054 --- /dev/null +++ b/ql/src/test/results/clientpositive/dynamic_partition_insert_external.q.out @@ -0,0 +1,144 @@ +PREHOOK: query: CREATE TABLE t1 (c1 BIGINT, c2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: CREATE TABLE t1 (c1 BIGINT, c2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE EXTERNAL TABLE t2 (c1 INT, c2 STRING) +PARTITIONED BY (p1 STRING) +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: CREATE EXTERNAL TABLE t2 (c1 INT, c2 STRING) +PARTITIONED BY (p1 STRING) +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dynamic_partition_insert.txt' INTO TABLE t1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: INSERT OVERWRITE TABLE t2 partition(p1) SELECT *,c1 AS p1 FROM t1 DISTRIBUTE BY p1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT OVERWRITE TABLE t2 partition(p1) SELECT *,c1 AS p1 FROM t1 DISTRIBUTE BY p1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2@p1=1 +POSTHOOK: Output: default@t2@p1=2 +POSTHOOK: Output: default@t2@p1=3 +POSTHOOK: Output: default@t2@p1=4 +POSTHOOK: Output: default@t2@p1=5 +POSTHOOK: Lineage: t2 PARTITION(p1=1).c1 EXPRESSION [(t1)t1.FieldSchema(name:c1, type:bigint, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=1).c2 SIMPLE [(t1)t1.FieldSchema(name:c2, type:string, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=2).c1 EXPRESSION [(t1)t1.FieldSchema(name:c1, type:bigint, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=2).c2 SIMPLE [(t1)t1.FieldSchema(name:c2, type:string, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=3).c1 EXPRESSION [(t1)t1.FieldSchema(name:c1, type:bigint, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=3).c2 SIMPLE [(t1)t1.FieldSchema(name:c2, type:string, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=4).c1 EXPRESSION [(t1)t1.FieldSchema(name:c1, type:bigint, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=4).c2 SIMPLE [(t1)t1.FieldSchema(name:c2, type:string, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=5).c1 EXPRESSION [(t1)t1.FieldSchema(name:c1, type:bigint, comment:null), ] +POSTHOOK: Lineage: t2 PARTITION(p1=5).c2 SIMPLE [(t1)t1.FieldSchema(name:c2, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@p1=1 +PREHOOK: Input: default@t2@p1=2 +PREHOOK: Input: default@t2@p1=3 +PREHOOK: Input: default@t2@p1=4 +PREHOOK: Input: default@t2@p1=5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t2@p1=1 +POSTHOOK: Input: default@t2@p1=2 +POSTHOOK: Input: default@t2@p1=3 +POSTHOOK: Input: default@t2@p1=4 +POSTHOOK: Input: default@t2@p1=5 +#### A masked pattern was here #### +1 one 1 +1 one 1 +1 one 1 +1 one 1 +1 one 1 +2 two 2 +2 two 2 +2 two 2 +2 two 2 +2 two 2 +3 three 3 +3 three 3 +3 three 3 +3 three 3 +3 three 3 +4 four 4 +4 four 4 +4 four 4 +4 four 4 +4 four 4 +5 five 5 +5 five 5 +5 five 5 +5 five 5 +5 five 5 +PREHOOK: query: DROP TABLE t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: DROP TABLE t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: DROP TABLE t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: DROP TABLE t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +#### A masked pattern was here ####