diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index f80a945be5..b531941bc6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -477,7 +477,7 @@ private DataContainer handleStaticParts(Hive db, Table table, LoadTableDesc tbd, db.loadPartition(tbd.getSourcePath(), db.getTable(tbd.getTable().getTableName()), tbd.getPartitionSpec(), tbd.getLoadFileType(), tbd.getInheritTableSpecs(), - isSkewedStoredAsDirs(tbd), work.isSrcLocal(), + tbd.getInheritLocation(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && !tbd.isMmTable(), hasFollowingStatsTask(), diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 2ec131e274..5c0aeaa042 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1681,7 +1681,8 @@ public Database getDatabaseCurrent() throws HiveException { * @return Partition object being loaded with data */ public Partition loadPartition(Path loadPath, Table tbl, Map partSpec, - LoadFileType loadFileType, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, + LoadFileType loadFileType, boolean inheritTableSpecs, boolean inheritLocation, + boolean isSkewedStoreAsSubdir, boolean isSrcLocal, boolean isAcidIUDoperation, boolean hasFollowingStatsTask, Long writeId, int stmtId, boolean isInsertOverwrite) throws HiveException { Path tblDataLocationPath = tbl.getDataLocation(); @@ -1705,10 +1706,8 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par Path oldPartPath = (oldPart != null) ? 
oldPart.getDataLocation() : null; Path newPartPath = null; - if (inheritTableSpecs) { - Path partPath = new Path(tbl.getDataLocation(), Warehouse.makePartPath(partSpec)); - newPartPath = new Path(tblDataLocationPath.toUri().getScheme(), - tblDataLocationPath.toUri().getAuthority(), partPath.toUri().getPath()); + if (inheritLocation) { + newPartPath = genPartPathFromTable(tbl, partSpec, tblDataLocationPath); if(oldPart != null) { /* @@ -1725,7 +1724,8 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par } } } else { - newPartPath = oldPartPath; + newPartPath = oldPartPath == null + ? genPartPathFromTable(tbl, partSpec, tblDataLocationPath) : oldPartPath; } List newFiles = null; @@ -1888,6 +1888,14 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par } } + + private static Path genPartPathFromTable(Table tbl, Map partSpec, + Path tblDataLocationPath) throws MetaException { + Path partPath = new Path(tbl.getDataLocation(), Warehouse.makePartPath(partSpec)); + return new Path(tblDataLocationPath.toUri().getScheme(), + tblDataLocationPath.toUri().getAuthority(), partPath.toUri().getPath()); + } + /** * Load Data commands for fullAcid tables write to base_x (if there is overwrite clause) or * delta_x_x directory - same as any other Acid write. 
This method modifies the destPath to add @@ -2213,7 +2221,7 @@ public Void call() throws Exception { // load the partition Partition newPartition = loadPartition(partPath, tbl, fullPartSpec, loadFileType, - true, numLB > 0, false, isAcid, hasFollowingStatsTask, writeId, stmtId, + true, false, numLB > 0, false, isAcid, hasFollowingStatsTask, writeId, stmtId, isInsertOverwrite); partitionsMap.put(fullPartSpec, newPartition); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 2dea254c87..8e696a7eeb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -2083,6 +2083,7 @@ private void analyzeAlterTablePartMergeFiles(ASTNode ast, LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ? new HashMap<>() : partSpec); ltd.setLbCtx(lbCtx); + ltd.setInheritTableSpecs(true); Task moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false)); mergeTask.addDependentTask(moveTsk); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java index f15b3c3879..af2ece44dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java @@ -38,6 +38,7 @@ private ListBucketingCtx lbCtx; private boolean inheritTableSpecs = true; //For partitions, flag controlling whether the current //table specs are to be used + private boolean inheritLocation = false; // If true, loadPartition regenerates the partition path under the table location instead of keeping an existing partition's location. 
private int stmtId; private Long currentWriteId; private boolean isInsertOverwrite; @@ -71,6 +72,7 @@ public LoadTableDesc(final LoadTableDesc o) { this.dpCtx = o.dpCtx; this.lbCtx = o.lbCtx; this.inheritTableSpecs = o.inheritTableSpecs; + this.inheritLocation = o.inheritLocation; this.currentWriteId = o.currentWriteId; this.table = o.table; this.partitionSpec = o.partitionSpec; @@ -207,8 +209,14 @@ public boolean getInheritTableSpecs() { return inheritTableSpecs; } + public boolean getInheritLocation() { + return inheritLocation; + } + public void setInheritTableSpecs(boolean inheritTableSpecs) { - this.inheritTableSpecs = inheritTableSpecs; + // Also propagate the value to inheritLocation (there is no separate setter for it). + // TODO: Who actually needs this? Might just be some pointless legacy code. + this.inheritTableSpecs = inheritLocation = inheritTableSpecs; } public boolean isInsertOverwrite() { diff --git ql/src/test/queries/clientpositive/external_insert.q ql/src/test/queries/clientpositive/external_insert.q new file mode 100644 index 0000000000..93cd3a8125 --- /dev/null +++ ql/src/test/queries/clientpositive/external_insert.q @@ -0,0 +1,12 @@ +drop table tbl1; +CREATE TABLE tbl1 (index int, value int ) PARTITIONED BY ( created_date string ); +insert into tbl1 partition(created_date='2018-02-01') VALUES (2, 2); + +CREATE external TABLE tbl2 (index int, value int ) PARTITIONED BY ( created_date string ); +ALTER TABLE tbl2 ADD PARTITION(created_date='2018-02-01'); +ALTER TABLE tbl2 PARTITION(created_date='2018-02-01') SET LOCATION '${system:test.warehouse.dir}/tbl1/created_date=2018-02-01'; +select * from tbl2; +describe formatted tbl2 partition(created_date='2018-02-01'); +insert into tbl2 partition(created_date='2018-02-01') VALUES (1, 1); +select * from tbl2; +describe formatted tbl2 partition(created_date='2018-02-01'); diff --git ql/src/test/results/clientpositive/external_insert.q.out ql/src/test/results/clientpositive/external_insert.q.out 
new file mode 100644 index 0000000000..90aa17b8cd --- /dev/null +++ ql/src/test/results/clientpositive/external_insert.q.out @@ -0,0 +1,156 @@ +PREHOOK: query: drop table tbl1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table tbl1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE tbl1 (index int, value int ) PARTITIONED BY ( created_date string ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: CREATE TABLE tbl1 (index int, value int ) PARTITIONED BY ( created_date string ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: insert into tbl1 partition(created_date='2018-02-01') VALUES (2, 2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl1@created_date=2018-02-01 +POSTHOOK: query: insert into tbl1 partition(created_date='2018-02-01') VALUES (2, 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl1@created_date=2018-02-01 +POSTHOOK: Lineage: tbl1 PARTITION(created_date=2018-02-01).index SCRIPT [] +POSTHOOK: Lineage: tbl1 PARTITION(created_date=2018-02-01).value SCRIPT [] +PREHOOK: query: CREATE external TABLE tbl2 (index int, value int ) PARTITIONED BY ( created_date string ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE external TABLE tbl2 (index int, value int ) PARTITIONED BY ( created_date string ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: ALTER TABLE tbl2 ADD PARTITION(created_date='2018-02-01') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@tbl2 +POSTHOOK: query: ALTER TABLE tbl2 ADD PARTITION(created_date='2018-02-01') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@tbl2 +POSTHOOK: Output: default@tbl2@created_date=2018-02-01 +#### A masked pattern was 
here #### +PREHOOK: type: ALTERPARTITION_LOCATION +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@tbl2@created_date=2018-02-01 +#### A masked pattern was here #### +POSTHOOK: type: ALTERPARTITION_LOCATION +POSTHOOK: Input: default@tbl2 +POSTHOOK: Input: default@tbl2@created_date=2018-02-01 +POSTHOOK: Output: default@tbl2@created_date=2018-02-01 +#### A masked pattern was here #### +PREHOOK: query: select * from tbl2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl2 +PREHOOK: Input: default@tbl2@created_date=2018-02-01 +#### A masked pattern was here #### +POSTHOOK: query: select * from tbl2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl2 +POSTHOOK: Input: default@tbl2@created_date=2018-02-01 +#### A masked pattern was here #### +2 2 2018-02-01 +PREHOOK: query: describe formatted tbl2 partition(created_date='2018-02-01') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tbl2 +POSTHOOK: query: describe formatted tbl2 partition(created_date='2018-02-01') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tbl2 +# col_name data_type comment +index int +value int + +# Partition Information +# col_name data_type comment +created_date string + +# Detailed Partition Information +Partition Value: [2018-02-01] +Database: default +Table: tbl2 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"index\":\"true\",\"value\":\"true\"}} +#### A masked pattern was here #### + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: insert into tbl2 partition(created_date='2018-02-01') VALUES (1, 1) 
+PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl2@created_date=2018-02-01 +POSTHOOK: query: insert into tbl2 partition(created_date='2018-02-01') VALUES (1, 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl2@created_date=2018-02-01 +POSTHOOK: Lineage: tbl2 PARTITION(created_date=2018-02-01).index SCRIPT [] +POSTHOOK: Lineage: tbl2 PARTITION(created_date=2018-02-01).value SCRIPT [] +PREHOOK: query: select * from tbl2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl2 +PREHOOK: Input: default@tbl2@created_date=2018-02-01 +#### A masked pattern was here #### +POSTHOOK: query: select * from tbl2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl2 +POSTHOOK: Input: default@tbl2@created_date=2018-02-01 +#### A masked pattern was here #### +2 2 2018-02-01 +1 1 2018-02-01 +PREHOOK: query: describe formatted tbl2 partition(created_date='2018-02-01') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tbl2 +POSTHOOK: query: describe formatted tbl2 partition(created_date='2018-02-01') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tbl2 +# col_name data_type comment +index int +value int + +# Partition Information +# col_name data_type comment +created_date string + +# Detailed Partition Information +Partition Value: [2018-02-01] +Database: default +Table: tbl2 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"index\":\"true\",\"value\":\"true\"}} +#### A masked pattern was here #### + numFiles 2 + numRows 1 + rawDataSize 3 + totalSize 8 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + 
serialization.format 1