diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 55e4df3885..72a5ceca1c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -1920,42 +1920,20 @@ private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase targetColumns.remove(f.getName()); } if(!targetColumns.isEmpty()) {//here we need to see if remaining columns are dynamic partition columns - /* We just checked the user specified schema columns among regular table column and found some which are not - 'regular'. Now check is they are dynamic partition columns - For dynamic partitioning, - Given "create table multipart(a int, b int) partitioned by (c int, d int);" - for "insert into multipart partition(c='1',d)(d,a) values(2,3);" we expect parse tree to look like this - (TOK_INSERT_INTO - (TOK_TAB - (TOK_TABNAME multipart) - (TOK_PARTSPEC - (TOK_PARTVAL c '1') - (TOK_PARTVAL d) - ) - ) - (TOK_TABCOLNAME d a) - )*/ - List dynamicPartitionColumns = new ArrayList(); if(ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) { - ASTNode tokTab = (ASTNode)ast.getChild(0); - ASTNode tokPartSpec = (ASTNode)tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC); - if(tokPartSpec != null) { - for(Node n : tokPartSpec.getChildren()) { - ASTNode tokPartVal = null; - if(n instanceof ASTNode) { - tokPartVal = (ASTNode)n; - } - if(tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL && tokPartVal.getChildCount() == 1) { - assert tokPartVal.getChild(0).getType() == HiveParser.Identifier : - "Expected column name; found tokType=" + tokPartVal.getType(); - dynamicPartitionColumns.add(tokPartVal.getChild(0).getText()); - } - } + ASTNode tokTab = (ASTNode) ast.getChild(0); + ASTNode tokPartSpec = (ASTNode) tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC); + if 
(tokPartSpec != null) { + throw new SemanticException(generateErrorMessage(tokPartSpec, + "Partition specification is not allowed if partitions are specified in column schema: ")); } } - for(String colName : dynamicPartitionColumns) { - targetColumns.remove(colName); + + for(FieldSchema f : targetTable.getPartCols()) { + //parser only allows foo(a,b), not foo(foo.a, foo.b) + targetColumns.remove(f.getName()); } + if(!targetColumns.isEmpty()) { //Found some columns in user specified schema which are neither regular not dynamic partition columns throw new SemanticException(generateErrorMessage(tabColName, @@ -4703,7 +4681,16 @@ public RowResolver handleInsertStatementSpec(List col_list, String targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType())); } Map partSpec = qb.getMetaData().getPartSpecForAlias(dest); - if(partSpec != null) { + if(targetTableSchema.size() > targetTableCols.size()) { + // this means the column schema had partition spec specified, therefore we need to take partition columns + // into account so that projection appropriately reorders them + // Note that target can't be NULL here since if partition spec is specified in column schema, partition clause + // couldn't have been specified (phase1 makes sure of this) + for(FieldSchema fs : target.getPartCols()) { + targetTableColNames.add(fs.getName()); + targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType())); + } + } else if(partSpec != null) { //find dynamic partition columns //relies on consistent order via LinkedHashMap for(Map.Entry partKeyVal : partSpec.entrySet()) { diff --git a/ql/src/test/queries/clientpositive/dynamic_partition_insert.q b/ql/src/test/queries/clientpositive/dynamic_partition_insert.q index ee13bd5480..19538c357e 100644 --- a/ql/src/test/queries/clientpositive/dynamic_partition_insert.q +++ b/ql/src/test/queries/clientpositive/dynamic_partition_insert.q @@ -1,3 +1,4 @@ +--!
qt:dataset:src SET hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; CREATE TABLE t1_n131 (c1 BIGINT, c2 STRING); @@ -55,3 +56,33 @@ SELECT distinct value FROM SRC WHERE src.key >= 100 and src.key < 200; SHOW PARTITIONS dest2_n37; DROP TABLE dest1_n143; DROP TABLE dest2_n37; + +-- partition spec within column spec +CREATE TABLE table1_n15 (name string, age int) PARTITIONED BY (country string, state string); +INSERT INTO table1_n15 values ('John Doe', 23, 'USA', 'CA'), ('Jane Doe', 22, 'USA', 'TX'); +SHOW PARTITIONS table1_n15; + +CREATE TABLE table2_n10 (name string, age int) PARTITIONED BY (country string, state string); + +-- full partition schema with column schema +INSERT INTO TABLE table2_n10(age, name, country, state) SELECT age, name, country, state FROM table1_n15; +SHOW PARTITIONS table2_n10; +SELECT * from table2_n10; + +-- only partition schema +INSERT INTO TABLE table2_n10(state, country) SELECT state, country FROM table1_n15; +SHOW PARTITIONS table2_n10; +SELECT * from table2_n10; + +-- full column schema with partial partition schema +INSERT INTO TABLE table2_n10(age, name, country) SELECT age, name, country FROM table1_n15; +SHOW PARTITIONS table2_n10; +SELECT * from table2_n10; + +-- partial column schema with partial partition schema +INSERT INTO TABLE table2_n10( name, country) SELECT name, country FROM table1_n15; +SHOW PARTITIONS table2_n10; +SELECT * from table2_n10; + +DROP TABLE table2_n10; +DROP TABLE table1_n15; diff --git a/ql/src/test/results/clientpositive/dynamic_partition_insert.q.out b/ql/src/test/results/clientpositive/dynamic_partition_insert.q.out index ff28dbb036..f154fa4e37 100644 --- a/ql/src/test/results/clientpositive/dynamic_partition_insert.q.out +++ b/ql/src/test/results/clientpositive/dynamic_partition_insert.q.out @@ -942,3 +942,224 @@ POSTHOOK: query: DROP TABLE dest2_n37 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@dest2_n37 POSTHOOK: Output: default@dest2_n37 +PREHOOK: query: CREATE TABLE
table1_n15 (name string, age int) PARTITIONED BY (country string, state string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1_n15 +POSTHOOK: query: CREATE TABLE table1_n15 (name string, age int) PARTITIONED BY (country string, state string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1_n15 +PREHOOK: query: INSERT INTO table1_n15 values ('John Doe', 23, 'USA', 'CA'), ('Jane Doe', 22, 'USA', 'TX') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table1_n15 +POSTHOOK: query: INSERT INTO table1_n15 values ('John Doe', 23, 'USA', 'CA'), ('Jane Doe', 22, 'USA', 'TX') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table1_n15@country=USA/state=CA +POSTHOOK: Output: default@table1_n15@country=USA/state=TX +POSTHOOK: Lineage: table1_n15 PARTITION(country=USA,state=CA).age SCRIPT [] +POSTHOOK: Lineage: table1_n15 PARTITION(country=USA,state=CA).name SCRIPT [] +POSTHOOK: Lineage: table1_n15 PARTITION(country=USA,state=TX).age SCRIPT [] +POSTHOOK: Lineage: table1_n15 PARTITION(country=USA,state=TX).name SCRIPT [] +PREHOOK: query: SHOW PARTITIONS table1_n15 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@table1_n15 +POSTHOOK: query: SHOW PARTITIONS table1_n15 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@table1_n15 +country=USA/state=CA +country=USA/state=TX +PREHOOK: query: CREATE TABLE table2_n10 (name string, age int) PARTITIONED BY (country string, state string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2_n10 +POSTHOOK: query: CREATE TABLE table2_n10 (name string, age int) PARTITIONED BY (country string, state string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2_n10 +PREHOOK: query: INSERT INTO TABLE table2_n10(age, name, country, state) SELECT age, name, 
country, state FROM table1_n15 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1_n15 +PREHOOK: Input: default@table1_n15@country=USA/state=CA +PREHOOK: Input: default@table1_n15@country=USA/state=TX +PREHOOK: Output: default@table2_n10 +POSTHOOK: query: INSERT INTO TABLE table2_n10(age, name, country, state) SELECT age, name, country, state FROM table1_n15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1_n15 +POSTHOOK: Input: default@table1_n15@country=USA/state=CA +POSTHOOK: Input: default@table1_n15@country=USA/state=TX +POSTHOOK: Output: default@table2_n10@country=USA/state=CA +POSTHOOK: Output: default@table2_n10@country=USA/state=TX +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=CA).age SIMPLE [(table1_n15)table1_n15.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=CA).name SIMPLE [(table1_n15)table1_n15.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=TX).age SIMPLE [(table1_n15)table1_n15.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=TX).name SIMPLE [(table1_n15)table1_n15.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: SHOW PARTITIONS table2_n10 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@table2_n10 +POSTHOOK: query: SHOW PARTITIONS table2_n10 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@table2_n10 +country=USA/state=CA +country=USA/state=TX +PREHOOK: query: SELECT * from table2_n10 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2_n10 +PREHOOK: Input: default@table2_n10@country=USA/state=CA +PREHOOK: Input: default@table2_n10@country=USA/state=TX +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from table2_n10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2_n10 +POSTHOOK: Input: default@table2_n10@country=USA/state=CA +POSTHOOK: Input: default@table2_n10@country=USA/state=TX +#### 
A masked pattern was here #### +John Doe 23 USA CA +Jane Doe 22 USA TX +PREHOOK: query: INSERT INTO TABLE table2_n10(state, country) SELECT state, country FROM table1_n15 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1_n15 +PREHOOK: Input: default@table1_n15@country=USA/state=CA +PREHOOK: Input: default@table1_n15@country=USA/state=TX +PREHOOK: Output: default@table2_n10 +POSTHOOK: query: INSERT INTO TABLE table2_n10(state, country) SELECT state, country FROM table1_n15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1_n15 +POSTHOOK: Input: default@table1_n15@country=USA/state=CA +POSTHOOK: Input: default@table1_n15@country=USA/state=TX +POSTHOOK: Output: default@table2_n10@country=USA/state=CA +POSTHOOK: Output: default@table2_n10@country=USA/state=TX +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=CA).age SIMPLE [] +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=CA).name SIMPLE [] +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=TX).age SIMPLE [] +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=TX).name SIMPLE [] +PREHOOK: query: SHOW PARTITIONS table2_n10 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@table2_n10 +POSTHOOK: query: SHOW PARTITIONS table2_n10 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@table2_n10 +country=USA/state=CA +country=USA/state=TX +PREHOOK: query: SELECT * from table2_n10 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2_n10 +PREHOOK: Input: default@table2_n10@country=USA/state=CA +PREHOOK: Input: default@table2_n10@country=USA/state=TX +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from table2_n10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2_n10 +POSTHOOK: Input: default@table2_n10@country=USA/state=CA +POSTHOOK: Input: default@table2_n10@country=USA/state=TX +#### A masked pattern was here #### +John Doe 23 USA CA +NULL NULL USA CA +Jane Doe 22 USA TX +NULL NULL USA TX +PREHOOK: query: INSERT INTO TABLE table2_n10(age, name, 
country) SELECT age, name, country FROM table1_n15 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1_n15 +PREHOOK: Input: default@table1_n15@country=USA/state=CA +PREHOOK: Input: default@table1_n15@country=USA/state=TX +PREHOOK: Output: default@table2_n10 +POSTHOOK: query: INSERT INTO TABLE table2_n10(age, name, country) SELECT age, name, country FROM table1_n15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1_n15 +POSTHOOK: Input: default@table1_n15@country=USA/state=CA +POSTHOOK: Input: default@table1_n15@country=USA/state=TX +POSTHOOK: Output: default@table2_n10@country=USA/state=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=__HIVE_DEFAULT_PARTITION__).age SIMPLE [(table1_n15)table1_n15.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=__HIVE_DEFAULT_PARTITION__).name SIMPLE [(table1_n15)table1_n15.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: SHOW PARTITIONS table2_n10 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@table2_n10 +POSTHOOK: query: SHOW PARTITIONS table2_n10 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@table2_n10 +country=USA/state=CA +country=USA/state=TX +country=USA/state=__HIVE_DEFAULT_PARTITION__ +PREHOOK: query: SELECT * from table2_n10 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2_n10 +PREHOOK: Input: default@table2_n10@country=USA/state=CA +PREHOOK: Input: default@table2_n10@country=USA/state=TX +PREHOOK: Input: default@table2_n10@country=USA/state=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from table2_n10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2_n10 +POSTHOOK: Input: default@table2_n10@country=USA/state=CA +POSTHOOK: Input: default@table2_n10@country=USA/state=TX +POSTHOOK: Input: default@table2_n10@country=USA/state=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +John Doe 23 USA CA +NULL NULL USA CA 
+Jane Doe 22 USA TX +NULL NULL USA TX +John Doe 23 USA __HIVE_DEFAULT_PARTITION__ +Jane Doe 22 USA __HIVE_DEFAULT_PARTITION__ +PREHOOK: query: INSERT INTO TABLE table2_n10( name, country) SELECT name, country FROM table1_n15 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1_n15 +PREHOOK: Input: default@table1_n15@country=USA/state=CA +PREHOOK: Input: default@table1_n15@country=USA/state=TX +PREHOOK: Output: default@table2_n10 +POSTHOOK: query: INSERT INTO TABLE table2_n10( name, country) SELECT name, country FROM table1_n15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1_n15 +POSTHOOK: Input: default@table1_n15@country=USA/state=CA +POSTHOOK: Input: default@table1_n15@country=USA/state=TX +POSTHOOK: Output: default@table2_n10@country=USA/state=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=__HIVE_DEFAULT_PARTITION__).age SIMPLE [] +POSTHOOK: Lineage: table2_n10 PARTITION(country=USA,state=__HIVE_DEFAULT_PARTITION__).name SIMPLE [(table1_n15)table1_n15.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: SHOW PARTITIONS table2_n10 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@table2_n10 +POSTHOOK: query: SHOW PARTITIONS table2_n10 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@table2_n10 +country=USA/state=CA +country=USA/state=TX +country=USA/state=__HIVE_DEFAULT_PARTITION__ +PREHOOK: query: SELECT * from table2_n10 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2_n10 +PREHOOK: Input: default@table2_n10@country=USA/state=CA +PREHOOK: Input: default@table2_n10@country=USA/state=TX +PREHOOK: Input: default@table2_n10@country=USA/state=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from table2_n10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2_n10 +POSTHOOK: Input: default@table2_n10@country=USA/state=CA +POSTHOOK: Input: default@table2_n10@country=USA/state=TX +POSTHOOK: Input: 
default@table2_n10@country=USA/state=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +John Doe 23 USA CA +NULL NULL USA CA +Jane Doe 22 USA TX +NULL NULL USA TX +John Doe 23 USA __HIVE_DEFAULT_PARTITION__ +Jane Doe 22 USA __HIVE_DEFAULT_PARTITION__ +John Doe NULL USA __HIVE_DEFAULT_PARTITION__ +Jane Doe NULL USA __HIVE_DEFAULT_PARTITION__ +PREHOOK: query: DROP TABLE table2_n10 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2_n10 +PREHOOK: Output: default@table2_n10 +POSTHOOK: query: DROP TABLE table2_n10 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2_n10 +POSTHOOK: Output: default@table2_n10 +PREHOOK: query: DROP TABLE table1_n15 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1_n15 +PREHOOK: Output: default@table1_n15 +POSTHOOK: query: DROP TABLE table1_n15 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1_n15 +POSTHOOK: Output: default@table1_n15