diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index d2f8861..0a5cf00 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -109,7 +109,7 @@ private boolean shouldRewrite(ASTNode tree) {
     }
   }
 
-  private void handlePartialPartitionSpec(Map<String, String> partSpec) throws
+  private void handlePartialPartitionSpec(Map<String, String> partSpec, ColumnStatsAutoGatherContext context) throws
       SemanticException {
 
     // If user has fully specified partition, validate that partition exists
@@ -120,7 +120,7 @@ private void handlePartialPartitionSpec(Map<String, String> partSpec) throws
     try {
       // for static partition, it may not exist when HIVESTATSCOLAUTOGATHER is
       // set to true
-      if (!conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER)) {
+      if (context == null) {
         if ((partValsSpecified == tbl.getPartitionKeys().size()) &&
             (db.getPartition(tbl, partSpec, false, null, false) == null)) {
           throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_PARTITION.getMsg()
@@ -372,7 +372,7 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException {
       if (isPartitionStats) {
         isTableLevel = false;
         partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf);
-        handlePartialPartitionSpec(partSpec);
+        handlePartialPartitionSpec(partSpec, null);
       } else {
         isTableLevel = true;
       }
@@ -442,7 +442,7 @@ public ASTNode rewriteAST(ASTNode ast, ColumnStatsAutoGatherContext context)
     if (isPartitionStats) {
       isTableLevel = false;
       partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf);
-      handlePartialPartitionSpec(partSpec);
+      handlePartialPartitionSpec(partSpec, context);
     } else {
       isTableLevel = true;
     }
diff --git a/ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values_autogather.q b/ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values_autogather.q
new file mode 100644
index 0000000..5f5cb42
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values_autogather.q
@@ -0,0 +1,17 @@
+set hive.stats.column.autogather=true;
+DROP TABLE Employee_Part;
+
+CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile;
+
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK');
+
+-- specify invalid values for the partitioning keys
+explain
+analyze table Employee_Part partition (employeeSalary='4000.0', country='Canada') compute statistics for columns employeeName, employeeID;
+analyze table Employee_Part partition (employeeSalary='4000.0', country='Canada') compute statistics for columns employeeName, employeeID;
diff --git a/ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values_autogather.q.out b/ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values_autogather.q.out
new file mode 100644
index 0000000..3261f78
--- /dev/null
+++ b/ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values_autogather.q.out
@@ -0,0 +1,69 @@
+PREHOOK: query: DROP TABLE Employee_Part
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE Employee_Part
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@Employee_Part
+POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@Employee_Part
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK
+FAILED: SemanticException [Error 30007]: Invalid partitioning key/value specified in ANALYZE statement : {employeesalary=4000.0, country=Canada}