diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index 900259f..12a43a2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -696,7 +696,8 @@ WHEN NOT MATCHED THEN INSERT VALUES(source.a2, source.b2) if(numWhenMatchedDeleteClauses + numWhenMatchedUpdateClauses == 2 && extraPredicate == null) { throw new SemanticException(ErrorMsg.MERGE_PREDIACTE_REQUIRED, ctx.getCmd()); } - boolean validating = handleCardinalityViolation(rewrittenQueryStr, target, onClauseAsText, targetTable); + boolean validating = handleCardinalityViolation(rewrittenQueryStr, target, onClauseAsText, + targetTable, numWhenMatchedDeleteClauses == 0 && numWhenMatchedUpdateClauses == 0); ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd()); Context rewrittenCtx = rr.rewrittenCtx; ASTNode rewrittenTree = rr.rewrittenTree; @@ -828,13 +829,18 @@ private boolean isTargetTable(Entity entity, Table targetTable) { * @return true if another Insert clause was added */ private boolean handleCardinalityViolation(StringBuilder rewrittenQueryStr, ASTNode target, - String onClauseAsString, Table targetTable) + String onClauseAsString, Table targetTable, + boolean onlyHaveWhenNotMatchedClause) throws SemanticException { if(!conf.getBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK)) { LOG.info("Merge statement cardinality violation check is disabled: " + HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK.varname); return false; } + if(onlyHaveWhenNotMatchedClause) { + //if no update or delete in Merge, there is no need to do cardinality check + return false; + } //this is a tmp table and thus Session scoped and acid requires SQL statement to be serial in a // given session, i.e. 
the name can be fixed across all invocations String tableName = "merge_tmp_table"; @@ -873,7 +879,7 @@ private boolean handleCardinalityViolation(StringBuilder rewrittenQueryStr, ASTN catch(HiveException|MetaException e) { throw new SemanticException(e.getMessage(), e); } - return false; + return true; } /** * @param onClauseAsString - because there is no clone() and we need to use in multiple places diff --git ql/src/test/queries/clientpositive/sqlmerge.q ql/src/test/queries/clientpositive/sqlmerge.q index 313e999..deaf91e 100644 --- ql/src/test/queries/clientpositive/sqlmerge.q +++ ql/src/test/queries/clientpositive/sqlmerge.q @@ -2,11 +2,17 @@ set hive.mapred.mode=nonstrict; set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.explain.user=false; +set hive.merge.cardinality.check=true; create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); create table nonAcidOrcTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false'); +--expect a cardinality check because there is update and hive.merge.cardinality.check=true by default explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a WHEN MATCHED AND s.a > 8 THEN DELETE WHEN MATCHED THEN UPDATE SET b = 7 WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b); + +--now we expect no cardinality check since only have insert clause +explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a +WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b); diff --git ql/src/test/results/clientpositive/llap/sqlmerge.q.out ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 486e812..2a3d7db 100644 --- ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -266,3 +266,104 @@ STAGE PLANS: Stage: Stage-9 Stats-Aggr Operator +PREHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON 
t.a = s.a +WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) +PREHOOK: type: QUERY +POSTHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a +WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Execution mode: llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: b (type: int) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 a (type: int) + 1 a (type: int) + outputColumnNames: _col0, _col5, _col6 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: _col0 is null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 
Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.acidtbl + + Stage: Stage-3 + Stats-Aggr Operator +