diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 4246d68..f1f3c69 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -460,7 +460,14 @@ "to fail because of this, set hive.stats.atomic=false", true), STATS_SKIPPING_BY_ERROR(30017, "Skipping stats aggregation by error {0}", true), ORC_CORRUPTED_READ(30018, "Corruption in ORC data encountered. To skip reading corrupted " - + "data, set " + HiveConf.ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA + " to true"); + + "data, set " + HiveConf.ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA + " to true"), + UPDATEDELETE_PARSE_ERROR(30020, "Encountered parse error while parsing rewritten update or " + + "delete query"), + UPDATEDELETE_IO_ERROR(30021, "Encountered I/O error while parsing rewritten update or " + + "delete query"), + UPDATE_CANNOT_UPDATE_PART_VALUE(30022, "Updating values of partition columns is not supported"), + + ; private int errorCode; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index a76cad7..8bf3697 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -331,6 +331,9 @@ TOK_RESOURCE_LIST; TOK_COMPACT; TOK_SHOW_COMPACTIONS; TOK_SHOW_TRANSACTIONS; +TOK_DELETE_FROM; +TOK_UPDATE_TABLE; +TOK_SET_COLUMNS_CLAUSE; } @@ -469,6 +472,7 @@ import java.util.HashMap; xlateMap.put("KW_DEFINED", "DEFINED"); xlateMap.put("KW_SUBQUERY", "SUBQUERY"); xlateMap.put("KW_REWRITE", "REWRITE"); + xlateMap.put("KW_UPDATE", "UPDATE"); // Operators xlateMap.put("DOT", "."); @@ -638,6 +642,8 @@ execStatement | exportStatement | importStatement | ddlStatement + | deleteStatement + | updateStatement ; loadStatement @@ -2208,3 +2214,34 @@ limitClause : KW_LIMIT num=Number -> ^(TOK_LIMIT $num) ; + +//DELETE FROM <tableName> WHERE ...; +deleteStatement +@init { pushMsg("delete statement", state); } +@after { popMsg(state); } + : + KW_DELETE KW_FROM tableName (whereClause)? -> ^(TOK_DELETE_FROM tableName whereClause?) + ; + +/*SET <colName> = (3 + col2)*/ +columnAssignmentClause + : + tableOrColumn EQUAL^ atomExpression + ; + +/*SET col1 = 5, col2 = (4 + col4), ...*/ +setColumnsClause + : + KW_SET columnAssignmentClause (COMMA columnAssignmentClause)* -> ^(TOK_SET_COLUMNS_CLAUSE columnAssignmentClause* ) + ; + +/* + UPDATE <table> + SET col1 = val1, col2 = val2... WHERE ... +*/ +updateStatement +@init { pushMsg("update statement", state); } +@after { popMsg(state); } + : + KW_UPDATE tableName setColumnsClause whereClause? -> ^(TOK_UPDATE_TABLE tableName setColumnsClause whereClause?)
+ ; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 7a71ec7..f743614 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -37,6 +37,7 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import com.google.common.annotations.VisibleForTesting; import org.antlr.runtime.tree.Tree; import org.antlr.runtime.tree.TreeWizard; import org.antlr.runtime.tree.TreeWizard.ContextVisitor; @@ -85,6 +86,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; @@ -5940,18 +5942,24 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) ArrayList vecCol = new ArrayList(); - try { - StructObjectInspector rowObjectInspector = (StructObjectInspector) table_desc - .getDeserializer().getObjectInspector(); - List fields = rowObjectInspector - .getAllStructFieldRefs(); - for (int i = 0; i < fields.size(); i++) { - vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils - .getTypeInfoFromObjectInspector(fields.get(i) - .getFieldObjectInspector()), "", false)); + if (deleting()) { + vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), + TypeInfoUtils.getTypeInfoFromObjectInspector(VirtualColumn.ROWID.getObjectInspector()), + "", true)); + } else { + try { + StructObjectInspector rowObjectInspector = (StructObjectInspector) table_desc + .getDeserializer().getObjectInspector(); + List fields = rowObjectInspector + .getAllStructFieldRefs(); + for (int i = 0; i < fields.size(); i++) { + vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils + .getTypeInfoFromObjectInspector(fields.get(i) + .getFieldObjectInspector()), "", false)); + } + } catch (Exception e) { + throw new SemanticException(e.getMessage(), e); } - } catch (Exception e) { - throw new SemanticException(e.getMessage(), e); } RowSchema fsRS = new RowSchema(vecCol); @@ -5964,6 +5972,17 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) (dest_tab.getSortCols() != null && dest_tab.getSortCols().size() > 0 && conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCESORTING)))); + // If this table is working with ACID semantics, turn off merging + Class[] interfaces = dest_tab.getOutputFormatClass().getInterfaces(); + boolean sawAcid = false; + for (Class iface : interfaces) { + if (iface.equals(AcidOutputFormat.class)) { + sawAcid = true; + break; + } + } + canBeMerged &= !sawAcid; + FileSinkDesc fileSinkDesc = new FileSinkDesc( queryTmpdir, table_desc, @@ -6053,16 +6072,32 @@ Operator genConversionSelectOperator(String dest, QB qb, Operator input, outColumnCnt += dpCtx.getNumDPCols(); } - if (inColumnCnt != outColumnCnt) { - String reason = "Table " + dest + " has " + outColumnCnt - + " columns, but query has " + inColumnCnt + " columns."; - throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg( - qb.getParseInfo().getDestForClause(dest), reason)); - } else if (dynPart && dpCtx != null) { - // create the mapping from input ExprNode to dest table DP column - dpCtx.mapInputToDP(rowFields.subList(tableFields.size(), rowFields.size())); 
+ if (deleting()) { + if (rowFields.size() > 1) { + // This means we have partition columns to deal with, so set up the mapping from the + // input to the partition columns. + dpCtx.mapInputToDP(rowFields.subList(1, rowFields.size())); + } + } else if (updating()) { + // In this case we expect the number of in fields to exceed the number of out fields by one + // (for the ROW__ID virtual column). If there are more columns than this, + // then the extras are for dynamic partitioning + if (dynPart && dpCtx != null) { + dpCtx.mapInputToDP(rowFields.subList(tableFields.size() + 1, rowFields.size())); + } + } else { + if (inColumnCnt != outColumnCnt) { + String reason = "Table " + dest + " has " + outColumnCnt + + " columns, but query has " + inColumnCnt + " columns."; + throw new SemanticException(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg( + qb.getParseInfo().getDestForClause(dest), reason)); + } else if (dynPart && dpCtx != null) { + // create the mapping from input ExprNode to dest table DP column + dpCtx.mapInputToDP(rowFields.subList(tableFields.size(), rowFields.size())); + } } + // Check column types boolean converted = false; int columnNumber = tableFields.size(); @@ -6074,7 +6109,7 @@ Operator genConversionSelectOperator(String dest, QB qb, Operator input, MetadataTypedColumnsetSerDe.class); boolean isLazySimpleSerDe = table_desc.getDeserializerClass().equals( LazySimpleSerDe.class); - if (!isMetaDataSerDe) { + if (!isMetaDataSerDe && !deleting()) { // here only deals with non-partition columns. We deal with partition columns next for (int i = 0; i < columnNumber; i++) { @@ -11579,4 +11614,13 @@ private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, else return (ltd.getReplace() ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT); } + + protected boolean updating() { + return false; + } + + protected boolean deleting() { + return false; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java index 3dfce99..9e27e1f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java @@ -257,6 +257,11 @@ public static BaseSemanticAnalyzer get(HiveConf conf, ASTNode tree) case HiveParser.TOK_CREATEMACRO: case HiveParser.TOK_DROPMACRO: return new MacroSemanticAnalyzer(conf); + + case HiveParser.TOK_UPDATE_TABLE: + case HiveParser.TOK_DELETE_FROM: + return new UpdateDeleteSemanticAnalyzer(conf); + default: return new SemanticAnalyzer(conf); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java new file mode 100644 index 0000000..a72e068 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -0,0 +1,307 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.hooks.Entity; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.FileSinkDesc; + + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * A subclass of the {@link org.apache.hadoop.hive.ql.parse.SemanticAnalyzer} that just handles + * update and delete statements. It works by rewriting the updates and deletes into insert + * statements (since they are actually inserts) and then doing some patch up to make them work as + * updates and deletes instead. + */ +public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer { + + private Context rewrittenCtx; + boolean useSuper = false; + boolean doingUpdate; + boolean doingDelete; + + public UpdateDeleteSemanticAnalyzer(HiveConf conf) throws SemanticException { + super(conf); + } + + @Override + public void analyzeInternal(ASTNode tree) throws SemanticException { + if (useSuper) { + super.analyzeInternal(tree); + } else { + switch (tree.getToken().getType()) { + case HiveParser.TOK_DELETE_FROM: + analyzeDelete(tree); + return; + + case HiveParser.TOK_UPDATE_TABLE: + analyzeUpdate(tree); + return; + + default: + throw new RuntimeException("Asked to parse token " + tree.getName() + " in " + + "UpdateDeleteSemanticAnalyzer"); + } + } + } + + @Override + protected boolean updating() { + return doingUpdate; + } + + @Override + protected boolean deleting() { + return doingDelete; + } + + private void analyzeUpdate(ASTNode tree) throws SemanticException { + doingUpdate = true; + reparseAndSuperAnalyze(tree); + } + + private void analyzeDelete(ASTNode tree) throws SemanticException { + doingDelete = true; + reparseAndSuperAnalyze(tree); + } + + private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException { + List children = tree.getChildren(); + // The first child should be the table we are deleting from + ASTNode tabName = (ASTNode)children.get(0); + assert tabName.getToken().getType() == HiveParser.TOK_TABNAME : + "Expected tablename as first child of " + operation() + " but found " + tabName.getName(); + String[] tableName = getQualifiedTableName(tabName); + + // Rewrite the delete or update into an insert. Crazy, but it works as deletes and update + // actually are inserts into the delta file in Hive. A delete + // DELETE FROM _tablename_ [WHERE ...] + // will be rewritten as + // INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT ROW__ID[, + // _partcols_] from _tablename_ + // An update + // UPDATE _tablename_ SET x = _expr_ [WHERE...] 
+ // will be rewritten as + // INSERT INTO TABLE _tablename_ [PARTITION (_partcols_)] SELECT _all_, + // _partcols_ from _tablename_ + // where _all_ is all the columns. The expressions from the set clause will be re-attached + // later. + // The where clause will also be re-attached later. + + StringBuilder rewrittenQueryStr = new StringBuilder(); + Table mTable = null; + try { + mTable = db.getTable(tableName[0], tableName[1]); + } catch (HiveException e) { + throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg(), e); + } + List<FieldSchema> partCols = mTable.getPartCols(); + + rewrittenQueryStr.append("insert into table " + qualifiedTableName(tableName)); + + // If the table is partitioned we have to put the partition() clause in + if (partCols != null && partCols.size() > 0) { + rewrittenQueryStr.append(" partition ("); + boolean first = true; + for (FieldSchema fschema : partCols) { + if (first) first = false; + else rewrittenQueryStr.append(", "); + rewrittenQueryStr.append(fschema.getName()); + } + rewrittenQueryStr.append(")"); + } + + rewrittenQueryStr.append(" select ROW__ID"); + Map<Integer, ASTNode> setColExprs = null; + if (doingUpdate) { + // An update needs to select all of the columns, as we rewrite the entire row. Also, + // we need to figure out which columns we are going to replace. We won't write the set + // expressions in the rewritten query. We'll patch that up later. + // The set list from update should be the second child (index 1) + assert children.size() >= 2 : "Expected update token to have at least two children"; + ASTNode setClause = (ASTNode)children.get(1); + assert setClause.getToken().getType() == HiveParser.TOK_SET_COLUMNS_CLAUSE : + "Expected second child of update token to be set token"; + + // Get the children of the set clause, each of which should be a column assignment + List<? extends Node> assignments = setClause.getChildren(); + Map<String, ASTNode> setCols = new HashMap<String, ASTNode>(assignments.size()); + setColExprs = new HashMap<Integer, ASTNode>(assignments.size()); + for (Node a : assignments) { + ASTNode assignment = (ASTNode)a; + assert assignment.getToken().getType() == HiveParser.EQUAL : + "Expected set assignments to use equals operator but found " + assignment.getName(); + ASTNode tableOrColTok = (ASTNode)assignment.getChildren().get(0); + assert tableOrColTok.getToken().getType() == HiveParser.TOK_TABLE_OR_COL : + "Expected left side of assignment to be table or column"; + ASTNode colName = (ASTNode)tableOrColTok.getChildren().get(0); + assert colName.getToken().getType() == HiveParser.Identifier : + "Expected column name"; + + String columnName = colName.getText(); + + // Make sure this isn't one of the partitioning columns, that's not supported. + if (partCols != null) { + for (FieldSchema fschema : partCols) { + if (fschema.getName().equalsIgnoreCase(columnName)) { + throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_PART_VALUE.getMsg()); + } + } + } + + // This means that in UPDATE T SET x = _something_ + // _something_ can be whatever is supported in SELECT _something_ + setCols.put(columnName, (ASTNode)assignment.getChildren().get(1)); + } + + List<FieldSchema> nonPartCols = mTable.getCols(); + for (int i = 0; i < nonPartCols.size(); i++) { + rewrittenQueryStr.append(','); + String name = nonPartCols.get(i).getName(); + ASTNode setCol = setCols.get(name); + rewrittenQueryStr.append(name); + if (setCol != null) { + // This is one of the columns we're setting, record its position so we can come back + // later and patch it up. + // Add one to the index because the select has the ROW__ID as the first column. 
+ setColExprs.put(i + 1, setCol); + } + } + } + + // If the table is partitioned, we need to select the partition columns as well. + if (partCols != null) { + for (FieldSchema fschema : partCols) { + rewrittenQueryStr.append(", "); + rewrittenQueryStr.append(fschema.getName()); + } + } + rewrittenQueryStr.append(" from " + qualifiedTableName(tableName)); + + ASTNode where = null; + int whereIndex = doingDelete ? 1 : 2; + if (children.size() > whereIndex) { + where = (ASTNode)children.get(whereIndex); + assert where.getToken().getType() == HiveParser.TOK_WHERE : + "Expected where clause, but found " + where.getName(); + } + + // Parse the rewritten query string + try { + // Set dynamic partitioning to nonstrict so that queries do not need any partition + // references. + HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); + rewrittenCtx = new Context(conf); + } catch (IOException e) { + throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg()); + } + rewrittenCtx.setCmd(rewrittenQueryStr.toString()); + + ParseDriver pd = new ParseDriver(); + ASTNode rewrittenTree = null; + try { + LOG.info("Going to reparse " + operation() + " as <" + rewrittenQueryStr.toString() + ">"); + rewrittenTree = pd.parse(rewrittenQueryStr.toString(), rewrittenCtx); + rewrittenTree = ParseUtils.findRootNonNullToken(rewrittenTree); + + } catch (ParseException e) { + throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg()); + } + + ASTNode rewrittenInsert = (ASTNode)rewrittenTree.getChildren().get(1); + assert rewrittenInsert.getToken().getType() == HiveParser.TOK_INSERT : + "Expected TOK_INSERT as second child of TOK_QUERY but found " + rewrittenInsert.getName(); + + if (where != null) { + // The structure of the AST for the rewritten insert statement is: + // TOK_QUERY -> TOK_FROM + // \-> TOK_INSERT -> TOK_INSERT_INTO + // \-> TOK_SELECT + // The following adds the TOK_WHERE and its subtree from the original query as a child of + // TOK_INSERT, which is where it would have landed if it had been there originally in the + // string. We do it this way because it's easier than turning the original AST back into a + // string and reparsing it. + rewrittenInsert.addChild(where); + } + + // Patch up the projection list for updates, putting back the original set expressions. + if (doingUpdate && setColExprs != null) { + // Walk through the projection list and replace the column names with the + // expressions from the original update. Under the TOK_SELECT (see above) the structure + // looks like: + // TOK_SELECT -> TOK_SELEXPR -> expr + // \-> TOK_SELEXPR -> expr ... + ASTNode rewrittenSelect = (ASTNode)rewrittenInsert.getChildren().get(1); + assert rewrittenSelect.getToken().getType() == HiveParser.TOK_SELECT : + "Expected TOK_SELECT as second child of TOK_INSERT but found " + + rewrittenSelect.getName(); + for (Map.Entry<Integer, ASTNode> entry : setColExprs.entrySet()) { + ASTNode selExpr = (ASTNode)rewrittenSelect.getChildren().get(entry.getKey()); + assert selExpr.getToken().getType() == HiveParser.TOK_SELEXPR : + "Expected child of TOK_SELECT to be TOK_SELEXPR but was " + selExpr.getName(); + // Now, change its child + selExpr.setChild(0, entry.getValue()); + } + } + + try { + useSuper = true; + super.analyze(rewrittenTree, rewrittenCtx); + } finally { + useSuper = false; + } + + if (inputs.size() != outputs.size()) { + // This means the table is partitioned. 
In order to avoid locking the entire write table + // we need to replace the single WriteEntity with a WriteEntity for each partition + for (ReadEntity input : inputs) { + if (input.getTyp() == Entity.Type.PARTITION) { + WriteEntity.WriteType writeType = doingDelete ? WriteEntity.WriteType.DELETE : + WriteEntity.WriteType.UPDATE; + outputs.add(new WriteEntity(input.getPartition(), writeType)); + } + } + } + } + + private String qualifiedTableName(String[] tabName) { + return tabName[0] + "." + tabName[1]; + } + + private String operation() { + if (doingUpdate) return "update"; + else if (doingDelete) return "delete"; + else throw new RuntimeException("I don't know what I'm doing!"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index 301dde5..8ed143d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -84,6 +84,11 @@ private boolean statsCollectRawDataSize; + // Record what type of write this is. Default is non-ACID (ie old style). + public enum WriteType {NON_ACID, ACID_INSERT, ACID_UPDATE, ACID_DELETE}; + private WriteType writeType = WriteType.NON_ACID; + private long txnId = 0; // transaction id for this operation + public FileSinkDesc() { } @@ -137,6 +142,8 @@ public Object clone() throws CloneNotSupportedException { ret.setMaxStatsKeyPrefixLength(maxStatsKeyPrefixLength); ret.setStatsCollectRawDataSize(statsCollectRawDataSize); ret.setDpSortState(dpSortState); + ret.setWriteType(writeType); + ret.setTransactionId(txnId); return (Object) ret; } @@ -398,4 +405,20 @@ public DPSortState getDpSortState() { public void setDpSortState(DPSortState dpSortState) { this.dpSortState = dpSortState; } + + public void setWriteType(WriteType type) { + writeType = type; + } + + public WriteType getWriteType() { + return writeType; + } + + public void setTransactionId(long id) { + txnId = id; + } + + public long getTransactionId() { + return txnId; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java index 4a6dc61..f5bc427 100644 --- ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java +++ ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java @@ -49,6 +49,9 @@ public static HiveCommand find(String[] command) { if (command.length > 1 && "role".equalsIgnoreCase(command[1])) { // special handling for set role r1 statement return null; + } else if(command.length > 1 && "from".equalsIgnoreCase(command[1])) { + //special handling for SQL "delete from
where..." + return null; } else if (COMMANDS.contains(cmd)) { return HiveCommand.valueOf(cmd); } diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java new file mode 100644 index 0000000..d1e536d --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestIUD { + private static HiveConf conf; + + private ParseDriver pd; + private SemanticAnalyzer sA; + + @BeforeClass + public static void initialize() { + conf = new HiveConf(SemanticAnalyzer.class); + SessionState.start(conf); + } + + @Before + public void setup() throws SemanticException { + pd = new ParseDriver(); + sA = new SemanticAnalyzer(conf); + } + + ASTNode parse(String query) throws ParseException { + ASTNode nd = pd.parse(query); + return (ASTNode) nd.getChild(0); + } + @Test + public void testDeleteNoWhere() throws ParseException { + ASTNode ast = parse("DELETE FROM src"); + Assert.assertEquals("AST doesn't match", + "(TOK_DELETE_FROM " + + "(TOK_TABNAME src))", ast.toStringTree()); + } + @Test + public void testDeleteWithWhere() throws ParseException { + ASTNode ast = parse("DELETE FROM src WHERE key IS NOT NULL AND src.value < 0"); + Assert.assertEquals("AST doesn't match", + "(TOK_DELETE_FROM " + + "(TOK_TABNAME src) " + + "(TOK_WHERE " + + "(AND " + + "(TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL key)) " + + "(< (. 
(TOK_TABLE_OR_COL src) value) 0))))", + ast.toStringTree()); + } + @Test + public void testUpdateNoWhereSingleSet() throws ParseException { + ASTNode ast = parse("UPDATE src set key = 3"); + Assert.assertEquals("AST doesn't match", + "(TOK_UPDATE_TABLE " + + "(TOK_TABNAME src) " + + "(TOK_SET_COLUMNS_CLAUSE " + + "(= " + + "(TOK_TABLE_OR_COL key) 3)))", + ast.toStringTree()); + } + @Test + public void testUpdateNoWhereMultiSet() throws ParseException { + ASTNode ast = parse("UPDATE src set key = 3, value = 8"); + Assert.assertEquals("AST doesn't match", + "(TOK_UPDATE_TABLE " + + "(TOK_TABNAME src) " + + "(TOK_SET_COLUMNS_CLAUSE " + + "(= " + + "(TOK_TABLE_OR_COL key) 3) " + + "(= " + + "(TOK_TABLE_OR_COL value) 8)))", + ast.toStringTree()); + } + @Test + public void testUpdateWithWhereSingleSet() throws ParseException { + ASTNode ast = parse("UPDATE src SET key = 3 WHERE value IS NULL"); + Assert.assertEquals("AST doesn't match", + "(TOK_UPDATE_TABLE " + + "(TOK_TABNAME src) " + + "(TOK_SET_COLUMNS_CLAUSE " + + "(= " + + "(TOK_TABLE_OR_COL key) 3)) " + + "(TOK_WHERE (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL value))))", + ast.toStringTree()); + } + @Test + public void testUpdateWithWhereMultiSet() throws ParseException { + ASTNode ast = parse("UPDATE src SET key = 3, value = 8 WHERE VALUE = 1230997"); + Assert.assertEquals("AST doesn't match", + "(TOK_UPDATE_TABLE " + + "(TOK_TABNAME src) " + + "(TOK_SET_COLUMNS_CLAUSE " + + "(= " + + "(TOK_TABLE_OR_COL key) 3) " + + "(= " + + "(TOK_TABLE_OR_COL value) 8)) " + + "(TOK_WHERE (= (TOK_TABLE_OR_COL VALUE) 1230997)))", + ast.toStringTree()); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java new file mode 100644 index 0000000..6d3125d --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java @@ -0,0 +1,2376 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.parse; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import junit.framework.Assert; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.exec.ExplainTask; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.ExplainWork; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; + +public class TestUpdateDeleteSemanticAnalyzer { + + static final private Log LOG = LogFactory.getLog(TestSemanticAnalyzer.class.getName()); + + private HiveConf conf; + private Hive db; + + // All of the insert, update, and delete tests assume two tables, T and U, each with columns a, + // and b. U it partitioned by an additional column ds. These are created by parseAndAnalyze + // and removed by cleanupTables(). + + @Test + public void testInsertSelect() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("insert into table T select a, b from U", "testInsertSelect"); + + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_QUERY\n" + + " TOK_FROM\n" + + " TOK_TABREF\n" + + " TOK_TABNAME\n" + + " U\n" + + " TOK_INSERT\n" + + " TOK_INSERT_INTO\n" + + " TOK_TAB\n" + + " TOK_TABNAME\n" + + " T\n" + + " TOK_SELECT\n" + + " TOK_SELEXPR\n" + + " TOK_TABLE_OR_COL\n" + + " a\n" + + " TOK_SELEXPR\n" + + " TOK_TABLE_OR_COL\n" + + " b\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-1 is a root stage\n" + + " Stage-0 depends on stages: Stage-1\n" + + " Stage-2 depends on stages: Stage-0\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-1\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: u\n" + + " GatherStats: false\n" + + " Select Operator\n" + + " expressions: a (type: string), b (type: string)\n" + + " outputColumnNames: _col0, _col1\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + " 
TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=today\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds today\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=yesterday\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds yesterday\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name 
default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " Truncated Path -> Alias:\n" + + " /u/ds=today [u]\n" + + " /u/ds=yesterday [u]\n" + + "\n" + + " Stage: Stage-0\n" + + " Move Operator\n" + + " tables:\n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + "\n" + + " Stage: Stage-2\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + + } finally { + cleanupTables(); + } + } + + @Test + public void testDeleteAllNonPartitioned() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("delete from T", "testDeleteAllNonPartitioned"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_DELETE_FROM\n" + + " TOK_TABNAME\n" + + " T\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-4 is a root stage\n" + + " Stage-3 depends on stages: Stage-4\n" + + " Stage-5 depends on stages: Stage-3\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-4\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: t\n" + + " GatherStats: false\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct)\n" + + " outputColumnNames: _col0\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + 
" Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: t\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + " name: default.t\n" + + " Truncated Path -> Alias:\n" + + " /t [t]\n" + + "\n" + + " Stage: Stage-3\n" + + " Move Operator\n" + + " tables:\n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + "\n" + + " Stage: Stage-5\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n\n", + explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Test + public void testDeleteWhereNoPartition() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("delete from T where a > 5", "testDeleteWhereNoPartition"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_DELETE_FROM\n" + + " TOK_TABNAME\n" + + " T\n" + + " TOK_WHERE\n" + + " >\n" + + " TOK_TABLE_OR_COL\n" + + " a\n" + + " 5\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-7 is a root stage\n" + + " Stage-6 depends on stages: Stage-7\n" + + " Stage-8 depends on stages: Stage-6\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-7\n" + + " Map Reduce\n" + + " Map 
Operator Tree:\n" + + " TableScan\n" + + " alias: t\n" + + " GatherStats: false\n" + + " Filter Operator\n" + + " isSamplingPred: false\n" + + " predicate: (a > 5) (type: boolean)\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct)\n" + + " outputColumnNames: _col0\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: t\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + " name: default.t\n" + + " Truncated Path -> Alias:\n" + + " /t [t]\n" + + "\n" + + " Stage: Stage-6\n" + + " Move Operator\n" + + " tables:\n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count 
-1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + "\n" + + " Stage: Stage-8\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Test + public void testDeleteAllPartitioned() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("delete from U", "testDeleteAllPartitioned"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_DELETE_FROM\n" + + " TOK_TABNAME\n" + + " U\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-10 is a root stage\n" + + " Stage-9 depends on stages: Stage-10\n" + + " Stage-11 depends on stages: Stage-9\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-10\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: u\n" + + " GatherStats: false\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct), ds (type: string)\n" + + " outputColumnNames: _col0, _col1\n" + + " Reduce Output Operator\n" + + " key expressions: _col1 (type: string)\n" + + " sort order: +\n" + + " Map-reduce partition columns: _col1 (type: string)\n" + + " tag: -1\n" + + " value expressions: _col0 (type: struct), _col1 (type: string)\n" + + " auto parallelism: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=today\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds today\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl 
struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=yesterday\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds yesterday\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " Truncated Path -> Alias:\n" + + " /u/ds=today [u]\n" + + " /u/ds=yesterday [u]\n" + + " Needs Tagging: false\n" + + " Reduce Operator Tree:\n" + + " Extract\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + "\n" + 
+ " Stage: Stage-9\n" + + " Move Operator\n" + + " tables:\n" + + " partition:\n" + + " ds \n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + "\n" + + " Stage: Stage-11\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Test + public void testDeleteAllWherePartitioned() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("delete from U where a > 5", "testDeleteAllWherePartitioned"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_DELETE_FROM\n" + + " TOK_TABNAME\n" + + " U\n" + + " TOK_WHERE\n" + + " >\n" + + " TOK_TABLE_OR_COL\n" + + " a\n" + + " 5\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-13 is a root stage\n" + + " Stage-12 depends on stages: Stage-13\n" + + " Stage-14 depends on stages: Stage-12\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-13\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: u\n" + + " GatherStats: false\n" + + " Filter Operator\n" + + " isSamplingPred: false\n" + + " predicate: (a > 5) (type: boolean)\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct), ds (type: string)\n" + + " outputColumnNames: _col0, _col1\n" + + " Reduce Output Operator\n" + + " key expressions: _col1 (type: string)\n" + + " sort order: +\n" + + " Map-reduce partition columns: _col1 (type: string)\n" + + " tag: -1\n" + + " value expressions: _col0 (type: struct), _col1 (type: string)\n" + + " auto parallelism: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=today\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds today\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=yesterday\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds yesterday\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " Truncated Path -> Alias:\n" + + " /u/ds=today [u]\n" + + " /u/ds=yesterday [u]\n" + + " Needs Tagging: false\n" + + " Reduce Operator Tree:\n" + + " Extract\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " 
file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + "\n" + + " Stage: Stage-12\n" + + " Move Operator\n" + + " tables:\n" + + " partition:\n" + + " ds \n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + "\n" + + " Stage: Stage-14\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Test + public void testDeleteOnePartition() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("delete from U where ds = 'today'", + "testDeleteFromPartitionOnly"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_DELETE_FROM\n" + + " TOK_TABNAME\n" + + " U\n" + + " TOK_WHERE\n" + + " =\n" + + " TOK_TABLE_OR_COL\n" + + " ds\n" + + " 'today'\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-16 is a root stage\n" + + " Stage-15 depends on stages: Stage-16\n" + + " Stage-17 depends on stages: Stage-15\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-16\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: u\n" + + " GatherStats: false\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct), ds (type: string)\n" + + " outputColumnNames: _col0, _col1\n" + + " Reduce Output Operator\n" + + " key expressions: _col1 (type: string)\n" + + " sort order: +\n" + + " Map-reduce partition columns: _col1 (type: string)\n" + + " tag: -1\n" + + " value expressions: _col0 (type: struct), _col1 (type: string)\n" + + " auto parallelism: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=today\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds today\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types 
string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " Truncated Path -> Alias:\n" + + " /u/ds=today [u]\n" + + " Needs Tagging: false\n" + + " Reduce Operator Tree:\n" + + " Extract\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + "\n" + + " Stage: Stage-15\n" + + " Move Operator\n" + + " tables:\n" + + " partition:\n" + + " ds \n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types 
string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + "\n" + + " Stage: Stage-17\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Test + public void testDeleteOnePartitionWhere() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("delete from U where ds = 'today' and a > 5", + "testDeletePartitionWhere"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_DELETE_FROM\n" + + " TOK_TABNAME\n" + + " U\n" + + " TOK_WHERE\n" + + " and\n" + + " =\n" + + " TOK_TABLE_OR_COL\n" + + " ds\n" + + " 'today'\n" + + " >\n" + + " TOK_TABLE_OR_COL\n" + + " a\n" + + " 5\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-19 is a root stage\n" + + " Stage-18 depends on stages: Stage-19\n" + + " Stage-20 depends on stages: Stage-18\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-19\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: u\n" + + " GatherStats: false\n" + + " Filter Operator\n" + + " isSamplingPred: false\n" + + " predicate: (a > 5) (type: boolean)\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct), ds (type: string)\n" + + " outputColumnNames: _col0, _col1\n" + + " Reduce Output Operator\n" + + " key expressions: _col1 (type: string)\n" + + " sort order: +\n" + + " Map-reduce partition columns: _col1 (type: string)\n" + + " tag: -1\n" + + " value expressions: _col0 (type: struct), _col1 (type: string)\n" + + " auto parallelism: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=today\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds today\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " 
serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " Truncated Path -> Alias:\n" + + " /u/ds=today [u]\n" + + " Needs Tagging: false\n" + + " Reduce Operator Tree:\n" + + " Extract\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + "\n" + + " Stage: Stage-18\n" + + " Move Operator\n" + + " tables:\n" + + " partition:\n" + + " ds \n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + "\n" + + " Stage: Stage-20\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Test + public void testUpdateAllNonPartitioned() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("update T set a = 5", "testUpdateAllNonPartitioned"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_UPDATE_TABLE\n" + + " TOK_TABNAME\n" + + " T\n" + + " TOK_SET_COLUMNS_CLAUSE\n" + + " =\n" + + " TOK_TABLE_OR_COL\n" + + " a\n" + + " 5\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-22 is a root stage\n" + + " Stage-21 depends on stages: Stage-22\n" + + " Stage-23 depends on stages: Stage-21\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-22\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: t\n" + + " 
GatherStats: false\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct), 5 (type: int), b (type: string)\n" + + " outputColumnNames: _col0, _col1, _col2\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: t\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + " name: default.t\n" + + " Truncated Path -> Alias:\n" + + " /t [t]\n" + + "\n" + + " Stage: Stage-21\n" + + " Move Operator\n" + + " tables:\n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " 
file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + "\n" + + " Stage: Stage-23\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Test + public void testUpdateAllNonPartitionedWhere() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("update T set a = 5 where b > 5", + "testUpdateAllNonPartitionedWhere"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_UPDATE_TABLE\n" + + " TOK_TABNAME\n" + + " T\n" + + " TOK_SET_COLUMNS_CLAUSE\n" + + " =\n" + + " TOK_TABLE_OR_COL\n" + + " a\n" + + " 5\n" + + " TOK_WHERE\n" + + " >\n" + + " TOK_TABLE_OR_COL\n" + + " b\n" + + " 5\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-25 is a root stage\n" + + " Stage-24 depends on stages: Stage-25\n" + + " Stage-26 depends on stages: Stage-24\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-25\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: t\n" + + " GatherStats: false\n" + + " Filter Operator\n" + + " isSamplingPred: false\n" + + " predicate: (b > 5) (type: boolean)\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct), 5 (type: int), b (type: string)\n" + + " outputColumnNames: _col0, _col1, _col2\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: t\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name 
default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + " name: default.t\n" + + " Truncated Path -> Alias:\n" + + " /t [t]\n" + + "\n" + + " Stage: Stage-24\n" + + " Move Operator\n" + + " tables:\n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.t\n" + + " serialization.ddl struct t { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.t\n" + + "\n" + + " Stage: Stage-26\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Test + public void testUpdateAllPartitioned() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("update U set a = 5", "testUpdateAllPartitioned"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_UPDATE_TABLE\n" + + " TOK_TABNAME\n" + + " U\n" + + " TOK_SET_COLUMNS_CLAUSE\n" + + " =\n" + + " TOK_TABLE_OR_COL\n" + + " a\n" + + " 5\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-28 is a root stage\n" + + " Stage-27 depends on stages: Stage-28\n" + + " Stage-29 depends on stages: Stage-27\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-28\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: u\n" + + " GatherStats: false\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct), 5 (type: int), b (type: string), ds (type: string)\n" + + " outputColumnNames: _col0, _col1, _col2, _col3\n" + + " Reduce Output Operator\n" + + " key expressions: _col3 (type: string)\n" + + " sort order: +\n" + + " Map-reduce partition columns: _col3 (type: string)\n" + + " tag: -1\n" + + " value expressions: _col0 (type: struct), _col1 (type: int), _col2 (type: string), _col3 (type: string)\n" + + " auto 
parallelism: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=today\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds today\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=yesterday\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds yesterday\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " 
partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " Truncated Path -> Alias:\n" + + " /u/ds=today [u]\n" + + " /u/ds=yesterday [u]\n" + + " Needs Tagging: false\n" + + " Reduce Operator Tree:\n" + + " Extract\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + "\n" + + " Stage: Stage-27\n" + + " Move Operator\n" + + " tables:\n" + + " partition:\n" + + " ds \n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + "\n" + + " Stage: Stage-29\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Test + public void testUpdateAllPartitionedWhere() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("update U set a = 5 where b > 5", + "testUpdateAllPartitionedWhere"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_UPDATE_TABLE\n" + + " TOK_TABNAME\n" + + " U\n" + + " TOK_SET_COLUMNS_CLAUSE\n" + + " =\n" + + " TOK_TABLE_OR_COL\n" + + " a\n" + + " 5\n" + + " TOK_WHERE\n" + + " >\n" + + " TOK_TABLE_OR_COL\n" + + " b\n" + + " 5\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-31 is a root stage\n" + + " Stage-30 
depends on stages: Stage-31\n" + + " Stage-32 depends on stages: Stage-30\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-31\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: u\n" + + " GatherStats: false\n" + + " Filter Operator\n" + + " isSamplingPred: false\n" + + " predicate: (b > 5) (type: boolean)\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct), 5 (type: int), b (type: string), ds (type: string)\n" + + " outputColumnNames: _col0, _col1, _col2, _col3\n" + + " Reduce Output Operator\n" + + " key expressions: _col3 (type: string)\n" + + " sort order: +\n" + + " Map-reduce partition columns: _col3 (type: string)\n" + + " tag: -1\n" + + " value expressions: _col0 (type: struct), _col1 (type: int), _col2 (type: string), _col3 (type: string)\n" + + " auto parallelism: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=today\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds today\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=yesterday\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds yesterday\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " 
serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " Truncated Path -> Alias:\n" + + " /u/ds=today [u]\n" + + " /u/ds=yesterday [u]\n" + + " Needs Tagging: false\n" + + " Reduce Operator Tree:\n" + + " Extract\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + "\n" + + " Stage: Stage-30\n" + + " Move Operator\n" + + " tables:\n" + + " partition:\n" + + " ds \n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + "\n" + + " Stage: Stage-32\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Test + public void testUpdateOnePartition() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("update U set a = 5 where ds = 'today'", + "testUpdateOnePartition"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_UPDATE_TABLE\n" + + " TOK_TABNAME\n" + + " U\n" + + " TOK_SET_COLUMNS_CLAUSE\n" + + " =\n" + + " TOK_TABLE_OR_COL\n" + + " a\n" + + " 5\n" + + " TOK_WHERE\n" + + " =\n" + + " TOK_TABLE_OR_COL\n" + + " ds\n" + + " 'today'\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-34 is a root stage\n" + + " Stage-33 depends on stages: Stage-34\n" + + " Stage-35 depends on stages: Stage-33\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-34\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: u\n" + + " GatherStats: false\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct), 5 (type: int), b (type: string), ds (type: string)\n" + + " outputColumnNames: _col0, _col1, _col2, _col3\n" + + " Reduce Output Operator\n" + + " key expressions: _col3 (type: string)\n" + + " sort order: +\n" + + " Map-reduce partition columns: _col3 (type: string)\n" + + " tag: -1\n" + + " value expressions: _col0 (type: struct), _col1 (type: int), _col2 (type: string), _col3 (type: string)\n" + + " auto parallelism: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=today\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds today\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + 
+ " name: default.u\n" + + " Truncated Path -> Alias:\n" + + " /u/ds=today [u]\n" + + " Needs Tagging: false\n" + + " Reduce Operator Tree:\n" + + " Extract\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + "\n" + + " Stage: Stage-33\n" + + " Move Operator\n" + + " tables:\n" + + " partition:\n" + + " ds \n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + "\n" + + " Stage: Stage-35\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Test + public void testUpdateOnePartitionWhere() throws Exception { + try { + ReturnInfo rc = parseAndAnalyze("update U set a = 5 where ds = 'today' and b > 5", + "testUpdateOnePartitionWhere"); + assertEquals("ABSTRACT SYNTAX TREE:\n" + + " \n" + + "TOK_UPDATE_TABLE\n" + + " TOK_TABNAME\n" + + " U\n" + + " TOK_SET_COLUMNS_CLAUSE\n" + + " =\n" + + " TOK_TABLE_OR_COL\n" + + " a\n" + + " 5\n" + + " TOK_WHERE\n" + + " and\n" + + " =\n" + + " TOK_TABLE_OR_COL\n" + + " ds\n" + + " 'today'\n" + + " >\n" + + " TOK_TABLE_OR_COL\n" + + " b\n" + + " 5\n" + + "\n" + + "\n" + + "STAGE DEPENDENCIES:\n" + + " Stage-37 is a root stage\n" + + " Stage-36 depends on stages: Stage-37\n" + + " Stage-38 depends on stages: Stage-36\n" + + "\n" + + "STAGE PLANS:\n" + + " Stage: Stage-37\n" + + " Map Reduce\n" + + " Map Operator Tree:\n" + + " TableScan\n" + + " alias: u\n" + + " GatherStats: false\n" + + " Filter Operator\n" + + " 
isSamplingPred: false\n" + + " predicate: (b > 5) (type: boolean)\n" + + " Select Operator\n" + + " expressions: ROW__ID (type: struct), 5 (type: int), b (type: string), ds (type: string)\n" + + " outputColumnNames: _col0, _col1, _col2, _col3\n" + + " Reduce Output Operator\n" + + " key expressions: _col3 (type: string)\n" + + " sort order: +\n" + + " Map-reduce partition columns: _col3 (type: string)\n" + + " tag: -1\n" + + " value expressions: _col0 (type: struct), _col1 (type: int), _col2 (type: string), _col3 (type: string)\n" + + " auto parallelism: false\n" + + " Path -> Alias:\n" + + " pfile:MASKED-OUT\n" + + " Path -> Partition:\n" + + " pfile:MASKED-OUT\n" + + " Partition\n" + + " base file name: ds=today\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " partition values:\n" + + " ds today\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " \n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " name: default.u\n" + + " Truncated Path -> Alias:\n" + + " /u/ds=today [u]\n" + + " Needs Tagging: false\n" + + " Reduce Operator Tree:\n" + + " Extract\n" + + " File Output Operator\n" + + " compressed: false\n" + + " GlobalTableId: 1\n" + + " directory: pfile:MASKED-OUT\n" + + " NumFilesPerFileSink: 1\n" + + " Stats Publishing Key Prefix: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 
1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + " TotalFiles: 1\n" + + " GatherStats: true\n" + + " MultiFileSpray: false\n" + + "\n" + + " Stage: Stage-36\n" + + " Move Operator\n" + + " tables:\n" + + " partition:\n" + + " ds \n" + + " replace: false\n" + + " source: pfile:MASKED-OUT\n" + + " table:\n" + + " input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " properties:\n" + + " bucket_count -1\n" + + " columns a,b\n" + + " columns.comments default\u0000default\n" + + " columns.types string:string\n" + + " file.inputformat org.apache.hadoop.hive.ql.io.orc.OrcInputFormat\n" + + " file.outputformat org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat\n" + + " location pfile:MASKED-OUT\n" + + " name default.u\n" + + " partition_columns ds\n" + + " partition_columns.types string\n" + + " serialization.ddl struct u { string a, string b}\n" + + " serialization.format 1\n" + + " serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " transient_lastDdlTime MASKED-OUT\n" + + " serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\n" + + " name: default.u\n" + + "\n" + + " Stage: Stage-38\n" + + " Stats-Aggr Operator\n" + + " Stats Aggregation Key Prefix: pfile:MASKED-OUT\n" + + "\n", explain((SemanticAnalyzer)rc.sem, rc.plan, rc.ast.dump())); + } finally { + cleanupTables(); + } + } + + @Before + public void setup() { + conf = new HiveConf(); + conf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); + } + + public void cleanupTables() throws HiveException { + if (db != null) { + db.dropTable("T"); + db.dropTable("U"); + } + } + + private class ReturnInfo { + ASTNode ast; + BaseSemanticAnalyzer sem; + QueryPlan plan; + + ReturnInfo(ASTNode a, BaseSemanticAnalyzer s, QueryPlan p) { + ast = a; + sem = s; + plan = p; + } + } + + private ReturnInfo parseAndAnalyze(String query, String testName) + throws IOException, ParseException, HiveException { + + SessionState.start(conf); + Context ctx = new Context(conf); + ctx.setCmd(query); + ctx.setHDFSCleanup(true); + + ParseDriver pd = new ParseDriver(); + ASTNode tree = pd.parse(query, ctx); + tree = ParseUtils.findRootNonNullToken(tree); + + BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree); + db = sem.getDb(); + + // I have to create the tables here (rather than in setup()) because I need the Hive + // connection, which is conviently created by the semantic analyzer. 
+    db.createTable("T", Arrays.asList("a", "b"), null, OrcInputFormat.class, OrcOutputFormat.class);
+    db.createTable("U", Arrays.asList("a", "b"), Arrays.asList("ds"), OrcInputFormat.class,
+        OrcOutputFormat.class);
+    Table u = db.getTable("U");
+    Map<String, String> partVals = new HashMap<String, String>(2);
+    partVals.put("ds", "yesterday");
+    db.createPartition(u, partVals);
+    partVals.clear();
+    partVals.put("ds", "today");
+    db.createPartition(u, partVals);
+    sem.analyze(tree, ctx);
+    // validate the plan
+    sem.validate();
+
+    QueryPlan plan = new QueryPlan(query, sem, 0L, testName);
+
+    return new ReturnInfo(tree, sem, plan);
+  }
+
+  private String explain(SemanticAnalyzer sem, QueryPlan plan, String astStringTree) throws
+      IOException {
+    FileSystem fs = FileSystem.get(conf);
+    File f = File.createTempFile("TestSemanticAnalyzer", "explain");
+    Path tmp = new Path(f.getPath());
+    fs.create(tmp);
+    fs.deleteOnExit(tmp);
+    // Run an ExplainTask over the analyzed plan, writing its output to tmp.
+    ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(),
+        sem.getFetchTask(), astStringTree, sem, true, false, false, false, false);
+    ExplainTask task = new ExplainTask();
+    task.setWork(work);
+    task.initialize(conf, plan, null);
+    task.execute(null);
+    FSDataInputStream in = fs.open(tmp);
+    StringBuilder builder = new StringBuilder();
+    final int bufSz = 4096;
+    byte[] buf = new byte[bufSz];
+    long pos = 0L;
+    while (true) {
+      int bytesRead = in.read(pos, buf, 0, bufSz);
+      if (bytesRead > 0) {
+        pos += bytesRead;
+        builder.append(new String(buf, 0, bytesRead));
+      } else {
+        // Reached end of file
+        in.close();
+        break;
+      }
+    }
+    // Mask out paths and timestamps that vary between runs so the golden strings above stay stable.
+    return builder.toString()
+        .replaceAll("pfile:/.*\n", "pfile:MASKED-OUT\n")
+        .replaceAll("transient_lastDdlTime.*\n", "transient_lastDdlTime MASKED-OUT\n");
+  }
+}
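
For readers following along, below is a minimal, self-contained sketch of driving the new DELETE grammar rule through the same parse path these tests use (ParseDriver, ParseUtils, SessionState). It is not part of the patch; the class name DeleteParseSketch is illustrative, and it assumes the ANTLR-generated HiveParser exposes a TOK_DELETE_FROM token constant for the token added in HiveParser.g.

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.Context;
    import org.apache.hadoop.hive.ql.parse.ASTNode;
    import org.apache.hadoop.hive.ql.parse.HiveParser;
    import org.apache.hadoop.hive.ql.parse.ParseDriver;
    import org.apache.hadoop.hive.ql.parse.ParseUtils;
    import org.apache.hadoop.hive.ql.session.SessionState;

    // Illustrative only: parse a DELETE statement and inspect the TOK_DELETE_FROM root node.
    public class DeleteParseSketch {
      public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        SessionState.start(conf);
        Context ctx = new Context(conf);

        String query = "delete from U where ds = 'today'";
        ctx.setCmd(query);

        // Same path as parseAndAnalyze() above: raw text -> ANTLR AST -> root non-null token.
        ParseDriver pd = new ParseDriver();
        ASTNode tree = ParseUtils.findRootNonNullToken(pd.parse(query, ctx));

        // With the new deleteStatement rule, the root is TOK_DELETE_FROM; its children are
        // the table name and the optional WHERE clause (compare the AST dumps asserted above).
        System.out.println(tree.getToken().getType() == HiveParser.TOK_DELETE_FROM);
        System.out.println(tree.dump());
      }
    }

The full semantic analysis and plan generation, which is what the assertions above actually exercise, additionally requires SemanticAnalyzerFactory and a metastore connection, as set up in parseAndAnalyze().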