diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 9c7603c..b4c0ba6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -2216,8 +2216,8 @@ insertClause
 @after { popMsg(state); }
    :
      KW_INSERT KW_OVERWRITE destination ifNotExists? -> ^(TOK_DESTINATION destination ifNotExists?)
-   | KW_INSERT KW_INTO KW_TABLE? tableOrPartition
-       -> ^(TOK_INSERT_INTO tableOrPartition)
+   | KW_INSERT KW_INTO KW_TABLE? tableOrPartition (LPAREN targetCols=columnNameList RPAREN)?
+       -> ^(TOK_INSERT_INTO tableOrPartition $targetCols?)
    ;
 
 destination
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java
index e8f3284..33c2f18 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java
@@ -103,6 +103,9 @@ public Integer getDestTypeForAlias(String alias) {
     return nameToDestType.get(alias.toLowerCase());
   }
 
+  /**
+   * @param alias this is actually dest name, like insclause-0
+   */
   public Table getDestTableForAlias(String alias) {
     return nameToDestTable.get(alias.toLowerCase());
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
index 3e51188..54ab25f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
@@ -43,7 +43,15 @@
   private ASTNode joinExpr;
   private ASTNode hints;
   private final HashMap aliasToSrc;
+  /**
+   * insclause-0 -> TOK_TAB ASTNode
+   */
   private final HashMap nameToDest;
+  /**
+   * For 'insert into FOO(x,y) select ...' this stores the
+   * insclause-0 -> x,y mapping
+   */
+  private final Map<String, List<String>> nameToDestSchema;
   private final HashMap nameToSample;
   private final Map exprToColumnAlias;
   private final Map destToSelExpr;
@@ -111,6 +119,7 @@
   public QBParseInfo(String alias, boolean isSubQ) {
     aliasToSrc = new HashMap();
     nameToDest = new HashMap();
+    nameToDestSchema = new HashMap<String, List<String>>();
     nameToSample = new HashMap();
     exprToColumnAlias = new HashMap();
     destToLateralView = new HashMap();
@@ -234,6 +243,24 @@ public void setDestForClause(String clause, ASTNode ast) {
     nameToDest.put(clause, ast);
   }
 
+  /**
+   * Records the target column list of 'insert into FOO(x,y) select ...' for a destination.
+   * @param clause destination name, like insclause-0
+   * @param columnList column names in the order they were listed in the statement
+   * @return the list previously recorded for this clause, or null if there was none
+   */
+  List<String> setDestSchemaForClause(String clause, List<String> columnList) {
+    return nameToDestSchema.put(clause, columnList);
+  }
+
+  /**
+   * @param clause destination name, like insclause-0
+   * @return the target column list recorded for this clause, or null if none was recorded
+   */
+  List<String> getDestSchemaForClause(String clause) {
+    return nameToDestSchema.get(clause);
+  }
+
   /**
    * Set the Cluster By AST for the clause.
    *
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 2466d78..e1a3838 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1176,6 +1176,42 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan
         }
 
         qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
+        ASTNode tabColName = (ASTNode)ast.getChild(1);
+        if(tabColName != null && tabColName.getType() == HiveParser.TOK_TABCOLNAME) {
+          //we have "insert into foo(a,b)..."; parser will enforce that 1+ columns are listed if TOK_TABCOLNAME is present
+          List<String> targetColNames = new ArrayList<String>();
+          for(Node col : tabColName.getChildren()) {
+            assert ((ASTNode)col).getType() == HiveParser.Identifier :
+              "expected token " + HiveParser.Identifier + " found " + ((ASTNode)col).getType();
+            //identifiers are case-insensitive in HiveQL; normalize here so that lookups by
+            //FieldSchema.getName() work (assumes the metastore returns lower-case column names)
+            targetColNames.add(((ASTNode)col).getText().toLowerCase());
+          }
+          String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
+            SessionState.get().getCurrentDatabase());
+          qbp.setDestSchemaForClause(ctx_1.dest, targetColNames);
+          Set<String> targetColumns = new HashSet<String>();
+          targetColumns.addAll(targetColNames);
+          if(targetColNames.size() != targetColumns.size()) {
+            throw new SemanticException(generateErrorMessage(tabColName,
+              "Duplicate column name detected in " + fullTableName + " table schema specification"));
+          }
+          try {
+            Table targetTable = db.getTable(fullTableName, true);
+            for(FieldSchema f : targetTable.getCols()) {
+              //parser only allows foo(a,b), not foo(foo.a, foo.b)
+              targetColumns.remove(f.getName());
+            }
+            if(!targetColumns.isEmpty()) {
+              throw new SemanticException(generateErrorMessage(tabColName,
+                "'" + targetColumns + "' in table schema specification are not present in " + fullTableName));
+            }
+          }
+          catch (HiveException ex) {
+            throw new SemanticException(generateErrorMessage(ast,
+              "Unable to access metadata for table " + fullTableName), ex);
+          }
+        }
 
         if (qbp.getClauseNamesForDest().size() > 1) {
           queryProperties.setMultiDestQuery(true);
@@ -3493,7 +3529,7 @@ static boolean isRegex(String pattern, HiveConf conf) {
   private Operator genSelectPlan(String dest, QB qb, Operator input,
       Operator inputForSelectStar) throws SemanticException {
     ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
-    Operator op = genSelectPlan(selExprList, qb, input, inputForSelectStar, false);
+    Operator op = genSelectPlan(dest, selExprList, qb, input, inputForSelectStar, false);
 
     if (LOG.isDebugEnabled()) {
       LOG.debug("Created Select Plan for clause: " + dest);
@@ -3503,7 +3539,7 @@ static boolean isRegex(String pattern, HiveConf conf) {
   }
 
   @SuppressWarnings("nls")
-  private Operator genSelectPlan(ASTNode selExprList, QB qb, Operator input,
+  private Operator genSelectPlan(String dest, ASTNode selExprList, QB qb, Operator input,
       Operator inputForSelectStar, boolean outerLV) throws SemanticException {
 
     if (LOG.isDebugEnabled()) {
@@ -3741,6 +3777,8 @@ static boolean isRegex(String pattern, HiveConf conf) {
     }
     selectStar = selectStar && exprList.getChildCount() == posn + 1;
 
+    handleInsertStatementSpec(col_list, dest, out_rwsch, inputRR, qb, selExprList);
+
     ArrayList columnNames = new ArrayList();
     Map colExprMap = new HashMap();
     for (int i = 0; i < col_list.size(); i++) {
@@ -3768,6 +3806,90 @@ static boolean isRegex(String pattern, HiveConf conf) {
     return output;
   }
 
+  /**
+   * This modifies the Select projections when the Select is part of an insert statement and
+   * the insert statement specifies a column list for the target table, e.g.
+   * create table source (a int, b int);
+   * create table target (x int, y int, z int);
+   * insert into target(z,x) select * from source
+   *
+   * Once the * is resolved to 'a,b', this list needs to be rewritten to 'b,null,a' so that it looks
+   * as if the original query was written as
+   * insert into target select b, null, a from source
+   *
+   * if target schema is not specified, this is no-op
+   * @param col_list projections of the Select; replaced in place with the reordered/padded list
+   * @param dest destination name, like insclause-0
+   * @param out_rwsch row resolver of the Select output; its row schema is replaced to match
+   * @param inputRR row resolver of the Select input; used to build the synthetic NULL expressions
+   * @param qb query block that holds the insert column list and the destination table
+   * @param selExprList Select AST; only used when generating error messages
+   * @throws SemanticException if the number of projections differs from the number of columns
+   *         listed, or if no destination table is recorded for dest
+   */
+  private void handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest,
+                                         RowResolver out_rwsch, RowResolver inputRR, QB qb,
+                                         ASTNode selExprList) throws SemanticException {
+    List<String> targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);
+    if(targetTableSchema == null) {
+      //no insert schema was specified
+      return;
+    }
+    /*
+     * 0. doPhase1() checks that target table schema specification makes sense
+     * 1. check that col_list has same cardinality as targetTableSchema.specifiedSchema
+     * 2. create a map of schema cols to projected cols: schemaColName->proj col by position
+     * 3. get list of cols on target table, and for each not in map create NULL as X*/
+    if(targetTableSchema.size() != col_list.size()) {
+      throw new SemanticException(generateErrorMessage(selExprList,
+        "Expected " + targetTableSchema.size() + " columns for " + dest +
+          "; select produces " + col_list.size() + " columns"));
+    }
+    Map<String, ExprNodeDesc> targetCol2Projection = new HashMap<String, ExprNodeDesc>();
+
+    Map<String, ColumnInfo> targetCol2ColumnInfo = new HashMap<String, ColumnInfo>();
+    int colListPos = 0;
+    for(String targetCol : targetTableSchema) {
+      targetCol2ColumnInfo.put(targetCol, out_rwsch.getColumnInfos().get(colListPos));
+      targetCol2Projection.put(targetCol, col_list.get(colListPos++));
+    }
+    Table target = qb.getMetaData().getDestTableForAlias(dest);
+    if(target == null) {
+      //getDestTableForAlias() is a plain map lookup; it returns null when no table was recorded for dest
+      throw new SemanticException(generateErrorMessage(selExprList,
+        "No table found in QB metadata for dest='" + dest + "'"));
+    }
+    ArrayList<ExprNodeDesc> new_col_list = new ArrayList<ExprNodeDesc>();
+    ArrayList<ColumnInfo> newSchema = new ArrayList<ColumnInfo>();
+    colListPos = 0;
+    for(FieldSchema f : target.getCols()) {//since there is target.getPartCols() I assume getCols()
+      //doesn't include partition columns. ToDo: check this
+      //also in general, dynamic partitions are not supported by insert into FOO(a,b) so far; ToDo: figure this out
+      if(targetCol2Projection.containsKey(f.getName())) {
+        //put existing column in new list to make sure it is in the right position
+        new_col_list.add(targetCol2Projection.get(f.getName()));
+        ColumnInfo ci = targetCol2ColumnInfo.get(f.getName());//todo: is this OK?
+        ci.setInternalName(getColumnInternalName(colListPos));
+        newSchema.add(ci);
+      }
+      else {
+        //add new 'synthetic' columns for projections not provided by Select
+        TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
+        CommonToken t = new CommonToken(HiveParser.TOK_NULL);
+        t.setText("TOK_NULL");
+        ExprNodeDesc exp = genExprNodeDesc(new ASTNode(t), inputRR, tcCtx);
+        new_col_list.add(exp);
+        final String tableAlias = "";//is this OK? this column doesn't come from any table
+        ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos),
+          exp.getWritableObjectInspector(), tableAlias, false);
+        newSchema.add(colInfo);
+      }
+      colListPos++;
+    }
+    col_list.clear();
+    col_list.addAll(new_col_list);
+    out_rwsch.setRowSchema(new RowSchema(newSchema));
+  }
   String recommendName(ExprNodeDesc exp, String colAlias) {
     if (!colAlias.startsWith(autogenColAliasPrfxLbl)) {
       return null;
@@ -9624,7 +9746,7 @@ private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree)
     int allColumns = allPathRR.getColumnInfos().size();
     // Get the UDTF Path
     QB blankQb = new QB(null, null, false);
-    Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree
+    Operator udtfPath = genSelectPlan(null, (ASTNode) lateralViewTree
         .getChild(0), blankQb, lvForward, null,
         lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER);
     // add udtf aliases to QB
diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
index a8d03a9..eed162b 100644
--- ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
+++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
@@ -207,6 +207,26 @@ public void testInsertIntoTableAsSelectFromNamedVirtTable() throws ParseExceptio
       "(TOK_WHERE (= (TOK_TABLE_OR_COL b) 9))))",
       ast.toStringTree());
   }
+  /**
+   * same as testInsertIntoTableAsSelectFromNamedVirtTable but with column list on target table
+   * @throws ParseException
+   */
+  @Test
+  public void testInsertIntoTableAsSelectFromNamedVirtTableNamedCol() throws ParseException {
+    ASTNode ast = parse("insert into page_view(c1,c2) select a,b as c from (values (1,2),(3,4)) as VC(a,b) where b = 9");
+    Assert.assertEquals("AST doesn't match",
+      "(TOK_QUERY " +
+        "(TOK_FROM " +
+        "(TOK_VIRTUAL_TABLE " +
+        "(TOK_VIRTUAL_TABREF (TOK_TABNAME VC) (TOK_COL_NAME a b)) " +
+        "(TOK_VALUES_TABLE (TOK_VALUE_ROW 1 2) (TOK_VALUE_ROW 3 4)))) " +
+        "(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME page_view)) (TOK_TABCOLNAME c1 c2)) " +
+        "(TOK_SELECT " +
+        "(TOK_SELEXPR (TOK_TABLE_OR_COL a)) " +
+        "(TOK_SELEXPR (TOK_TABLE_OR_COL b) c)) " +
+        "(TOK_WHERE (= (TOK_TABLE_OR_COL b) 9))))",
+      ast.toStringTree());
+  }
   @Test
   public void testInsertIntoTableFromAnonymousTable1Row() throws ParseException {
     ASTNode ast = parse("insert into page_view values(1,2)");
@@ -220,6 +240,32 @@ public void testInsertIntoTableFromAnonymousTable1Row() throws ParseException {
       "(TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))",
       ast.toStringTree());
   }
+  /**
+   * Same as testInsertIntoTableFromAnonymousTable1Row but with column list on target table
+   * @throws ParseException
+   */
+  @Test
+  public void testInsertIntoTableFromAnonymousTable1RowNamedCol() throws ParseException {
+    ASTNode ast = parse("insert into page_view(a,b) values(1,2)");
+    Assert.assertEquals("AST doesn't match",
+      "(TOK_QUERY " +
+        "(TOK_FROM " +
+          "(TOK_VIRTUAL_TABLE " +
+            "(TOK_VIRTUAL_TABREF TOK_ANONYMOUS) " +
+            "(TOK_VALUES_TABLE (TOK_VALUE_ROW 1 2))" +
+          ")" +
+        ") " +
+        "(TOK_INSERT " +
+          "(TOK_INSERT_INTO " +
+            "(TOK_TAB (TOK_TABNAME page_view)) " +
+            "(TOK_TABCOLNAME a b)" +//this is "extra" piece we get vs previous query
+          ") " +
+          "(TOK_SELECT " +
+            "(TOK_SELEXPR TOK_ALLCOLREF)" +
+          ")" +
+        ")" +
+      ")", ast.toStringTree());
+  }
   @Test
   public void testInsertIntoTableFromAnonymousTable() throws ParseException {
     ASTNode ast = parse("insert into table page_view values(-1,2),(3,+4)");