diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 149b788..fec8970 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -2277,8 +2277,8 @@ insertClause @after { popMsg(state); } : KW_INSERT KW_OVERWRITE destination ifNotExists? -> ^(TOK_DESTINATION destination ifNotExists?) - | KW_INSERT KW_INTO KW_TABLE? tableOrPartition - -> ^(TOK_INSERT_INTO tableOrPartition) + | KW_INSERT KW_INTO KW_TABLE? tableOrPartition (LPAREN targetCols=columnNameList RPAREN)? + -> ^(TOK_INSERT_INTO tableOrPartition $targetCols?) ; destination diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java index e8f3284..33c2f18 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java @@ -103,6 +103,9 @@ public Integer getDestTypeForAlias(String alias) { return nameToDestType.get(alias.toLowerCase()); } + /** + * @param alias this is actually dest name, like insclause-0 + */ public Table getDestTableForAlias(String alias) { return nameToDestTable.get(alias.toLowerCase()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index 3e51188..54ab25f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -43,7 +43,15 @@ private ASTNode joinExpr; private ASTNode hints; private final HashMap aliasToSrc; + /** + * insclause-0 -> TOK_TAB ASTNode + */ private final HashMap nameToDest; + /** + * For 'insert into FOO(x,y) select ...' 
this stores the + * insclause-0 -> x,y mapping + */ + private final Map> nameToDestSchema; private final HashMap nameToSample; private final Map exprToColumnAlias; private final Map destToSelExpr; @@ -111,6 +119,7 @@ public QBParseInfo(String alias, boolean isSubQ) { aliasToSrc = new HashMap(); nameToDest = new HashMap(); + nameToDestSchema = new HashMap>(); nameToSample = new HashMap(); exprToColumnAlias = new HashMap(); destToLateralView = new HashMap(); @@ -234,6 +243,13 @@ public void setDestForClause(String clause, ASTNode ast) { nameToDest.put(clause, ast); } + List setDestSchemaForClause(String clause, List columnList) { + return nameToDestSchema.put(clause, columnList); + } + List getDestSchemaForClause(String clause) { + return nameToDestSchema.get(clause); + } + /** * Set the Cluster By AST for the clause. * diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index bdb9204..3396115 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -1176,6 +1176,86 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan } qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0)); + //todo: refactor this into a separate method + ASTNode tabColName = (ASTNode)ast.getChild(1); + if(ast.getType() == HiveParser.TOK_INSERT_INTO && tabColName != null && tabColName.getType() == HiveParser.TOK_TABCOLNAME) { + //we have "insert into foo(a,b)..."; parser will enforce that 1+ columns are listed if TOK_TABCOLNAME is present + List targetColNames = new ArrayList(); + for(Node col : tabColName.getChildren()) { + assert ((ASTNode)col).getType() == HiveParser.Identifier : + "expected token " + HiveParser.Identifier + " found " + ((ASTNode)col).getType(); + targetColNames.add(((ASTNode)col).getText()); + } + String fullTableName = getUnescapedName((ASTNode) 
ast.getChild(0).getChild(0), + SessionState.get().getCurrentDatabase()); + qbp.setDestSchemaForClause(ctx_1.dest, targetColNames); + Set targetColumns = new HashSet(); + targetColumns.addAll(targetColNames); + if(targetColNames.size() != targetColumns.size()) { + throw new SemanticException(generateErrorMessage(tabColName, + "Duplicate column name detected in " + fullTableName + " table schema specification")); + } + try { + Table targetTable = db.getTable(fullTableName, false); + if(targetTable == null) { + throw new SemanticException(generateErrorMessage(ast, + "Unable to access metadata for table " + fullTableName)); + } + for(FieldSchema f : targetTable.getCols()) {//todo: for partitioned tables, should we use partition rather than table here? how do we know which it is here? + //parser only allows foo(a,b), not foo(foo.a, foo.b) + targetColumns.remove(f.getName()); + } + if(!targetColumns.isEmpty()) {//here we need to see if remaining columns are dynamic partition columns + /* + We just checked the user specified schema columns among regular table column and found some which are not + 'regular'. 
Now check if they are dynamic partition columns + For dynamic partitioning, + Given "create table multipart(a int, b int) partitioned by (c int, d int);" + for "insert into multipart partition(c='1',d)(d,a) values(2,3);" we expect parse tree to look like this + (TOK_INSERT_INTO + (TOK_TAB + (TOK_TABNAME multipart) + (TOK_PARTSPEC + (TOK_PARTVAL c '1') + (TOK_PARTVAL d) + ) + ) + (TOK_TABCOLNAME d a) + ) + */ + List dynamicPartitionColumns = new ArrayList(); + if(ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) { + ASTNode tokTab = (ASTNode)ast.getChild(0); + ASTNode tokPartSpec = (ASTNode)tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC); + if(tokPartSpec != null) { + for(Node n : tokPartSpec.getChildren()) { + ASTNode tokPartVal = null; + if(n instanceof ASTNode) { + tokPartVal = (ASTNode)n; + } + if(tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL && tokPartVal.getChildCount() == 1) { + assert tokPartVal.getChild(0).getType() == HiveParser.Identifier : "Expected column name; found tokType=" + tokPartVal.getType(); + dynamicPartitionColumns.add(tokPartVal.getChild(0).getText()); + } + } + } + } + if(!dynamicPartitionColumns.isEmpty()) { + for(String colName : dynamicPartitionColumns) { + targetColumns.remove(colName); + } + } + if(!targetColumns.isEmpty()) { + throw new SemanticException(generateErrorMessage(tabColName, + "'" + targetColumns + "' in table schema specification are not present in " + fullTableName)); + } + } + } + catch (HiveException ex) { + LOG.error("Error processing HiveParser.TOK_DESTINATION: " + ex.getMessage(), ex); + throw new SemanticException(ex); + } + } if (qbp.getClauseNamesForDest().size() > 1) { queryProperties.setMultiDestQuery(true); @@ -1406,6 +1486,10 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan return phase1Result; } + private void handleInsertStatementSpecPhase1() { + + } + private void getMetaData(QBExpr qbexpr, ReadEntity parentInput) 
throws SemanticException { if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { @@ -3493,7 +3577,7 @@ static boolean isRegex(String pattern, HiveConf conf) { private Operator genSelectPlan(String dest, QB qb, Operator input, Operator inputForSelectStar) throws SemanticException { ASTNode selExprList = qb.getParseInfo().getSelForClause(dest); - Operator op = genSelectPlan(selExprList, qb, input, inputForSelectStar, false); + Operator op = genSelectPlan(dest, selExprList, qb, input, inputForSelectStar, false); if (LOG.isDebugEnabled()) { LOG.debug("Created Select Plan for clause: " + dest); @@ -3503,7 +3587,7 @@ static boolean isRegex(String pattern, HiveConf conf) { } @SuppressWarnings("nls") - private Operator genSelectPlan(ASTNode selExprList, QB qb, Operator input, + private Operator genSelectPlan(String dest, ASTNode selExprList, QB qb, Operator input, Operator inputForSelectStar, boolean outerLV) throws SemanticException { if (LOG.isDebugEnabled()) { @@ -3741,6 +3825,8 @@ static boolean isRegex(String pattern, HiveConf conf) { } selectStar = selectStar && exprList.getChildCount() == posn + 1; + handleInsertStatementSpec(col_list, dest, out_rwsch, inputRR, qb, selExprList); + ArrayList columnNames = new ArrayList(); Map colExprMap = new HashMap(); for (int i = 0; i < col_list.size(); i++) { @@ -3768,6 +3854,95 @@ static boolean isRegex(String pattern, HiveConf conf) { return output; } + /** + * This modifies the Select projections when the Select is part of an insert statement and + * the insert statement specifies a column list for the target table, e.g. 
+ * create table source (a int, b int); + * create table target (x int, y int, z int); + * insert into target(z,x) select * from source + * + * Once the * is resolved to 'a,b', this list needs to be rewritten to 'b,null,a' so that it looks + * as if the original query was written as + * insert into target select b, null, a from source + * + * if target schema is not specified, this is a no-op + * @throws SemanticException + */ + private void handleInsertStatementSpec(List col_list, String dest, + RowResolver out_rwsch, RowResolver inputRR, QB qb, + ASTNode selExprList) throws SemanticException { + List targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);//specified in the query + if(targetTableSchema == null) { + //no insert schema was specified + return; + } + /* + * 0. doPhase1() checks that target table schema specification makes sense + * 1. check that col_list has same cardinality as targetTableSchema.specifiedSchema + * 2. create a map of schema cols to projected cols: schemaColName->proj col by position + * 3. get list of cols on target table, and for each not in map create NULL as X*/ + if(targetTableSchema.size() != col_list.size()) { + throw new SemanticException(generateErrorMessage(selExprList, + "Expected " + targetTableSchema.size() + " columns for " + dest + + "; select produces " + col_list.size() + " columns")); + } + Map targetCol2Projection = new HashMap(); + + Map targetCol2ColumnInfo = new HashMap(); + int colListPos = 0; + for(String targetCol : targetTableSchema) { + targetCol2ColumnInfo.put(targetCol, out_rwsch.getColumnInfos().get(colListPos)); + targetCol2Projection.put(targetCol, col_list.get(colListPos++)); + } + Table target = qb.getMetaData().getDestTableForAlias(dest); + Partition partition = target == null ? 
qb.getMetaData().getDestPartitionForAlias(dest) : null; + if(target == null && partition == null) { + throw new SemanticException(generateErrorMessage(selExprList, + "No table/partition found in QB metadata for dest='" + dest + "'")); + } + ArrayList new_col_list = new ArrayList(); + ArrayList newSchema = new ArrayList(); + colListPos = 0; + List targetTableCols = target != null ? target.getCols() : partition.getCols(); + List targetTableColNames = new ArrayList(); + for(FieldSchema fs : targetTableCols) { + targetTableColNames.add(fs.getName()); + } + Map partSpec = qb.getMetaData().getPartSpecForAlias(dest); + if(partSpec != null) { + //find dynamic partition columns + for(Map.Entry partKeyVal : partSpec.entrySet()) {//relies on consistent order via LinkedHashMap + if (partKeyVal.getValue() == null) { + targetTableColNames.add(partKeyVal.getKey());//these must be after non-partition cols + } + } + } + for(String f : targetTableColNames) { + if(targetCol2Projection.containsKey(f)) { + //put existing column in new list to make sure it is in the right position + new_col_list.add(targetCol2Projection.get(f)); + ColumnInfo ci = targetCol2ColumnInfo.get(f);//todo: is this OK? + ci.setInternalName(getColumnInternalName(colListPos)); + newSchema.add(ci); + } + else { + //add new 'synthetic' columns for projections not provided by Select + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + CommonToken t = new CommonToken(HiveParser.TOK_NULL); + t.setText("TOK_NULL"); + ExprNodeDesc exp = genExprNodeDesc(new ASTNode(t), inputRR, tcCtx); + new_col_list.add(exp); + final String tableAlias = "";//is this OK? 
this column doesn't come from any table + ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos), + exp.getWritableObjectInspector(), tableAlias, false); + newSchema.add(colInfo); + } + colListPos++; + } + col_list.clear(); + col_list.addAll(new_col_list); + out_rwsch.setRowSchema(new RowSchema(newSchema)); + } String recommendName(ExprNodeDesc exp, String colAlias) { if (!colAlias.startsWith(autogenColAliasPrfxLbl)) { return null; @@ -9624,7 +9799,7 @@ private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) int allColumns = allPathRR.getColumnInfos().size(); // Get the UDTF Path QB blankQb = new QB(null, null, false); - Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree + Operator udtfPath = genSelectPlan(null, (ASTNode) lateralViewTree .getChild(0), blankQb, lvForward, null, lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER); // add udtf aliases to QB diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java index a8d03a9..eed162b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java +++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java @@ -207,6 +207,26 @@ public void testInsertIntoTableAsSelectFromNamedVirtTable() throws ParseExceptio "(TOK_WHERE (= (TOK_TABLE_OR_COL b) 9))))", ast.toStringTree()); } + /** + * same as testInsertIntoTableAsSelectFromNamedVirtTable but with column list on target table + * @throws ParseException + */ + @Test + public void testInsertIntoTableAsSelectFromNamedVirtTableNamedCol() throws ParseException { + ASTNode ast = parse("insert into page_view(c1,c2) select a,b as c from (values (1,2),(3,4)) as VC(a,b) where b = 9"); + Assert.assertEquals("AST doesn't match", + "(TOK_QUERY " + + "(TOK_FROM " + + "(TOK_VIRTUAL_TABLE " + + "(TOK_VIRTUAL_TABREF (TOK_TABNAME VC) (TOK_COL_NAME a b)) " + + "(TOK_VALUES_TABLE (TOK_VALUE_ROW 1 2) (TOK_VALUE_ROW 3 4)))) " + + "(TOK_INSERT (TOK_INSERT_INTO 
(TOK_TAB (TOK_TABNAME page_view)) (TOK_TABCOLNAME c1 c2)) " + + "(TOK_SELECT " + + "(TOK_SELEXPR (TOK_TABLE_OR_COL a)) " + + "(TOK_SELEXPR (TOK_TABLE_OR_COL b) c)) " + + "(TOK_WHERE (= (TOK_TABLE_OR_COL b) 9))))", + ast.toStringTree()); + } @Test public void testInsertIntoTableFromAnonymousTable1Row() throws ParseException { ASTNode ast = parse("insert into page_view values(1,2)"); @@ -220,6 +240,32 @@ public void testInsertIntoTableFromAnonymousTable1Row() throws ParseException { "(TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))", ast.toStringTree()); } + /** + * Same as testInsertIntoTableFromAnonymousTable1Row but with column list on target table + * @throws ParseException + */ + @Test + public void testInsertIntoTableFromAnonymousTable1RowNamedCol() throws ParseException { + ASTNode ast = parse("insert into page_view(a,b) values(1,2)"); + Assert.assertEquals("AST doesn't match", + "(TOK_QUERY " + + "(TOK_FROM " + + "(TOK_VIRTUAL_TABLE " + + "(TOK_VIRTUAL_TABREF TOK_ANONYMOUS) " + + "(TOK_VALUES_TABLE (TOK_VALUE_ROW 1 2))" + + ")" + + ") " + + "(TOK_INSERT " + + "(TOK_INSERT_INTO " + + "(TOK_TAB (TOK_TABNAME page_view)) " + + "(TOK_TABCOLNAME a b)" +//this is "extra" piece we get vs previous query + ") " + + "(TOK_SELECT " + + "(TOK_SELEXPR TOK_ALLCOLREF)" + + ")" + + ")" + + ")", ast.toStringTree()); + } @Test public void testInsertIntoTableFromAnonymousTable() throws ParseException { ASTNode ast = parse("insert into table page_view values(-1,2),(3,+4)"); diff --git ql/src/test/queries/clientpositive/insert_into_with_schema.q ql/src/test/queries/clientpositive/insert_into_with_schema.q new file mode 100644 index 0000000..167cdcf --- /dev/null +++ ql/src/test/queries/clientpositive/insert_into_with_schema.q @@ -0,0 +1,62 @@ +-- set of tests HIVE-9481 +--set hive.exec.dynamic.partition.mode=nonstrict; + +drop database if exists x314 cascade; +create database x314; +use x314; +create table source(s1 int, s2 int); +create table target1(x int, y int, z int); +create table 
target2(x int, y int, z int); + +insert into source(s2,s1) values(2,1); +-- expect source to contain 1 row (1,2) +select * from source; +insert into target1(z,x) select * from source; +-- expect target1 to contain 1 row (2,NULL,1) +select * from target1; + +-- note that schema spec for target1 and target2 are different +from source insert into target1(x,y) select * insert into target2(x,z) select s2,s1; +--expect target1 to have row1: (2,NULL,1), row2: (1,2,NULL) +select * from target1 order by x,y,z; +-- expect target2 to have 1 row: (2,NULL,1) +select * from target2; + + +from source insert into target1(x,y,z) select null as x, * insert into target2(x,y,z) select null as x, source.*; +-- expect target1 to have row1: (2,NULL,1), row2: (1,2,NULL), row3: (NULL, 1,2) +select * from target1 order by x,y,z; +-- expect target2 to have 1 row: (2,NULL,1), row2: (NULL, 1,2) +select * from target2; + +truncate table target1; +create table source2(s1 int, s2 int); +insert into target1 (x,z) select source.s1,source2.s2 from source left outer join source2 on source.s1=source2.s2; +--expect target1 to have 1 row (1,NULL,NULL) +select * from target1; + + +-- partitioned tables +CREATE TABLE pageviews (userid VARCHAR(64), link STRING, source STRING) PARTITIONED BY (datestamp STRING, i int) CLUSTERED BY (userid) INTO 256 BUCKETS STORED AS ORC; +INSERT INTO TABLE pageviews PARTITION (datestamp = '2014-09-23', i = 1)(userid,link) VALUES ('jsmith', 'mail.com'); +-- expect 1 row: ('jsmith', 'mail.com', NULL) in partition '2014-09-23'/'1' +select * from pageviews; + + +-- dynamic partitioning + + + +INSERT INTO TABLE pageviews PARTITION (datestamp='2014-09-23',i)(userid,i,link) VALUES ('jsmith', 7, '7mail.com'); + +set hive.exec.dynamic.partition.mode=nonstrict; + +INSERT INTO TABLE pageviews PARTITION (datestamp,i)(userid,i,link,datestamp) VALUES ('jsmith', 17, '17mail.com', '2014-09-23'); +INSERT INTO TABLE pageviews PARTITION (datestamp,i)(userid,i,link,datestamp) VALUES ('jsmith', 
19, '19mail.com', '2014-09-24'); +-- expect row1: ('jsmith', 'mail.com', NULL) in partition '2014-09-23'/'1' +-- expect row2: ('jsmith', '7mail.com', NULL) in partition '2014-09-23'/'7' +-- expect row3: ('jsmith', '17mail.com', NULL) in partition '2014-09-23'/'17' +-- expect row4: ('jsmith', '19mail.com', NULL) in partition '2014-09-24'/'19' +select * from pageviews order by link; + +drop database if exists x314 cascade; diff --git ql/src/test/results/clientpositive/insert_into_with_schema.q.out ql/src/test/results/clientpositive/insert_into_with_schema.q.out new file mode 100644 index 0000000..995342f --- /dev/null +++ ql/src/test/results/clientpositive/insert_into_with_schema.q.out @@ -0,0 +1,340 @@ +PREHOOK: query: -- set of tests HIVE-9481 +--set hive.exec.dynamic.partition.mode=nonstrict; + +drop database if exists x314 cascade +PREHOOK: type: DROPDATABASE +POSTHOOK: query: -- set of tests HIVE-9481 +--set hive.exec.dynamic.partition.mode=nonstrict; + +drop database if exists x314 cascade +POSTHOOK: type: DROPDATABASE +PREHOOK: query: create database x314 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:x314 +POSTHOOK: query: create database x314 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:x314 +PREHOOK: query: use x314 +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:x314 +POSTHOOK: query: use x314 +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:x314 +PREHOOK: query: create table source(s1 int, s2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:x314 +PREHOOK: Output: x314@source +POSTHOOK: query: create table source(s1 int, s2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:x314 +POSTHOOK: Output: x314@source +PREHOOK: query: create table target1(x int, y int, z int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:x314 +PREHOOK: Output: x314@target1 +POSTHOOK: query: create table target1(x int, y int, z int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:x314 +POSTHOOK: 
Output: x314@target1 +PREHOOK: query: create table target2(x int, y int, z int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:x314 +PREHOOK: Output: x314@target2 +POSTHOOK: query: create table target2(x int, y int, z int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:x314 +POSTHOOK: Output: x314@target2 +PREHOOK: query: insert into source(s2,s1) values(2,1) +PREHOOK: type: QUERY +PREHOOK: Input: x314@values__tmp__table__1 +PREHOOK: Output: x314@source +POSTHOOK: query: insert into source(s2,s1) values(2,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@values__tmp__table__1 +POSTHOOK: Output: x314@source +POSTHOOK: Lineage: source.s1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: source.s2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: -- expect source to contain 1 row (1,2) +select * from source +PREHOOK: type: QUERY +PREHOOK: Input: x314@source +#### A masked pattern was here #### +POSTHOOK: query: -- expect source to contain 1 row (1,2) +select * from source +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@source +#### A masked pattern was here #### +1 2 +PREHOOK: query: insert into target1(z,x) select * from source +PREHOOK: type: QUERY +PREHOOK: Input: x314@source +PREHOOK: Output: x314@target1 +POSTHOOK: query: insert into target1(z,x) select * from source +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@source +POSTHOOK: Output: x314@target1 +POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ] +POSTHOOK: Lineage: target1.y EXPRESSION [] +POSTHOOK: Lineage: target1.z SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] +PREHOOK: query: -- expect target1 to contain 1 row (2,NULL,1) +select * from target1 +PREHOOK: type: QUERY +PREHOOK: Input: x314@target1 +#### A masked pattern was here #### +POSTHOOK: query: 
-- expect target1 to contain 1 row (2,NULL,1) +select * from target1 +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@target1 +#### A masked pattern was here #### +2 NULL 1 +PREHOOK: query: -- note that schema spec for target1 and target2 are different +from source insert into target1(x,y) select * insert into target2(x,z) select s2,s1 +PREHOOK: type: QUERY +PREHOOK: Input: x314@source +PREHOOK: Output: x314@target1 +PREHOOK: Output: x314@target2 +POSTHOOK: query: -- note that schema spec for target1 and target2 are different +from source insert into target1(x,y) select * insert into target2(x,z) select s2,s1 +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@source +POSTHOOK: Output: x314@target1 +POSTHOOK: Output: x314@target2 +POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] +POSTHOOK: Lineage: target1.y SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ] +POSTHOOK: Lineage: target1.z EXPRESSION [] +POSTHOOK: Lineage: target2.x SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ] +POSTHOOK: Lineage: target2.y EXPRESSION [] +POSTHOOK: Lineage: target2.z SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] +PREHOOK: query: --expect target1 to have row1: (2,NULL,1), row2: (1,2,NULL) +select * from target1 order by x,y,z +PREHOOK: type: QUERY +PREHOOK: Input: x314@target1 +#### A masked pattern was here #### +POSTHOOK: query: --expect target1 to have row1: (2,NULL,1), row2: (1,2,NULL) +select * from target1 order by x,y,z +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@target1 +#### A masked pattern was here #### +1 2 NULL +2 NULL 1 +PREHOOK: query: -- expect target2 to have 1 row: (2,NULL,1) +select * from target2 +PREHOOK: type: QUERY +PREHOOK: Input: x314@target2 +#### A masked pattern was here #### +POSTHOOK: query: -- expect target2 to have 1 row: (2,NULL,1) +select * from target2 +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@target2 +#### A masked pattern was here 
#### +2 NULL 1 +PREHOOK: query: from source insert into target1(x,y,z) select null as x, * insert into target2(x,y,z) select null as x, source.* +PREHOOK: type: QUERY +PREHOOK: Input: x314@source +PREHOOK: Output: x314@target1 +PREHOOK: Output: x314@target2 +POSTHOOK: query: from source insert into target1(x,y,z) select null as x, * insert into target2(x,y,z) select null as x, source.* +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@source +POSTHOOK: Output: x314@target1 +POSTHOOK: Output: x314@target2 +POSTHOOK: Lineage: target1.x EXPRESSION [] +POSTHOOK: Lineage: target1.y SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] +POSTHOOK: Lineage: target1.z SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ] +POSTHOOK: Lineage: target2.x EXPRESSION [] +POSTHOOK: Lineage: target2.y SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] +POSTHOOK: Lineage: target2.z SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ] +PREHOOK: query: -- expect target1 to have row1: (2,NULL,1), row2: (1,2,NULL), row3: (NULL, 1,2) +select * from target1 order by x,y,z +PREHOOK: type: QUERY +PREHOOK: Input: x314@target1 +#### A masked pattern was here #### +POSTHOOK: query: -- expect target1 to have row1: (2,NULL,1), row2: (1,2,NULL), row3: (NULL, 1,2) +select * from target1 order by x,y,z +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@target1 +#### A masked pattern was here #### +NULL 1 2 +1 2 NULL +2 NULL 1 +PREHOOK: query: -- expect target2 to have 1 row: (2,NULL,1), row2: (NULL, 1,2) +select * from target2 +PREHOOK: type: QUERY +PREHOOK: Input: x314@target2 +#### A masked pattern was here #### +POSTHOOK: query: -- expect target2 to have 1 row: (2,NULL,1), row2: (NULL, 1,2) +select * from target2 +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@target2 +#### A masked pattern was here #### +2 NULL 1 +NULL 1 2 +PREHOOK: query: truncate table target1 +PREHOOK: type: TRUNCATETABLE +PREHOOK: Output: x314@target1 +POSTHOOK: 
query: truncate table target1 +POSTHOOK: type: TRUNCATETABLE +POSTHOOK: Output: x314@target1 +PREHOOK: query: create table source2(s1 int, s2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:x314 +PREHOOK: Output: x314@source2 +POSTHOOK: query: create table source2(s1 int, s2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:x314 +POSTHOOK: Output: x314@source2 +PREHOOK: query: insert into target1 (x,z) select source.s1,source2.s2 from source left outer join source2 on source.s1=source2.s2 +PREHOOK: type: QUERY +PREHOOK: Input: x314@source +PREHOOK: Input: x314@source2 +PREHOOK: Output: x314@target1 +POSTHOOK: query: insert into target1 (x,z) select source.s1,source2.s2 from source left outer join source2 on source.s1=source2.s2 +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@source +POSTHOOK: Input: x314@source2 +POSTHOOK: Output: x314@target1 +POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] +POSTHOOK: Lineage: target1.y EXPRESSION [] +POSTHOOK: Lineage: target1.z SIMPLE [(source2)source2.FieldSchema(name:s2, type:int, comment:null), ] +PREHOOK: query: --expect target1 to have 1 row (1,NULL,NULL) +select * from target1 +PREHOOK: type: QUERY +PREHOOK: Input: x314@target1 +#### A masked pattern was here #### +POSTHOOK: query: --expect target1 to have 1 row (1,NULL,NULL) +select * from target1 +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@target1 +#### A masked pattern was here #### +1 NULL NULL +PREHOOK: query: -- partitioned tables +CREATE TABLE pageviews (userid VARCHAR(64), link STRING, source STRING) PARTITIONED BY (datestamp STRING, i int) CLUSTERED BY (userid) INTO 256 BUCKETS STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:x314 +PREHOOK: Output: x314@pageviews +POSTHOOK: query: -- partitioned tables +CREATE TABLE pageviews (userid VARCHAR(64), link STRING, source STRING) PARTITIONED BY (datestamp STRING, i int) CLUSTERED BY (userid) INTO 256 BUCKETS STORED AS ORC 
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:x314 +POSTHOOK: Output: x314@pageviews +PREHOOK: query: INSERT INTO TABLE pageviews PARTITION (datestamp = '2014-09-23', i = 1)(userid,link) VALUES ('jsmith', 'mail.com') +PREHOOK: type: QUERY +PREHOOK: Input: x314@values__tmp__table__2 +PREHOOK: Output: x314@pageviews@datestamp=2014-09-23/i=1 +POSTHOOK: query: INSERT INTO TABLE pageviews PARTITION (datestamp = '2014-09-23', i = 1)(userid,link) VALUES ('jsmith', 'mail.com') +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@values__tmp__table__2 +POSTHOOK: Output: x314@pageviews@datestamp=2014-09-23/i=1 +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=1).link SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=1).source EXPRESSION [] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=1).userid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: -- expect 1 row: ('jsmith', 'mail.com', NULL) in partition '2014-09-23'/'1' +select * from pageviews +PREHOOK: type: QUERY +PREHOOK: Input: x314@pageviews +PREHOOK: Input: x314@pageviews@datestamp=2014-09-23/i=1 +#### A masked pattern was here #### +POSTHOOK: query: -- expect 1 row: ('jsmith', 'mail.com', NULL) in partition '2014-09-23'/'1' +select * from pageviews +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@pageviews +POSTHOOK: Input: x314@pageviews@datestamp=2014-09-23/i=1 +#### A masked pattern was here #### +jsmith mail.com NULL 2014-09-23 1 +PREHOOK: query: -- dynamic partitioning + + + +INSERT INTO TABLE pageviews PARTITION (datestamp='2014-09-23',i)(userid,i,link) VALUES ('jsmith', 7, '7mail.com') +PREHOOK: type: QUERY +PREHOOK: Input: x314@values__tmp__table__3 +PREHOOK: Output: x314@pageviews@datestamp=2014-09-23 +POSTHOOK: query: -- dynamic partitioning + + + +INSERT INTO TABLE 
pageviews PARTITION (datestamp='2014-09-23',i)(userid,i,link) VALUES ('jsmith', 7, '7mail.com') +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@values__tmp__table__3 +POSTHOOK: Output: x314@pageviews@datestamp=2014-09-23/i=7 +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=7).link SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=7).source EXPRESSION [] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=7).userid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE pageviews PARTITION (datestamp,i)(userid,i,link,datestamp) VALUES ('jsmith', 17, '17mail.com', '2014-09-23') +PREHOOK: type: QUERY +PREHOOK: Input: x314@values__tmp__table__4 +PREHOOK: Output: x314@pageviews +POSTHOOK: query: INSERT INTO TABLE pageviews PARTITION (datestamp,i)(userid,i,link,datestamp) VALUES ('jsmith', 17, '17mail.com', '2014-09-23') +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@values__tmp__table__4 +POSTHOOK: Output: x314@pageviews@datestamp=2014-09-23/i=17 +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=17).link SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=17).source EXPRESSION [] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=17).userid EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO TABLE pageviews PARTITION (datestamp,i)(userid,i,link,datestamp) VALUES ('jsmith', 19, '19mail.com', '2014-09-24') +PREHOOK: type: QUERY +PREHOOK: Input: x314@values__tmp__table__5 +PREHOOK: Output: x314@pageviews +POSTHOOK: query: INSERT INTO TABLE pageviews PARTITION 
(datestamp,i)(userid,i,link,datestamp) VALUES ('jsmith', 19, '19mail.com', '2014-09-24') +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@values__tmp__table__5 +POSTHOOK: Output: x314@pageviews@datestamp=2014-09-24/i=19 +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-24,i=19).link SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-24,i=19).source EXPRESSION [] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-24,i=19).userid EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: -- expect row1: ('jsmith', 'mail.com', NULL) in partition '2014-09-23'/'1' +-- expect row2: ('jsmith', '7mail.com', NULL) in partition '2014-09-23'/'7' +-- expect row3: ('jsmith', '17mail.com', NULL) in partition '2014-09-23'/'17' +-- expect row4: ('jsmith', '19mail.com', NULL) in partition '2014-09-24'/'19' +select * from pageviews order by link +PREHOOK: type: QUERY +PREHOOK: Input: x314@pageviews +PREHOOK: Input: x314@pageviews@datestamp=2014-09-23/i=1 +PREHOOK: Input: x314@pageviews@datestamp=2014-09-23/i=17 +PREHOOK: Input: x314@pageviews@datestamp=2014-09-23/i=7 +PREHOOK: Input: x314@pageviews@datestamp=2014-09-24/i=19 +#### A masked pattern was here #### +POSTHOOK: query: -- expect row1: ('jsmith', 'mail.com', NULL) in partition '2014-09-23'/'1' +-- expect row2: ('jsmith', '7mail.com', NULL) in partition '2014-09-23'/'7' +-- expect row3: ('jsmith', '17mail.com', NULL) in partition '2014-09-23'/'17' +-- expect row4: ('jsmith', '19mail.com', NULL) in partition '2014-09-24'/'19' +select * from pageviews order by link +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@pageviews +POSTHOOK: Input: x314@pageviews@datestamp=2014-09-23/i=1 +POSTHOOK: Input: x314@pageviews@datestamp=2014-09-23/i=17 +POSTHOOK: Input: x314@pageviews@datestamp=2014-09-23/i=7 +POSTHOOK: Input: 
x314@pageviews@datestamp=2014-09-24/i=19 +#### A masked pattern was here #### +jsmith 17mail.com NULL 2014-09-23 17 +jsmith 19mail.com NULL 2014-09-24 19 +jsmith 7mail.com NULL 2014-09-23 7 +jsmith mail.com NULL 2014-09-23 1 +PREHOOK: query: drop database if exists x314 cascade +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:x314 +PREHOOK: Output: database:x314 +PREHOOK: Output: x314@pageviews +PREHOOK: Output: x314@source +PREHOOK: Output: x314@source2 +PREHOOK: Output: x314@target1 +PREHOOK: Output: x314@target2 +PREHOOK: Output: x314@values__tmp__table__1 +PREHOOK: Output: x314@values__tmp__table__2 +PREHOOK: Output: x314@values__tmp__table__3 +PREHOOK: Output: x314@values__tmp__table__4 +PREHOOK: Output: x314@values__tmp__table__5 +POSTHOOK: query: drop database if exists x314 cascade +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:x314 +POSTHOOK: Output: database:x314 +POSTHOOK: Output: x314@pageviews +POSTHOOK: Output: x314@source +POSTHOOK: Output: x314@source2 +POSTHOOK: Output: x314@target1 +POSTHOOK: Output: x314@target2 +POSTHOOK: Output: x314@values__tmp__table__1 +POSTHOOK: Output: x314@values__tmp__table__2 +POSTHOOK: Output: x314@values__tmp__table__3 +POSTHOOK: Output: x314@values__tmp__table__4 +POSTHOOK: Output: x314@values__tmp__table__5