diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b80062a..d870eea 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -698,6 +698,7 @@
     HIVE_DRIVER_RUN_HOOKS("hive.exec.driver.run.hooks", ""),
     HIVE_DDL_OUTPUT_FORMAT("hive.ddl.output.format", null),
     HIVE_ENTITY_SEPARATOR("hive.entity.separator", "@"),
+    HIVE_ENTITY_CAPTURE_INPUT_URI("hive.entity.capture.input.URI", false),
 
     HIVE_SERVER2_THRIFT_MIN_WORKER_THREADS("hive.server2.thrift.min.worker.threads", 5),
     HIVE_SERVER2_THRIFT_MAX_WORKER_THREADS("hive.server2.thrift.max.worker.threads", 100),
diff --git ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
index 555faca..3e2a954 100644
--- ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
+++ ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
@@ -86,6 +86,23 @@ public ReadEntity(Partition p, ReadEntity parent) {
   }
 
   /**
+   * Constructor for a URI ReadEntity.
+   * @param path the path of the URI
+   * @param isLocal true if the URI points to the local file system
+   */
+  public ReadEntity(String path, boolean isLocal) {
+    super(path, isLocal);
+  }
+
+  /**
+   * Constructor for a non-local URI ReadEntity.
+   * @param path the path of the URI
+   */
+  public ReadEntity(String path) {
+    super(path, false);
+  }
+
+  /**
    * Equals function.
    */
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index f72f01a..6c87f37 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -695,7 +695,7 @@ private void analyzeCreateDatabase(ASTNode ast) throws SemanticException {
     if (dbProps != null) {
      createDatabaseDesc.setDatabaseProperties(dbProps);
     }
-
+    saveInputLocationEntity(dbLocation);
     rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
        createDatabaseDesc), conf));
   }
@@ -870,6 +870,7 @@ private void analyzeCreateIndex(ASTNode ast) throws SemanticException {
        shared.serde, shared.serdeProps, rowFormatParams.collItemDelim, rowFormatParams.fieldDelim,
        rowFormatParams.fieldEscape,
        rowFormatParams.lineDelim, rowFormatParams.mapKeyDelim, indexComment);
+    saveInputLocationEntity(location);
     Task<?> createIndex = TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtIndexDesc), conf);
     rootTasks.add(createIndex);
 
@@ -1215,6 +1216,7 @@ private void analyzeAlterTableLocation(ASTNode ast, String tableName,
     AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, newLocation, partSpec);
 
     addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc);
+    saveInputLocationEntity(newLocation);
     rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
        alterTblDesc), conf));
   }
@@ -2403,6 +2405,7 @@ private void analyzeAlterTableAddParts(CommonTree ast, boolean expectView)
     // add the last one
     if (currentPart != null) {
       validatePartitionValues(currentPart);
+      saveInputLocationEntity(currentLocation);
       AddPartitionDesc addPartitionDesc = new AddPartitionDesc(
          db.getCurrentDatabase(), tblName, currentPart,
          currentLocation, ifNotExists, expectView);
@@ -3078,4 +3081,12 @@ private Partition getPartition(Table table, Map<String, String> partSpec, boolean
   private String toMessage(ErrorMsg message, Object detail) {
    return detail == null ? message.getMsg() : message.getMsg(detail.toString());
   }
+
+  private void saveInputLocationEntity(String location) {
+    if (conf.getBoolVar(ConfVars.HIVE_ENTITY_CAPTURE_INPUT_URI) &&
+        (location != null)) {
+      inputs.add(new ReadEntity(location));
+    }
+  }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
index 70975c6..461844b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
@@ -36,6 +36,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -46,6 +47,7 @@
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
@@ -265,8 +267,9 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
          }
        }
        rootTasks.add(t);
-        //inputs.add(new ReadEntity(fromURI.toString(),
-        //  fromURI.getScheme().equals("hdfs") ? true : false));
+        if (conf.getBoolVar(ConfVars.HIVE_ENTITY_CAPTURE_INPUT_URI)) {
+          inputs.add(new ReadEntity(fromURI.toString(), "hdfs".equals(fromURI.getScheme())));
+        }
      }
    } catch (SemanticException e) {
      throw e;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index bd8d252..4582e6d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -32,11 +32,13 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -189,6 +191,9 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
       throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e
          .getMessage()), e);
     }
+    if (conf.getBoolVar(ConfVars.HIVE_ENTITY_CAPTURE_INPUT_URI)) {
+      inputs.add(new ReadEntity(fromURI.toString(), isLocal));
+    }
 
     // initialize destination table/partition
     tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree);
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5a71929..b897296 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9354,6 +9354,10 @@ private ASTNode analyzeCreateTable(ASTNode ast, QB qb)
       }
     }
 
+    if (conf.getBoolVar(ConfVars.HIVE_ENTITY_CAPTURE_INPUT_URI) &&
+        (location != null)) {
+      inputs.add(new ReadEntity(location));
+    }
     // Handle different types of CREATE TABLE command
     CreateTableDesc crtTblDesc = null;
     switch (command_type) {
diff --git ql/src/test/queries/clientpositive/input_uri.q ql/src/test/queries/clientpositive/input_uri.q
new file mode 100644
index 0000000..66e44ab
--- /dev/null
+++ ql/src/test/queries/clientpositive/input_uri.q
@@ -0,0 +1,25 @@
+SET hive.test.mode=true;
+SET hive.test.mode.prefix=;
+SET hive.entity.capture.input.URI=false;
+
+DROP DATABASE IF EXISTS db1 CASCADE;
+CREATE DATABASE db1;
+
+USE db1;
+CREATE TABLE tab1( dep_id INT);
+
+LOAD DATA LOCAL INPATH '../data/files/test.dat' INTO TABLE tab1;
+
+dfs -mkdir ../build/ql/test/data/exports/uri1/temp;
+dfs -rmr ../build/ql/test/data/exports/uri1;
+EXPORT TABLE tab1 TO 'ql/test/data/exports/uri1';
+
+DROP TABLE tab1;
+IMPORT TABLE tab2 FROM 'ql/test/data/exports/uri1';
+
+CREATE TABLE tab3 (key INT, value STRING);
+ALTER TABLE tab3 SET LOCATION "file:/test/test/";
+
+CREATE TABLE ptab (key INT, value STRING) PARTITIONED BY (ds STRING);
+ALTER TABLE ptab ADD PARTITION (ds='2010');
+ALTER TABLE ptab PARTITION(ds='2010') SET LOCATION "file:/test/test/ds=2010";
diff --git ql/src/test/results/clientpositive/input_uri.q.out ql/src/test/results/clientpositive/input_uri.q.out
new file mode 100644
index 0000000..a83b86a
--- /dev/null
+++ ql/src/test/results/clientpositive/input_uri.q.out
@@ -0,0 +1,79 @@
+PREHOOK: query: DROP DATABASE IF EXISTS db1 CASCADE
+PREHOOK: type: DROPDATABASE
+POSTHOOK: query: DROP DATABASE IF EXISTS db1 CASCADE
+POSTHOOK: type: DROPDATABASE
+PREHOOK: query: CREATE DATABASE db1
+PREHOOK: type: CREATEDATABASE
+POSTHOOK: query: CREATE DATABASE db1
+POSTHOOK: type: CREATEDATABASE
+PREHOOK: query: USE db1
+PREHOOK: type: SWITCHDATABASE
+POSTHOOK: query: USE db1
+POSTHOOK: type: SWITCHDATABASE
+PREHOOK: query: CREATE TABLE tab1( dep_id INT)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE tab1( dep_id INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: db1@tab1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/test.dat' INTO TABLE tab1
+PREHOOK: type: LOAD
+PREHOOK: Output: db1@tab1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/test.dat' INTO TABLE tab1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: db1@tab1
+#### A masked pattern was here ####
+PREHOOK: query: EXPORT TABLE tab1 TO 'ql/test/data/exports/uri1'
+PREHOOK: type: EXPORT
+PREHOOK: Input: db1@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPORT TABLE tab1 TO 'ql/test/data/exports/uri1'
+POSTHOOK: type: EXPORT
+POSTHOOK: Input: db1@tab1
+#### A masked pattern was here ####
+PREHOOK: query: DROP TABLE tab1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: db1@tab1
+PREHOOK: Output: db1@tab1
+POSTHOOK: query: DROP TABLE tab1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: db1@tab1
+POSTHOOK: Output: db1@tab1
+PREHOOK: query: IMPORT TABLE tab2 FROM 'ql/test/data/exports/uri1'
+PREHOOK: type: IMPORT
+POSTHOOK: query: IMPORT TABLE tab2 FROM 'ql/test/data/exports/uri1'
+POSTHOOK: type: IMPORT
+POSTHOOK: Output: db1@tab2
+PREHOOK: query: CREATE TABLE tab3 (key INT, value STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE tab3 (key INT, value STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: db1@tab3
+#### A masked pattern was here ####
+PREHOOK: type: ALTERTABLE_LOCATION
+PREHOOK: Input: db1@tab3
+PREHOOK: Output: db1@tab3
+#### A masked pattern was here ####
+POSTHOOK: type: ALTERTABLE_LOCATION
+POSTHOOK: Input: db1@tab3
+POSTHOOK: Output: db1@tab3
+PREHOOK: query: CREATE TABLE ptab (key INT, value STRING) PARTITIONED BY (ds STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE ptab (key INT, value STRING) PARTITIONED BY (ds STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: db1@ptab
+PREHOOK: query: ALTER TABLE ptab ADD PARTITION (ds='2010')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: db1@ptab
+POSTHOOK: query: ALTER TABLE ptab ADD PARTITION (ds='2010')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: db1@ptab
+POSTHOOK: Output: db1@ptab@ds=2010
+#### A masked pattern was here ####
+PREHOOK: type: ALTERPARTITION_LOCATION
+PREHOOK: Input: db1@ptab
+PREHOOK: Output: db1@ptab@ds=2010
+#### A masked pattern was here ####
+POSTHOOK: type: ALTERPARTITION_LOCATION
+POSTHOOK: Input: db1@ptab
+POSTHOOK: Input: db1@ptab@ds=2010
+POSTHOOK: Output: db1@ptab@ds=2010