diff --git a/data/files/symlink-with-regex.txt b/data/files/symlink-with-regex.txt
new file mode 100644
index 0000000..02c97e4
--- /dev/null
+++ b/data/files/symlink-with-regex.txt
@@ -0,0 +1 @@
+../../data/files/T*
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java
index feef854..c32b4ff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java
@@ -75,8 +75,18 @@ public void rework(HiveConf job, MapredWork work) throws IOException {
             while ((line = reader.readLine()) != null) {
               // no check for the line? How to check?
               // if the line is invalid for any reason, the job will fail.
-              toAddPathToPart.put(line, partDesc);
-              pathToAliases.put(line, aliases);
+              // A line may be a plain path or a glob pattern; register every match.
+              FileStatus[] matches = fileSystem.globStatus(new Path(line));
+              if (matches == null) {
+                // globStatus returns null for a non-glob path that does not exist;
+                // fail with a clear message instead of a NullPointerException.
+                throw new IOException("Symlink target does not exist: " + line);
+              }
+              for (FileStatus fileStatus : matches) {
+                String matchedPath = fileStatus.getPath().toUri().getPath();
+                toAddPathToPart.put(matchedPath, partDesc);
+                pathToAliases.put(matchedPath, aliases);
+              }
             }
           } finally {
             org.apache.hadoop.io.IOUtils.closeStream(reader);
diff --git a/ql/src/test/queries/clientpositive/symlink_text_input_format.q b/ql/src/test/queries/clientpositive/symlink_text_input_format.q
index d633b97..dc513ea 100644
--- a/ql/src/test/queries/clientpositive/symlink_text_input_format.q
+++ b/ql/src/test/queries/clientpositive/symlink_text_input_format.q
@@ -21,3 +21,16 @@ EXPLAIN SELECT count(1) FROM symlink_text_input_format;
 SELECT count(1) FROM symlink_text_input_format;
 
 DROP TABLE symlink_text_input_format;
+
+
+
+CREATE TABLE symlink_text_input_format (key STRING, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat';
+dfs -cp ../../data/files/symlink-with-regex.txt ${system:test.warehouse.dir}/symlink_text_input_format/symlink-with-regex.txt.txt;
+
+set hive.rework.mapredwork = true ;
+set mapred.max.split.size= 0 ;
+set mapred.min.split.size.per.node= 0 ;
+set mapred.min.split.size.per.rack= 0 ;
+set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+SELECT count(1) FROM symlink_text_input_format;
+DROP TABLE symlink_text_input_format;DROP TABLE symlink_text_input_format;
diff --git a/ql/src/test/results/clientpositive/symlink_text_input_format.q.out b/ql/src/test/results/clientpositive/symlink_text_input_format.q.out
index 6c2e2e6..5c15f29 100644
--- a/ql/src/test/results/clientpositive/symlink_text_input_format.q.out
+++ b/ql/src/test/results/clientpositive/symlink_text_input_format.q.out
@@ -223,3 +223,32 @@ POSTHOOK: query: DROP TABLE symlink_text_input_format
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@symlink_text_input_format
 POSTHOOK: Output: default@symlink_text_input_format
+PREHOOK: query: CREATE TABLE symlink_text_input_format (key STRING, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@symlink_text_input_format
+POSTHOOK: query: CREATE TABLE symlink_text_input_format (key STRING, value STRING) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@symlink_text_input_format
+PREHOOK: query: SELECT count(1) FROM symlink_text_input_format
+PREHOOK: type: QUERY
+PREHOOK: Input: default@symlink_text_input_format
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(1) FROM symlink_text_input_format
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@symlink_text_input_format
+#### A masked pattern was here ####
+16
+PREHOOK: query: DROP TABLE symlink_text_input_format
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@symlink_text_input_format
+PREHOOK: Output: default@symlink_text_input_format
+POSTHOOK: query: DROP TABLE symlink_text_input_format
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@symlink_text_input_format
+POSTHOOK: Output: default@symlink_text_input_format
+PREHOOK: query: DROP TABLE symlink_text_input_format
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE symlink_text_input_format
+POSTHOOK: type: DROPTABLE