diff --git common/src/java/org/apache/hadoop/hive/common/FileUtils.java common/src/java/org/apache/hadoop/hive/common/FileUtils.java index c3a2cd4..16d7c80 100644 --- common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.conf.HiveConf; @@ -297,7 +298,14 @@ public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus, List results) throws IOException { if (fileStatus.isDir()) { - for (FileStatus stat : fs.listStatus(fileStatus.getPath())) { + for (FileStatus stat : fs.listStatus(fileStatus.getPath(), new PathFilter() { + + @Override + public boolean accept(Path p) { + String name = p.getName(); + return !name.startsWith("_") && !name.startsWith("."); + } + })) { listStatusRecursively(fs, stat, results); } } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java index 520590f..d5e250f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.plan.PartitionDesc; @@ -91,7 +92,14 @@ public RecordReader getRecordReader(InputSplit split, JobConf job, List errors = new ArrayList(); FileSystem fs = dir.getFileSystem(job); - FileStatus[] matches = fs.globStatus(dir); + FileStatus[] matches = fs.globStatus(dir, new PathFilter() { + + @Override + public boolean accept(Path p) { + String name = p.getName(); + return !name.startsWith("_") && !name.startsWith("."); + } + }); if (matches == null) { errors.add(new IOException("Input path does not exist: " + dir)); } else if (matches.length == 0) { diff --git ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q new file mode 100644 index 0000000..956a61f --- /dev/null +++ ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q @@ -0,0 +1,15 @@ +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/bmjpathfilter; + +create table t1 (dt string) location '${system:test.tmp.dir}/bmjpathfilter/t1'; +Create table t2 (dt string) stored as orc; +dfs -touchz ${system:test.tmp.dir}/bmjpathfilter/t1/_SUCCESS; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +SET hive.optimize.bucketmapjoin=true; + +SELECT /*+ MAPJOIN(b) */ a.dt FROM t1 a JOIN t2 b ON (a.dt = b.dt); + +SET hive.optimize.bucketmapjoin=false; +set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; + +dfs -rmr ${system:test.tmp.dir}/bmjpathfilter; diff --git ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out new file mode 100644 index 0000000..38792a6 --- /dev/null +++ ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out @@ -0,0 +1,26 @@ +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: Create table t2 (dt string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: Create table t2 (dt string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.dt FROM t1 a JOIN t2 b ON (a.dt = b.dt) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.dt FROM t1 a JOIN t2 b ON (a.dt = b.dt) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here ####