diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml
index 1a19610..766f7f8 100644
--- a/itests/qtest/pom.xml
+++ b/itests/qtest/pom.xml
@@ -36,7 +36,7 @@
false
false
- stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q
+ stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q,empty_dir_in_table.q
cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q
tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q
cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q
diff --git a/ql/src/test/queries/clientpositive/empty_dir_in_table.q b/ql/src/test/queries/clientpositive/empty_dir_in_table.q
new file mode 100644
index 0000000..630c4c1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/empty_dir_in_table.q
@@ -0,0 +1,10 @@
+dfs ${system:test.dfs.mkdir} hdfs:///target/tmp/test_empty_table;
+
+create external table roottable (key string) row format delimited fields terminated by '\\t' stored as textfile location 'hdfs:///target/tmp/test_empty_table';
+select count(*) from roottable;
+
+insert into table roottable select key from src where (key < 20) order by key;
+select count(*) from roottable;
+
+dfs ${system:test.dfs.mkdir} hdfs:///target/tmp/test_empty_table/empty;
+select count(*) from roottable;
diff --git a/ql/src/test/results/clientpositive/empty_dir_in_table.q.out b/ql/src/test/results/clientpositive/empty_dir_in_table.q.out
new file mode 100644
index 0000000..c835f13
--- /dev/null
+++ b/ql/src/test/results/clientpositive/empty_dir_in_table.q.out
@@ -0,0 +1,45 @@
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@roottable
+PREHOOK: query: select count(*) from roottable
+PREHOOK: type: QUERY
+PREHOOK: Input: default@roottable
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from roottable
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@roottable
+#### A masked pattern was here ####
+0
+PREHOOK: query: insert into table roottable select key from src where (key < 20) order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@roottable
+POSTHOOK: query: insert into table roottable select key from src where (key < 20) order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@roottable
+POSTHOOK: Lineage: roottable.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select count(*) from roottable
+PREHOOK: type: QUERY
+PREHOOK: Input: default@roottable
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from roottable
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@roottable
+#### A masked pattern was here ####
+20
+PREHOOK: query: select count(*) from roottable
+PREHOOK: type: QUERY
+PREHOOK: Input: default@roottable
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from roottable
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@roottable
+#### A masked pattern was here ####
+20
diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
index 2cf14bd..50d2905 100644
--- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
+++ b/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
@@ -97,18 +97,32 @@ public InputSplitShim() {
_isShrinked = false;
}
- public InputSplitShim(CombineFileSplit old) throws IOException {
- super(old.getJob(), old.getPaths(), old.getStartOffsets(),
+ public InputSplitShim(CombineFileSplit old, JobConf conf) throws IOException {
+ super(old.getJob(), prune(old.getPaths(), conf), old.getStartOffsets(),
old.getLengths(), dedup(old.getLocations()));
_isShrinked = false;
}
- private static String[] dedup(String[] locations) {
+ private static String[] dedup(String[] locations) throws IOException {
Set dedup = new HashSet();
Collections.addAll(dedup, locations);
return dedup.toArray(new String[dedup.size()]);
}
+ /**
+ * CombineFileInputFormat sometimes returns directories as splits, need to prune them.
+ */
+ private static Path[] prune(Path[] paths, JobConf conf) throws IOException {
+ Set pruned = new HashSet();
+ for (Path p : paths) {
+ FileSystem fs = p.getFileSystem(conf);
+ if (fs.isFile(p)) {
+ pruned.add(p);
+ }
+ }
+ return pruned.toArray(new Path[pruned.size()]);
+ }
+
@Override
public void shrinkSplit(long length) {
_isShrinked = true;
@@ -338,12 +352,13 @@ public void createPool(JobConf conf, PathFilter... filters) {
InputSplit[] splits = (InputSplit[]) super.getSplits(job, numSplits);
- InputSplitShim[] isplits = new InputSplitShim[splits.length];
+ ArrayList inputSplitShims = new ArrayList();
for (int pos = 0; pos < splits.length; pos++) {
- isplits[pos] = new InputSplitShim((CombineFileSplit)splits[pos]);
+ if (isValidSplit((CombineFileSplit) splits[pos], job)) {
+ inputSplitShims.add(new InputSplitShim((CombineFileSplit) splits[pos], job));
+ }
}
-
- return isplits;
+ return inputSplitShims.toArray(new InputSplitShim[inputSplitShims.size()]);
}
public InputSplitShim getInputSplitShim() throws IOException {
@@ -623,4 +638,19 @@ protected void run(FsShell shell, String[] command) throws Exception {
int retval = shell.run(command);
LOG.debug("Return value is :" + retval);
}
+
+ /**
+ * CombineFileInputFormat sometimes returns directories as splits, need to prune them.
+ */
+ protected static boolean isValidSplit(CombineFileSplit split, JobConf job) throws IOException {
+ Path[] paths = split.getPaths();
+
+ for (Path p : paths) {
+ FileSystem fs = p.getFileSystem(job);
+ if (fs.isFile(p)) {
+ return true;
+ }
+ }
+ return false;
+ }
}