diff --git itests/qtest/pom.xml itests/qtest/pom.xml
index dc4519a..7ef42a9 100644
--- itests/qtest/pom.xml
+++ itests/qtest/pom.xml
@@ -36,7 +36,7 @@
<run_disabled>false</run_disabled>
<execute.beeline.tests>false</execute.beeline.tests>
- <minimr.query.files>stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q</minimr.query.files>
+ <minimr.query.files>stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q,empty_dir_in_table.q</minimr.query.files>
<minimr.query.negative.files>cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q</minimr.query.negative.files>
<minitez.query.files>tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q</minitez.query.files>
<minitez.query.files.shared>cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q</minitez.query.files.shared>
diff --git ql/src/test/queries/clientpositive/empty_dir_in_table.q ql/src/test/queries/clientpositive/empty_dir_in_table.q
new file mode 100644
index 0000000..630c4c1
--- /dev/null
+++ ql/src/test/queries/clientpositive/empty_dir_in_table.q
@@ -0,0 +1,10 @@
+dfs ${system:test.dfs.mkdir} hdfs:///target/tmp/test_empty_table;
+
+create external table roottable (key string) row format delimited fields terminated by '\\t' stored as textfile location 'hdfs:///target/tmp/test_empty_table';
+select count(*) from roottable;
+
+insert into table roottable select key from src where (key < 20) order by key;
+select count(*) from roottable;
+
+dfs ${system:test.dfs.mkdir} hdfs:///target/tmp/test_empty_table/empty;
+select count(*) from roottable;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/empty_dir_in_table.q.out ql/src/test/results/clientpositive/empty_dir_in_table.q.out
new file mode 100644
index 0000000..c835f13
--- /dev/null
+++ ql/src/test/results/clientpositive/empty_dir_in_table.q.out
@@ -0,0 +1,45 @@
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@roottable
+PREHOOK: query: select count(*) from roottable
+PREHOOK: type: QUERY
+PREHOOK: Input: default@roottable
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from roottable
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@roottable
+#### A masked pattern was here ####
+0
+PREHOOK: query: insert into table roottable select key from src where (key < 20) order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@roottable
+POSTHOOK: query: insert into table roottable select key from src where (key < 20) order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@roottable
+POSTHOOK: Lineage: roottable.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select count(*) from roottable
+PREHOOK: type: QUERY
+PREHOOK: Input: default@roottable
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from roottable
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@roottable
+#### A masked pattern was here ####
+20
+PREHOOK: query: select count(*) from roottable
+PREHOOK: type: QUERY
+PREHOOK: Input: default@roottable
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from roottable
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@roottable
+#### A masked pattern was here ####
+20
diff --git shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
index 2cf14bd..032c2b5 100644
--- shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
+++ shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
@@ -26,6 +26,7 @@
import java.net.URISyntaxException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
@@ -67,6 +68,8 @@
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ToolRunner;
+import org.apache.commons.lang.ArrayUtils;
+
+import com.google.common.primitives.Longs;
+
/**
* Base implemention for shims against secure Hadoop 0.20.3/0.23.
*/
@@ -97,18 +100,12 @@ public InputSplitShim() {
_isShrinked = false;
}
- public InputSplitShim(CombineFileSplit old) throws IOException {
- super(old.getJob(), old.getPaths(), old.getStartOffsets(),
- old.getLengths(), dedup(old.getLocations()));
+ public InputSplitShim(JobConf conf, Path[] paths, long[] startOffsets,
+ long[] lengths, String[] locations) throws IOException {
+ super(conf, paths, startOffsets, lengths, dedup(locations));
_isShrinked = false;
}
- private static String[] dedup(String[] locations) {
- Set<String> dedup = new HashSet<String>();
- Collections.addAll(dedup, locations);
- return dedup.toArray(new String[dedup.size()]);
- }
-
@Override
public void shrinkSplit(long length) {
_isShrinked = true;
@@ -338,12 +335,22 @@ public void createPool(JobConf conf, PathFilter... filters) {
InputSplit[] splits = (InputSplit[]) super.getSplits(job, numSplits);
- InputSplitShim[] isplits = new InputSplitShim[splits.length];
+ ArrayList<InputSplitShim> inputSplitShims = new ArrayList<InputSplitShim>();
for (int pos = 0; pos < splits.length; pos++) {
- isplits[pos] = new InputSplitShim((CombineFileSplit)splits[pos]);
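+ // keep the split only if it references at least one regular file; directory-only splits carry no data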
+ if (isValidSplit((CombineFileSplit) splits[pos], job)) {
+ CombineFileSplit split = (CombineFileSplit) splits[pos];
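+ // directories may be mixed in with files; drop their path, offset, and length entries in lockstep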
+ Set<Integer> dirIndices = getDirIndices(split.getPaths(), job);
+ List<Path> prunedPaths = prune(dirIndices, Arrays.asList(split.getPaths()));
+ List<Long> prunedStartOffsets = prune(dirIndices, Arrays.asList(
+ ArrayUtils.toObject(split.getStartOffsets())));
+ List<Long> prunedLengths = prune(dirIndices, Arrays.asList(
+ ArrayUtils.toObject(split.getLengths())));
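+ // rebuild the shim split from the surviving file entries; locations are deduped in the constructor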
+ inputSplitShims.add(new InputSplitShim(job, prunedPaths.toArray(new Path[prunedPaths.size()]),
+ Longs.toArray(prunedStartOffsets),
+ Longs.toArray(prunedLengths), split.getLocations()));
+ }
}
-
- return isplits;
+ return inputSplitShims.toArray(new InputSplitShim[inputSplitShims.size()]);
}
public InputSplitShim getInputSplitShim() throws IOException {
@@ -623,4 +630,52 @@ protected void run(FsShell shell, String[] command) throws Exception {
int retval = shell.run(command);
LOG.debug("Return value is :" + retval);
}
+
+ /**
+ * CombineFileInputFormat sometimes includes directories among a split's paths. A split is
+ * valid only if at least one of its paths is a regular file; splits made up entirely of
+ * directories carry no data and are dropped.
+ */
+ private static boolean isValidSplit(CombineFileSplit split, JobConf job) throws IOException {
+ Path[] paths = split.getPaths();
+
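+ // one regular file among the paths is enough to make the split worth keeping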
+ for (Path p : paths) {
+ FileSystem fs = p.getFileSystem(job);
+ if (fs.isFile(p)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Returns the indices of the paths that are directories rather than regular files, so the
+ * caller can prune the matching path, start offset, and length entries from the split.
+ */
+ private static Set<Integer> getDirIndices(Path[] paths, JobConf conf) throws IOException {
+ Set<Integer> result = new HashSet<Integer>();
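+ // record the index of every path that is not a regular file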
+ for (int i = 0; i < paths.length; i++) {
+ FileSystem fs = paths[i].getFileSystem(conf);
+ if (!fs.isFile(paths[i])) {
+ result.add(i);
+ }
+ }
+ return result;
+ }
+
+ private static <K> List<K> prune(Set<Integer> indicesToPrune, List<K> elms) {
+ List<K> result = new ArrayList<K>();
+ // i tracks each element's position in elms and must advance on every
+ // iteration, pruned or not, so the remaining indices stay aligned
+ int i = 0;
+ for (K elm : elms) {
+ if (!indicesToPrune.contains(i)) {
+ result.add(elm);
+ }
+ i++;
+ }
+ return result;
+ }
+
+ private static String[] dedup(String[] locations) throws IOException {
+ Set<String> dedup = new HashSet<String>();
+ Collections.addAll(dedup, locations);
+ return dedup.toArray(new String[dedup.size()]);
+ }
}