diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml index 1a19610..766f7f8 100644 --- a/itests/qtest/pom.xml +++ b/itests/qtest/pom.xml @@ -36,7 +36,7 @@ false false - stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q + 
stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q,empty_dir_in_table.q cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q 
cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q diff --git a/ql/src/test/queries/clientpositive/empty_dir_in_table.q b/ql/src/test/queries/clientpositive/empty_dir_in_table.q new file mode 100644 index 0000000..630c4c1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/empty_dir_in_table.q @@ -0,0 +1,10 @@ +dfs ${system:test.dfs.mkdir} hdfs:///target/tmp/test_empty_table; + +create external table roottable (key string) row format delimited fields terminated by '\\t' stored as textfile location 'hdfs:///target/tmp/test_empty_table'; +select count(*) from roottable; + +insert into table roottable select key from src where (key < 20) order by key; +select count(*) from roottable; + +dfs ${system:test.dfs.mkdir} hdfs:///target/tmp/test_empty_table/empty; +select count(*) from roottable; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/empty_dir_in_table.q.out b/ql/src/test/results/clientpositive/empty_dir_in_table.q.out new file mode 100644 index 0000000..c835f13 --- /dev/null +++ b/ql/src/test/results/clientpositive/empty_dir_in_table.q.out @@ -0,0 +1,45 
@@ +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@roottable +PREHOOK: query: select count(*) from roottable +PREHOOK: type: QUERY +PREHOOK: Input: default@roottable +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from roottable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@roottable +#### A masked pattern was here #### +0 +PREHOOK: query: insert into table roottable select key from src where (key < 20) order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@roottable +POSTHOOK: query: insert into table roottable select key from src where (key < 20) order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@roottable +POSTHOOK: Lineage: roottable.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select count(*) from roottable +PREHOOK: type: QUERY +PREHOOK: Input: default@roottable +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from roottable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@roottable +#### A masked pattern was here #### +20 +PREHOOK: query: select count(*) from roottable +PREHOOK: type: QUERY +PREHOOK: Input: default@roottable +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from roottable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@roottable +#### A masked pattern was here #### +20 diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java index 2cf14bd..50d2905 100644 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java +++ 
b/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java @@ -97,18 +97,32 @@ public InputSplitShim() { _isShrinked = false; } - public InputSplitShim(CombineFileSplit old) throws IOException { - super(old.getJob(), old.getPaths(), old.getStartOffsets(), + public InputSplitShim(CombineFileSplit old, JobConf conf) throws IOException { + super(old.getJob(), prune(old.getPaths(), conf), old.getStartOffsets(), old.getLengths(), dedup(old.getLocations())); _isShrinked = false; } - private static String[] dedup(String[] locations) { + private static String[] dedup(String[] locations) throws IOException { Set dedup = new HashSet(); Collections.addAll(dedup, locations); return dedup.toArray(new String[dedup.size()]); } + /** + * CombineFileInputFormat sometimes returns directories as splits, need to prune them. + */ + private static Path[] prune(Path[] paths, JobConf conf) throws IOException { + Set pruned = new HashSet(); + for (Path p : paths) { + FileSystem fs = p.getFileSystem(conf); + if (fs.isFile(p)) { + pruned.add(p); + } + } + return pruned.toArray(new Path[pruned.size()]); + } + @Override public void shrinkSplit(long length) { _isShrinked = true; @@ -338,12 +352,13 @@ public void createPool(JobConf conf, PathFilter... 
filters) { InputSplit[] splits = (InputSplit[]) super.getSplits(job, numSplits); - InputSplitShim[] isplits = new InputSplitShim[splits.length]; + ArrayList inputSplitShims = new ArrayList(); for (int pos = 0; pos < splits.length; pos++) { - isplits[pos] = new InputSplitShim((CombineFileSplit)splits[pos]); + if (isValidSplit((CombineFileSplit) splits[pos], job)) { + inputSplitShims.add(new InputSplitShim((CombineFileSplit) splits[pos], job)); + } } - - return isplits; + return inputSplitShims.toArray(new InputSplitShim[inputSplitShims.size()]); } public InputSplitShim getInputSplitShim() throws IOException { @@ -623,4 +638,19 @@ protected void run(FsShell shell, String[] command) throws Exception { int retval = shell.run(command); LOG.debug("Return value is :" + retval); } + + /** + * CombineFileInputFormat sometimes returns directories as splits, need to prune them. + */ + protected static boolean isValidSplit(CombineFileSplit split, JobConf job) throws IOException { + Path[] paths = split.getPaths(); + + for (Path p : paths) { + FileSystem fs = p.getFileSystem(job); + if (fs.isFile(p)) { + return true; + } + } + return false; + } }