diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java index 4acb810..76ecb33 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java @@ -22,7 +22,7 @@ import java.io.PrintStream; import java.text.DecimalFormat; import java.util.ArrayList; -import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Map; @@ -32,8 +32,10 @@ import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO; import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex; import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry; @@ -47,6 +49,9 @@ import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONWriter; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; + /** * A tool for printing out the file structure of ORC files. 
*/ @@ -86,23 +91,55 @@ public static void main(String[] args) throws Exception { System.err.println("Error : ORC files are not specified"); return; } + + // if the specified path is a directory, recursively gather the files under it and dump each one + List filesInPath = Lists.newArrayList(); + for (String filename : files) { + Path path = new Path(filename); + filesInPath.addAll(getAllFilesInPath(path, conf)); + } + if (dumpData) { - printData(Arrays.asList(files), conf); + printData(filesInPath, conf); } else { if (jsonFormat) { boolean prettyPrint = cli.hasOption('p'); - JsonFileDump.printJsonMetaData(Arrays.asList(files), conf, rowIndexCols, prettyPrint, + JsonFileDump.printJsonMetaData(filesInPath, conf, rowIndexCols, prettyPrint, printTimeZone); } else { - printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone); + printMetaData(filesInPath, conf, rowIndexCols, printTimeZone); + } + } + } + + private static Collection getAllFilesInPath(final Path path, + final Configuration conf) throws IOException { + List filesInPath = Lists.newArrayList(); + FileSystem fs = path.getFileSystem(conf); + FileStatus fileStatus = fs.getFileStatus(path); + if (fileStatus.isDir()) { + FileStatus[] fileStatuses = fs.listStatus(path, AcidUtils.hiddenFileFilter); + for (FileStatus fileInPath : fileStatuses) { + if (fileInPath.isDir()) { + filesInPath.addAll(getAllFilesInPath(fileInPath.getPath(), conf)); + } else { + filesInPath.add(fileInPath.getPath().toString()); + } } + } else { + filesInPath.add(path.toString()); } + + return filesInPath; } private static void printData(List files, Configuration conf) throws IOException, JSONException { for (String file : files) { printJsonData(conf, file); + if (files.size() > 1) { + System.out.println(Strings.repeat("=", 80) + "\n"); + } } } @@ -204,6 +241,9 @@ private static void printMetaData(List files, Configuration conf, System.out.println("Padding length: " + paddedBytes + " bytes"); System.out.println("Padding ratio: " + 
format.format(percentPadding) + "%"); rows.close(); + if (files.size() > 1) { + System.out.println(Strings.repeat("=", 80) + "\n"); + } } }