diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java index 0397ee0..8581cf3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; import org.apache.hadoop.hive.serde2.columnar.LazyDecompressionCallback; +import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.SequenceFile.Metadata; @@ -721,7 +722,8 @@ public class RCFile { public Writer(FileSystem fs, Configuration conf, Path name, Progressable progress, Metadata metadata, CompressionCodec codec) throws IOException { this(fs, conf, name, fs.getConf().getInt("io.file.buffer.size", 4096), - fs.getDefaultReplication(), fs.getDefaultBlockSize(), progress, + ShimLoader.getHadoopShims().getDefaultReplication(fs, name), + ShimLoader.getHadoopShims().getDefaultBlockSize(fs, name), progress, metadata, codec); } diff --git a/shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java b/shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java index e6dcd88..d695ff5 100644 --- a/shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java +++ b/shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java @@ -589,6 +589,16 @@ public class Hadoop20Shims implements HadoopShims { } @Override + public long getDefaultBlockSize(FileSystem fs, Path path) { + return fs.getDefaultBlockSize(); + } + + @Override + public short getDefaultReplication(FileSystem fs, Path path) { + return fs.getDefaultReplication(); + } + + @Override public void closeAllForUGI(UserGroupInformation ugi) { // No such functionality in ancient hadoop return; diff --git a/shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java b/shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java index c2d0310..be8d703 100644 --- a/shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java +++ b/shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java @@ -21,6 +21,8 @@ import java.net.MalformedURLException; import java.net.URL; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.shims.HadoopShimsSecure; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapreduce.Job; @@ -93,4 +95,14 @@ public class Hadoop20SShims extends HadoopShimsSecure { public String getJobLauncherHttpAddress(Configuration conf) { return conf.get("mapred.job.tracker.http.address"); } + + @Override + public long getDefaultBlockSize(FileSystem fs, Path path) { + return fs.getDefaultBlockSize(); + } + + @Override + public short getDefaultReplication(FileSystem fs, Path path) { + return fs.getDefaultReplication(); + } } diff --git a/shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index 413bbd7..6b44e09 100644 --- a/shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -21,6 +21,8 @@ import java.net.MalformedURLException; import java.net.URL; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.shims.HadoopShims.JobTrackerState; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobConf; @@ -111,4 +113,14 @@ public class Hadoop23Shims extends HadoopShimsSecure { return conf.get("yarn.resourcemanager.webapp.address"); } + @Override + public long getDefaultBlockSize(FileSystem fs, Path path) { + return fs.getDefaultBlockSize(path); + } + + @Override + public short getDefaultReplication(FileSystem fs, Path path) { + return fs.getDefaultReplication(path); + } + } diff --git a/shims/src/common-secure/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/src/common-secure/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java index 4bec20a..bf62f08 100644 --- a/shims/src/common-secure/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java +++ b/shims/src/common-secure/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java @@ -553,4 +553,10 @@ public abstract class HadoopShimsSecure implements HadoopShims { @Override abstract public String getJobLauncherRpcAddress(Configuration conf); + + @Override + abstract public short getDefaultReplication(FileSystem fs, Path path); + + @Override + abstract public long getDefaultBlockSize(FileSystem fs, Path path); } diff --git a/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java index 437c94f..1519a6e 100644 --- a/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java +++ b/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java @@ -277,6 +277,24 @@ public interface HadoopShims { public String getJobLauncherHttpAddress(Configuration conf); /** + * Get the default block size for the path. FileSystem alone is not sufficient to + * determine the same, as in case of CSMT the underlying file system determines that. + * @param fs + * @param path + * @return + */ + public long getDefaultBlockSize(FileSystem fs, Path path); + + /** + * Get the default replication for a path. In case of CSMT the given path will be used to + * locate the actual filesystem. + * @param fs + * @param path + * @return + */ + public short getDefaultReplication(FileSystem fs, Path path); + + /** * InputSplitShim. * */