diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java index cf4ad34..afa982c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; import org.apache.hadoop.hive.serde2.columnar.LazyDecompressionCallback; +import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.SequenceFile.Metadata; @@ -732,7 +733,8 @@ public class RCFile { public Writer(FileSystem fs, Configuration conf, Path name, Progressable progress, Metadata metadata, CompressionCodec codec) throws IOException { this(fs, conf, name, fs.getConf().getInt("io.file.buffer.size", 4096), - fs.getDefaultReplication(), fs.getDefaultBlockSize(), progress, + ShimLoader.getHadoopShims().getDefaultReplication(fs, name), + ShimLoader.getHadoopShims().getDefaultBlockSize(fs, name), progress, metadata, codec); } diff --git a/shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java b/shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java index 02ac55a..d9f530d 100644 --- a/shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java +++ b/shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java @@ -624,4 +624,14 @@ public class Hadoop20Shims implements HadoopShims { public String getJobLauncherHttpAddress(Configuration conf) { return conf.get("mapred.job.tracker.http.address"); } + + @Override + public long getDefaultBlockSize(FileSystem fs, Path path) { + return fs.getDefaultBlockSize(); + } + + @Override + public short getDefaultReplication(FileSystem fs, Path path) { + return fs.getDefaultReplication(); + } } diff --git a/shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java b/shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java index 7d04f57..3385054 100644 --- a/shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java +++ b/shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java @@ -21,6 +21,8 @@ import java.net.MalformedURLException; import java.net.URL; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.shims.HadoopShimsSecure; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobConf; @@ -93,4 +95,14 @@ public class Hadoop20SShims extends HadoopShimsSecure { public String getJobLauncherHttpAddress(Configuration conf) { return conf.get("mapred.job.tracker.http.address"); } + + @Override + public long getDefaultBlockSize(FileSystem fs, Path path) { + return fs.getDefaultBlockSize(); + } + + @Override + public short getDefaultReplication(FileSystem fs, Path path) { + return fs.getDefaultReplication(); + } } diff --git a/shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index fdd16dd..dd149c6 100644 --- a/shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -22,6 +22,8 @@ import java.net.MalformedURLException; import java.net.URL; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.shims.HadoopShims.JobTrackerState; import org.apache.hadoop.hive.shims.HadoopShimsSecure; import org.apache.hadoop.mapred.ClusterStatus; @@ -113,4 +115,15 @@ public class Hadoop23Shims extends HadoopShimsSecure { return conf.get("yarn.resourcemanager.webapp.address"); } + @Override + public long getDefaultBlockSize(FileSystem fs, Path path) { + return fs.getDefaultBlockSize(path); + } + + @Override + public short getDefaultReplication(FileSystem fs, Path path) { + return fs.getDefaultReplication(path); + } + + } diff --git a/shims/src/common-secure/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/src/common-secure/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java index 9959aca..965ac99 100644 --- a/shims/src/common-secure/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java +++ b/shims/src/common-secure/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java @@ -576,4 +576,10 @@ public abstract class HadoopShimsSecure implements HadoopShims { @Override abstract public String getJobLauncherRpcAddress(Configuration conf); + + @Override + abstract public short getDefaultReplication(FileSystem fs, Path path); + + @Override + abstract public long getDefaultBlockSize(FileSystem fs, Path path); } diff --git a/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java index e7d6619..76cb550 100644 --- a/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java +++ b/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java @@ -290,6 +290,24 @@ public interface HadoopShims { public String getJobLauncherHttpAddress(Configuration conf); /** + * Get the default block size for the path. FileSystem alone is not sufficient to + * determine the same, as in case of CSMT the underlying file system determines that. + * @param fs + * @param path + * @return + */ + public long getDefaultBlockSize(FileSystem fs, Path path); + + /** + * Get the default replication for a path. In case of CSMT the given path will be used to + * locate the actual filesystem. + * @param fs + * @param path + * @return + */ + public short getDefaultReplication(FileSystem fs, Path path); + + /** * InputSplitShim. * */