diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java
index 0c1fa23..3146cb4 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java
@@ -20,15 +20,13 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.io.RCFile;
 import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
 import org.apache.hadoop.mapred.OutputFormat;

-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
 import java.util.Map;

 /**
@@ -40,6 +38,7 @@
  * class that allows us to still be as generic as possible
  * in the main codeflow path, and call attention to the special
  * cases here.
+ *
  * Note : For all methods introduced here, please document why
  * the special case is necessary, providing a jira number if
  * possible.
@@ -53,6 +52,11 @@
  * instantiating a storage handler to write. We set any parameters
  * we want to be visible to the job in jobProperties, and this will
  * be available to the job via jobconf at run time.
+ *
+ * This is mostly intended for StorageHandlers that wrap file-based
+ * OutputFormats, such as FosterStorageHandler, which wraps RCFile,
+ * ORC, etc.
+ *
  * @param jobProperties : map to write to
  * @param jobInfo : information about this output job to read from
  * @param ofclass : the output format in use
@@ -81,5 +85,26 @@ public static void addSpecialCasesParametersToOutputJobProperties(
     }
   }

+  /**
+   * Method to do any storage-handler-specific special casing while
+   * instantiating an HCatLoader.
+   *
+   * @param conf : configuration to write to
+   * @param tableInfo : the table definition being used
+   */
+  public static void addSpecialCasesParametersForHCatLoader(
+      Configuration conf, HCatTableInfo tableInfo) {
+    if ((tableInfo == null) || (tableInfo.getStorerInfo() == null)) {
+      return;
+    }
+    String shClass = tableInfo.getStorerInfo().getStorageHandlerClass();
+    if ((shClass != null) && shClass.equals("org.apache.hadoop.hive.hbase.HBaseStorageHandler")) {
+      // NOTE: we use the string name of the Hive HBase handler here because we do
+      // not want to introduce a compile-time dependency on the hive-hbase-handler
+      // module from within hive-hcatalog.
+      // This parameter was added for the requirement described in HIVE-7072.
+      conf.set("pig.noSplitCombination", "true");
+    }
+  }
 }
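Reviewer note: below is a minimal, self-contained sketch of the runtime effect of the new special case. The class and helper method names are hypothetical, not part of the patch; a real HCatTableInfo needs a metastore to construct, so the storage-handler class name is passed in directly here. The handler class name and the pig.noSplitCombination key are taken from the patch above.

import org.apache.hadoop.conf.Configuration;

public class SpecialCaseSketch {
  // Hypothetical stand-in mirroring addSpecialCasesParametersForHCatLoader:
  // when the table's storage handler is the Hive HBase handler, disable
  // Pig's input split combination (per the HIVE-7072 note in the patch).
  static void applyHBaseLoaderSpecialCase(Configuration conf, String storageHandlerClass) {
    if ("org.apache.hadoop.hive.hbase.HBaseStorageHandler".equals(storageHandlerClass)) {
      conf.set("pig.noSplitCombination", "true");
    }
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    applyHBaseLoaderSpecialCase(conf,
        "org.apache.hadoop.hive.hbase.HBaseStorageHandler");
    // Prints "true": the job conf now tells Pig not to combine input splits.
    System.out.println(conf.get("pig.noSplitCombination"));
  }
}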
diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java
index f272b15..4de51f3 100644
--- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java
+++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java
@@ -43,6 +43,7 @@
 import org.apache.hive.hcatalog.data.schema.HCatSchema;
 import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
 import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hive.hcatalog.mapreduce.SpecialCases;
 import org.apache.pig.Expression;
 import org.apache.pig.Expression.BinaryExpression;
 import org.apache.pig.PigException;
@@ -126,6 +127,12 @@ public void setLocation(String location, Job job) throws IOException {
     Job clone = new Job(job.getConfiguration());
     HCatInputFormat.setInput(job, dbName, tableName, getPartitionFilterString());

+    InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(
+        job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
+
+    SpecialCases.addSpecialCasesParametersForHCatLoader(job.getConfiguration(),
+        inputJobInfo.getTableInfo());
+
     // We will store all the new /changed properties in the job in the
     // udf context, so the the HCatInputFormat.setInput method need not
     //be called many times.
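Reviewer note: for context, this is how the change surfaces to a Pig user, sketched with Pig's embedded PigServer API. The table default.my_hbase_table is made up and assumes a metastore with an HBase-backed table; the point is that setLocation() now applies the special case automatically, so scripts no longer have to set pig.noSplitCombination by hand (the workaround for the HIVE-7072 issue referenced above).

import java.util.Iterator;
import org.apache.pig.PigServer;
import org.apache.pig.data.Tuple;

public class HCatLoaderHBaseSketch {
  public static void main(String[] args) throws Exception {
    PigServer pig = new PigServer("mapreduce");
    // Hypothetical HBase-backed Hive table; before this patch a script had
    // to set pig.noSplitCombination itself when loading such a table.
    pig.registerQuery("A = LOAD 'default.my_hbase_table' "
        + "USING org.apache.hive.hcatalog.pig.HCatLoader();");
    Iterator<Tuple> rows = pig.openIterator("A");
    while (rows.hasNext()) {
      System.out.println(rows.next());
    }
  }
}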