Index: hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java
===================================================================
--- hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java (revision 1616005)
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java (working copy)
@@ -20,6 +20,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
@@ -37,6 +38,7 @@
  * class that allows us to still be as generic as possible
  * in the main codeflow path, and call attention to the special
  * cases here.
+ *
  * Note : For all methods introduced here, please document why
  * the special case is necessary, providing a jira number if
  * possible.
@@ -50,6 +52,11 @@
    * instantiating a storage handler to write. We set any parameters
    * we want to be visible to the job in jobProperties, and this will
    * be available to the job via jobconf at run time.
+   *
+   * This is mostly intended to be used by StorageHandlers that wrap
+   * File-based OutputFormats such as FosterStorageHandler that wraps
+   * RCFile, ORC, etc.
+   *
    * @param jobProperties : map to write to
    * @param jobInfo : information about this output job to read from
    * @param ofclass : the output format in use
@@ -78,5 +85,26 @@
       }
     }
   }
 
+  /**
+   * Method to do any storage-handler specific special casing while instantiating a
+   * HCatLoader
+   *
+   * @param conf : configuration to write to
+   * @param tableInfo : the table definition being used
+   */
+  public static void addSpecialCasesParametersForHCatLoader(
+      Configuration conf, HCatTableInfo tableInfo) {
+    if ((tableInfo == null) || (tableInfo.getStorerInfo() == null)){
+      return;
+    }
+    String shClass = tableInfo.getStorerInfo().getStorageHandlerClass();
+    if ((shClass != null) && shClass.equals("org.apache.hadoop.hive.hbase.HBaseStorageHandler")){
+      // NOTE: The reason we use a string name of the hive hbase handler here is
+      // because we do not want to introduce a compile-dependency on the hive-hbase-handler
+      // module from within hive-hcatalog.
+      // This parameter was added due to the requirement in HIVE-7072
+      conf.set("pig.noSplitCombination", "true");
+    }
+  }
 }
Index: hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java
===================================================================
--- hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java (revision 1616005)
+++ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java (working copy)
@@ -43,6 +43,7 @@
 import org.apache.hive.hcatalog.data.schema.HCatSchema;
 import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
 import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hive.hcatalog.mapreduce.SpecialCases;
 import org.apache.pig.Expression;
 import org.apache.pig.Expression.BinaryExpression;
 import org.apache.pig.PigException;
@@ -125,6 +126,12 @@
     Job clone = new Job(job.getConfiguration());
     HCatInputFormat.setInput(job, dbName, tableName, getPartitionFilterString());
 
+    InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(
+        job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
+
+    SpecialCases.addSpecialCasesParametersForHCatLoader(job.getConfiguration(),
+        inputJobInfo.getTableInfo());
+
     // We will store all the new /changed properties in the job in the
     // udf context, so the the HCatInputFormat.setInput method need not
     //be called many times.
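For context, the net effect of the patch: when HCatLoader reads a table whose storage handler is the HBase handler, it forces pig.noSplitCombination=true in the job configuration so Pig does not combine input splits and every HBase region gets read (HIVE-7072). Below is a minimal standalone sketch, not part of the patch, that mirrors the new SpecialCases method's behaviour; the class name NoSplitCombinationSketch and the hard-coded handler class string are illustrative assumptions.

// Illustrative sketch only (not part of the patch): mirrors what
// SpecialCases.addSpecialCasesParametersForHCatLoader does for an
// HBase-backed table, using the same string comparison the patch uses
// to avoid a compile-time dependency on hive-hbase-handler.
import org.apache.hadoop.conf.Configuration;

public class NoSplitCombinationSketch { // hypothetical class name
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // In the real code path this value comes from
    // tableInfo.getStorerInfo().getStorageHandlerClass()
    String shClass = "org.apache.hadoop.hive.hbase.HBaseStorageHandler";
    if ("org.apache.hadoop.hive.hbase.HBaseStorageHandler".equals(shClass)) {
      // HIVE-7072: stop Pig from combining splits so each HBase region keeps its own split
      conf.set("pig.noSplitCombination", "true");
    }
    System.out.println(conf.get("pig.noSplitCombination")); // prints: true
  }
}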