diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 4fb30bc..4f3d504 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -44,7 +44,6 @@
 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.io.HivePartitioner;
-import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat;
 import org.apache.hadoop.hive.ql.io.RecordUpdater;
 import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -329,7 +328,7 @@ protected void initializeOp(Configuration hconf) throws HiveException {
       taskId = Utilities.getTaskId(hconf);
       initializeSpecPath();
       fs = specPath.getFileSystem(hconf);
-      hiveOutputFormat = conf.getTableInfo().getOutputFileFormatClass().newInstance();
+      hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, conf.getTableInfo());
       isCompressed = conf.getCompressed();
       parent = Utilities.toTempPath(conf.getDirName());
       statsCollectRawDataSize = conf.isStatsCollectRawDataSize();
@@ -338,6 +337,11 @@ protected void initializeOp(Configuration hconf) throws HiveException {
       serializer.initialize(null, conf.getTableInfo().getProperties());
       outputClass = serializer.getSerializedClass();
 
+      if (isLogInfoEnabled) {
+        LOG.info("Using serializer : " + serializer + " and formatter : " + hiveOutputFormat +
+            (isCompressed ? " with compression" : ""));
+      }
+
       // Timeout is chosen to make sure that even if one iteration takes more than
       // half of the script.timeout but less than script.timeout, we will still
       // be able to report progress.
@@ -1046,26 +1050,13 @@ public void augmentPlan() {
   public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
     if (hiveOutputFormat == null) {
+      Utilities.copyTableJobPropertiesToConf(conf.getTableInfo(), job);
       try {
-        if (getConf().getTableInfo().getJobProperties() != null) {
-          //Setting only for Storage Handler
-          if (getConf().getTableInfo().getJobProperties().get(HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY) != null) {
-            job.set(HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY,getConf().getTableInfo().getJobProperties().get(HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY));
-            hiveOutputFormat = ReflectionUtils.newInstance(conf.getTableInfo().getOutputFileFormatClass(),job);
-          }
-          else {
-            hiveOutputFormat = conf.getTableInfo().getOutputFileFormatClass().newInstance();
-          }
-        }
-        else {
-          hiveOutputFormat = conf.getTableInfo().getOutputFileFormatClass().newInstance();
-        }
+        hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(job, getConf().getTableInfo());
       } catch (Exception ex) {
         throw new IOException(ex);
       }
     }
 
-    Utilities.copyTableJobPropertiesToConf(conf.getTableInfo(), job);
-
     if (conf.getTableInfo().isNonNative()) {
       //check the ouput specs only if it is a storage handler (native tables's outputformats does
       //not set the job's output properties correctly)
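Note: every call site above now funnels through HiveFileFormatUtils.getHiveOutputFormat instead of reflectively instantiating getOutputFileFormatClass() and special-casing the pass-through job-conf key. A minimal sketch of what a caller does after this patch; the helper name and signature come from the diff, while the enclosing class stands in for FileSinkOperator.initializeOp():

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.TableDesc;

// Hypothetical caller, standing in for the sink operator's initialization.
public class SinkInitSketch {
  static HiveOutputFormat resolveOutputFormat(Configuration hconf, TableDesc tableInfo)
      throws HiveException {
    // One call replaces getOutputFileFormatClass().newInstance() plus the
    // HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY special case; a non-Hive
    // format comes back already wrapped in HivePassThroughOutputFormat.
    return HiveFileFormatUtils.getHiveOutputFormat(hconf, tableInfo);
  }
}
```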
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 913288f..368474d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -1841,7 +1841,7 @@ private static void createEmptyBuckets(Configuration hconf, ArrayList<String> pa
       Serializer serializer = (Serializer) tableInfo.getDeserializerClass().newInstance();
       serializer.initialize(null, tableInfo.getProperties());
       outputClass = serializer.getSerializedClass();
-      hiveOutputFormat = conf.getTableInfo().getOutputFileFormatClass().newInstance();
+      hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, conf.getTableInfo());
     } catch (SerDeException e) {
       throw new HiveException(e);
     } catch (InstantiationException e) {
@@ -3309,7 +3309,7 @@ public static double getHighestSamplePercentage (MapWork work) {
 
   @SuppressWarnings({"rawtypes", "unchecked"})
   private static Path createEmptyFile(Path hiveScratchDir,
-      Class outFileFormat, JobConf job,
+      HiveOutputFormat outFileFormat, JobConf job,
       int sequenceNumber, Properties props, boolean dummyRow)
           throws IOException, InstantiationException, IllegalAccessException {
@@ -3325,7 +3325,7 @@ private static Path createEmptyFile(Path hiveScratchDir,
     String newFile = newDir + Path.SEPARATOR + "emptyFile";
     Path newFilePath = new Path(newFile);
 
-    RecordWriter recWriter = outFileFormat.newInstance().getHiveRecordWriter(job, newFilePath,
+    RecordWriter recWriter = outFileFormat.getHiveRecordWriter(job, newFilePath,
         Text.class, false, props, null);
     if (dummyRow) {
       // empty files are omitted at CombineHiveInputFormat.
@@ -3341,7 +3341,7 @@ private static Path createEmptyFile(Path hiveScratchDir,
 
   @SuppressWarnings("rawtypes")
   private static Path createDummyFileForEmptyPartition(Path path, JobConf job, MapWork work,
       Path hiveScratchDir, String alias, int sequenceNumber)
-          throws IOException, InstantiationException, IllegalAccessException {
+          throws Exception {
 
     String strPath = path.toString();
@@ -3351,7 +3351,7 @@ private static Path createDummyFileForEmptyPartition(Path path, JobConf job, Map
     boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class;
     Properties props = SerDeUtils.createOverlayedProperties(
         partDesc.getTableDesc().getProperties(), partDesc.getProperties());
-    Class outFileFormat = partDesc.getOutputFileFormatClass();
+    HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, partDesc);
 
     if (nonNative) {
       // if this isn't a hive table we can't create an empty file for it.
@@ -3384,12 +3384,12 @@ private static Path createDummyFileForEmptyPartition(Path path, JobConf job, Map
 
   @SuppressWarnings("rawtypes")
   private static Path createDummyFileForEmptyTable(JobConf job, MapWork work,
       Path hiveScratchDir, String alias, int sequenceNumber)
-          throws IOException, InstantiationException, IllegalAccessException {
+          throws Exception {
 
     TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc();
     Properties props = tableDesc.getProperties();
     boolean nonNative = tableDesc.isNonNative();
-    Class outFileFormat = tableDesc.getOutputFileFormatClass();
+    HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc);
 
     if (nonNative) {
       // if this isn't a hive table we can't create an empty file for it.
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java
index 5271e91..b07fdcf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java
@@ -523,7 +523,7 @@ protected void setupWriter() throws HiveException {
       tmpFile.deleteOnExit();
 
       // rFile = new RandomAccessFile(tmpFile, "rw");
-      HiveOutputFormat hiveOutputFormat = tblDesc.getOutputFileFormatClass().newInstance();
+      HiveOutputFormat hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(jc, tblDesc);
       tempOutPath = new Path(tmpFile.toString());
       JobConf localJc = getLocalFSJobConfClone(jc);
       rw = HiveFileFormatUtils.getRecordWriter(this.jobCloneUsingLocalFs,
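Note: createEmptyFile now receives a resolved HiveOutputFormat rather than a Class it must newInstance(), and the two createDummyFile* helpers widen to throws Exception because getHiveOutputFormat can throw HiveException. A compilable sketch of the narrowed contract; parameter names mirror the diff, the class itself is illustrative:

```java
import java.util.Properties;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class EmptyFileSketch {
  // The caller resolves the format first, so no reflection happens here.
  static Path writeEmptyFile(HiveOutputFormat outFileFormat, JobConf job,
      Path newFilePath, Properties props) throws Exception {
    RecordWriter recWriter = outFileFormat.getHiveRecordWriter(job, newFilePath,
        Text.class, false, props, null);
    recWriter.close(false); // zero rows: just materialize the file
    return newFilePath;
  }
}
```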
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
index 2ac60c0..7d0ca50 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
@@ -19,7 +19,6 @@
 package org.apache.hadoop.hive.ql.io;
 
 import java.io.IOException;
-import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -27,7 +26,9 @@
 import java.util.Map.Entry;
 import java.util.Properties;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -35,7 +36,6 @@
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -67,23 +67,16 @@ static {
     outputFormatSubstituteMap =
-        new HashMap<Class<? extends OutputFormat>, Class<? extends HiveOutputFormat>>();
+        new ConcurrentHashMap<Class<? extends OutputFormat>, Class<? extends OutputFormat>>();
     HiveFileFormatUtils.registerOutputFormatSubstitute(
         IgnoreKeyTextOutputFormat.class, HiveIgnoreKeyTextOutputFormat.class);
     HiveFileFormatUtils.registerOutputFormatSubstitute(
         SequenceFileOutputFormat.class, HiveSequenceFileOutputFormat.class);
   }
 
-  private static ThreadLocal<String> tRealOutputFormat = new ThreadLocal<String>() {
-    @Override
-    protected String initialValue() {
-      return null;
-    }
-  };
-
   @SuppressWarnings("unchecked")
-  private static Map<Class<? extends OutputFormat>, Class<? extends HiveOutputFormat>>
-      outputFormatSubstituteMap;
+  private static Map<Class<? extends OutputFormat>, Class<? extends OutputFormat>>
+      outputFormatSubstituteMap;
 
   /**
    * register a substitute.
@@ -93,8 +86,7 @@ protected String initialValue() {
    * @param substitute
    */
   @SuppressWarnings("unchecked")
-  public static synchronized void registerOutputFormatSubstitute(
-      Class<? extends OutputFormat> origin,
+  public static void registerOutputFormatSubstitute(Class<? extends OutputFormat> origin,
       Class<? extends HiveOutputFormat> substitute) {
     outputFormatSubstituteMap.put(origin, substitute);
   }
@@ -103,44 +95,19 @@ public static synchronized void registerOutputFormatSubstitute(
    * get a OutputFormat's substitute HiveOutputFormat.
    */
   @SuppressWarnings("unchecked")
-  public static synchronized Class<? extends HiveOutputFormat> getOutputFormatSubstitute(
-      Class<?> origin, boolean storagehandlerflag) {
-    if (HiveOutputFormat.class.isAssignableFrom(origin)) {
-      return (Class<? extends HiveOutputFormat>) origin;
+  public static Class<? extends OutputFormat> getOutputFormatSubstitute(
+      Class<?> origin) {
+    if (origin == null || HiveOutputFormat.class.isAssignableFrom(origin)) {
+      return (Class<? extends OutputFormat>) origin; // hive native
     }
-    Class<? extends HiveOutputFormat> result = outputFormatSubstituteMap
-        .get(origin);
-    if ((storagehandlerflag == true) && (result == null || result == HivePassThroughOutputFormat.class)) {
-      HiveFileFormatUtils.setRealOutputFormatClassName(origin.getName());
-      result = HivePassThroughOutputFormat.class;
+    Class<? extends OutputFormat> substitute = outputFormatSubstituteMap.get(origin);
+    if (substitute != null) {
+      return substitute; // substituted
     }
-    return result;
-  }
-
-  /**
-   * get a RealOutputFormatClassName corresponding to the HivePassThroughOutputFormat
-   */
-  @SuppressWarnings("unchecked")
-  public static String getRealOutputFormatClassName()
-  {
-    return tRealOutputFormat.get();
+    return (Class<? extends OutputFormat>) origin;
   }
 
   /**
-   * set a RealOutputFormatClassName corresponding to the HivePassThroughOutputFormat
-   */
-  public static void setRealOutputFormatClassName(
-      String destination) {
-    if (destination != null){
-      tRealOutputFormat.set(destination);
-    }
-    else {
-      return;
-    }
-  }
-
-
-  /**
    * get the final output path of a given FileOutputFormat.
    *
    * @param parent
@@ -279,39 +246,34 @@ public static RecordWriter getHiveRecordWriter(JobConf jc,
   }
 
   public static RecordWriter getRecordWriter(JobConf jc,
-      HiveOutputFormat hiveOutputFormat,
-      final Class valueClass, boolean isCompressed,
+      OutputFormat outputFormat,
+      Class valueClass, boolean isCompressed,
       Properties tableProp, Path outPath, Reporter reporter
       ) throws IOException, HiveException {
-    if (hiveOutputFormat != null) {
-      return hiveOutputFormat.getHiveRecordWriter(jc, outPath, valueClass,
-          isCompressed, tableProp, reporter);
+    if (!(outputFormat instanceof HiveOutputFormat)) {
+      outputFormat = new HivePassThroughOutputFormat(outputFormat);
     }
-    return null;
+    return ((HiveOutputFormat)outputFormat).getHiveRecordWriter(
+        jc, outPath, valueClass, isCompressed, tableProp, reporter);
   }
 
-  private static HiveOutputFormat getHiveOutputFormat(JobConf jc, TableDesc tableInfo)
+  public static HiveOutputFormat getHiveOutputFormat(Configuration conf, TableDesc tableDesc)
       throws HiveException {
-    boolean storagehandlerofhivepassthru = false;
-    HiveOutputFormat hiveOutputFormat;
-    try {
-      if (tableInfo.getJobProperties() != null) {
-        if (tableInfo.getJobProperties().get(
-            HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY) != null) {
-          jc.set(HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY,
-              tableInfo.getJobProperties()
-                  .get(HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY));
-          storagehandlerofhivepassthru = true;
-        }
-      }
-      if (storagehandlerofhivepassthru) {
-        return ReflectionUtils.newInstance(tableInfo.getOutputFileFormatClass(), jc);
-      } else {
-        return tableInfo.getOutputFileFormatClass().newInstance();
-      }
-    } catch (Exception e) {
-      throw new HiveException(e);
+    return getHiveOutputFormat(conf, tableDesc.getOutputFileFormatClass());
+  }
+
+  public static HiveOutputFormat getHiveOutputFormat(Configuration conf, PartitionDesc partDesc)
+      throws HiveException {
+    return getHiveOutputFormat(conf, partDesc.getOutputFileFormatClass());
+  }
+
+  private static HiveOutputFormat getHiveOutputFormat(
+      Configuration conf, Class<? extends OutputFormat> outputClass) throws HiveException {
+    OutputFormat outputFormat = ReflectionUtils.newInstance(outputClass, conf);
+    if (!(outputFormat instanceof HiveOutputFormat)) {
+      outputFormat = new HivePassThroughOutputFormat(outputFormat);
     }
+    return (HiveOutputFormat) outputFormat;
   }
 
   public static RecordUpdater getAcidRecordUpdater(JobConf jc, TableDesc tableInfo, int bucket,
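Note: the ThreadLocal plumbing is gone; thread-safety now rests on the ConcurrentHashMap, and the wrap-or-cast decision is purely local. The rule, restated as a standalone method using only what the diff shows:

```java
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat;
import org.apache.hadoop.mapred.OutputFormat;

public class WrapSketch {
  // Any OutputFormat that is not already a HiveOutputFormat is wrapped once;
  // HiveOutputFormat instances are used as-is.
  static HiveOutputFormat asHiveOutputFormat(OutputFormat outputFormat) {
    if (!(outputFormat instanceof HiveOutputFormat)) {
      outputFormat = new HivePassThroughOutputFormat(outputFormat);
    }
    return (HiveOutputFormat) outputFormat;
  }
}
```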
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HivePassThroughOutputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HivePassThroughOutputFormat.java
index 04eff93..5855288 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/HivePassThroughOutputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/HivePassThroughOutputFormat.java
@@ -21,80 +21,35 @@
 import java.io.IOException;
 import java.util.Properties;
 
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.JavaUtils;
-import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.mapred.RecordWriter;
 import org.apache.hadoop.util.Progressable;
-import org.apache.hadoop.util.ReflectionUtils;
 
 /**
  * This pass through class is used to wrap OutputFormat implementations such that new OutputFormats not derived from
  * HiveOutputFormat gets through the checker
  */
-public class HivePassThroughOutputFormat implements Configurable, HiveOutputFormat {
+public class HivePassThroughOutputFormat implements HiveOutputFormat {
 
-  private OutputFormat<? super WritableComparable, ? super Writable> actualOutputFormat;
-  private String actualOutputFormatClass = "";
-  private Configuration conf;
-  private boolean initialized;
+  private final OutputFormat actualOutputFormat;
 
-  public static final String HIVE_PASSTHROUGH_OF_CLASSNAME =
-      "org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat";
-
-  public static final String HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY =
-      "hive.passthrough.storagehandler.of";
-
-  public HivePassThroughOutputFormat() {
-    //construct this class through ReflectionUtils from FileSinkOperator
-    this.actualOutputFormat = null;
-    this.initialized = false;
-  }
-
-  private void createActualOF() throws IOException {
-    Class<? extends OutputFormat> cls;
-    try {
-      if (actualOutputFormatClass != null)
-      {
-        cls =
-            (Class<? extends OutputFormat>) Class.forName(actualOutputFormatClass, true,
-                Utilities.getSessionSpecifiedClassLoader());
-      } else {
-        throw new RuntimeException("Null pointer detected in actualOutputFormatClass");
-      }
-    } catch (ClassNotFoundException e) {
-      throw new IOException(e);
-    }
-    OutputFormat<? super WritableComparable, ? super Writable> actualOF =
-        ReflectionUtils.newInstance(cls, this.getConf());
-    this.actualOutputFormat = actualOF;
+  public HivePassThroughOutputFormat(OutputFormat outputFormat) {
+    actualOutputFormat = outputFormat;
   }
 
   @Override
   public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
-    if (this.initialized == false) {
-      createActualOF();
-      this.initialized = true;
-    }
-    this.actualOutputFormat.checkOutputSpecs(ignored, job);
+    actualOutputFormat.checkOutputSpecs(ignored, job);
   }
 
   @Override
   public org.apache.hadoop.mapred.RecordWriter getRecordWriter(FileSystem ignored,
       JobConf job, String name, Progressable progress) throws IOException {
-    if (this.initialized == false) {
-      createActualOF();
-      this.initialized = true;
-    }
-    return (RecordWriter) this.actualOutputFormat.getRecordWriter(ignored,
+    return (RecordWriter) actualOutputFormat.getRecordWriter(ignored,
         job, name, progress);
   }
 
@@ -102,31 +57,12 @@ public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException
   public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(
       JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed,
       Properties tableProperties, Progressable progress) throws IOException {
-    if (this.initialized == false) {
-      createActualOF();
-    }
-    if (this.actualOutputFormat instanceof HiveOutputFormat) {
-      return ((HiveOutputFormat) this.actualOutputFormat).getHiveRecordWriter(jc,
+    if (actualOutputFormat instanceof HiveOutputFormat) {
+      return ((HiveOutputFormat) actualOutputFormat).getHiveRecordWriter(jc,
           finalOutPath, valueClass, isCompressed, tableProperties, progress);
     }
-    else {
-      FileSystem fs = finalOutPath.getFileSystem(jc);
-      HivePassThroughRecordWriter hivepassthroughrecordwriter = new HivePassThroughRecordWriter(
-          this.actualOutputFormat.getRecordWriter(fs, jc, null, progress));
-      return hivepassthroughrecordwriter;
-    }
-  }
-
-  @Override
-  public Configuration getConf() {
-    return conf;
-  }
-
-  @Override
-  public void setConf(Configuration config) {
-    if (config.get(HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY) != null) {
-      actualOutputFormatClass = config.get(HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY);
-    }
-    this.conf = config;
+    FileSystem fs = finalOutPath.getFileSystem(jc);
+    RecordWriter recordWriter = actualOutputFormat.getRecordWriter(fs, jc, null, progress);
+    return new HivePassThroughRecordWriter(recordWriter);
   }
 }
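Note: the wrapper is now constructed around a live OutputFormat instance instead of being configured after the fact through setConf and a job-conf key. A usage sketch; NullOutputFormat stands in for an arbitrary storage-handler format and safely ignores the null file name the pass-through path supplies:

```java
import java.util.Properties;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.NullOutputFormat;

public class PassThroughSketch {
  static RecordWriter openWriter(JobConf jc, Path out) throws Exception {
    HivePassThroughOutputFormat wrapped =
        new HivePassThroughOutputFormat(new NullOutputFormat());
    // Delegates to NullOutputFormat.getRecordWriter and adapts the result
    // with HivePassThroughRecordWriter.
    return wrapped.getHiveRecordWriter(jc, out, Text.class, false,
        new Properties(), null);
  }
}
```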
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
index 13277a9..4ea79ea 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
@@ -32,7 +32,6 @@
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.ProtectMode;
 import org.apache.hadoop.hive.metastore.Warehouse;
@@ -43,9 +42,9 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
-import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.thrift.TException;
 import org.apache.thrift.protocol.TBinaryProtocol;
 import org.apache.thrift.transport.TMemoryBuffer;
@@ -70,7 +69,7 @@
    * These fields are cached. The information comes from tPartition.
    */
   private Deserializer deserializer;
-  private Class<? extends HiveOutputFormat> outputFormatClass;
+  private Class<? extends OutputFormat> outputFormatClass;
   private Class<? extends InputFormat> inputFormatClass;
 
   /**
@@ -287,18 +286,15 @@ public void setInputFormatClass(Class<? extends InputFormat> inputFormatClass) {
   public void setOutputFormatClass(Class<? extends OutputFormat> outputFormatClass) {
     this.outputFormatClass = outputFormatClass;
     tPartition.getSd().setOutputFormat(HiveFileFormatUtils
-        .getOutputFormatSubstitute(outputFormatClass, false).toString());
+        .getOutputFormatSubstitute(outputFormatClass).toString());
   }
 
   final public Class<? extends InputFormat> getInputFormatClass() throws HiveException {
     if (inputFormatClass == null) {
-      String clsName = null;
-      if (tPartition != null && tPartition.getSd() != null) {
-        clsName = tPartition.getSd().getInputFormat();
-      }
+      String clsName = tPartition.getSd().getInputFormat();
       if (clsName == null) {
-        clsName = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName();
+        return inputFormatClass = table.getInputFormatClass();
       }
       try {
         inputFormatClass = ((Class<? extends InputFormat>) Class.forName(clsName, true,
@@ -310,25 +306,17 @@ public void setOutputFormatClass(Class<? extends HiveOutputFormat> outputFormatC
     return inputFormatClass;
   }
 
-  final public Class<? extends HiveOutputFormat> getOutputFormatClass()
+  final public Class<? extends OutputFormat> getOutputFormatClass()
       throws HiveException {
     if (outputFormatClass == null) {
-      String clsName = null;
-      if (tPartition != null && tPartition.getSd() != null) {
-        clsName = tPartition.getSd().getOutputFormat();
-      }
+      String clsName = tPartition.getSd().getOutputFormat();
       if (clsName == null) {
-        clsName = HiveSequenceFileOutputFormat.class.getName();
+        return outputFormatClass = table.getOutputFormatClass();
      }
      try {
-        Class<?> c = (Class.forName(clsName, true,
-            Utilities.getSessionSpecifiedClassLoader()));
+        Class<?> c = Class.forName(clsName, true, Utilities.getSessionSpecifiedClassLoader());
         // Replace FileOutputFormat for backward compatibility
-        if (!HiveOutputFormat.class.isAssignableFrom(c)) {
-          outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(c,false);
-        } else {
-          outputFormatClass = (Class<? extends HiveOutputFormat>)c;
-        }
+        outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(c);
       } catch (ClassNotFoundException e) {
         throw new HiveException("Class not found: " + clsName, e);
       }
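Note: with the null checks dropped, a partition whose storage descriptor names no input or output format now inherits the table's classes instead of silently defaulting to SequenceFile formats. In sketch form, assuming only the accessors shown in the diff:

```java
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.mapred.OutputFormat;

public class PartitionFormatSketch {
  static Class<? extends OutputFormat> outputFormatOf(Partition partition)
      throws HiveException {
    // Internally: sd.getOutputFormat() == null -> table.getOutputFormatClass().
    return partition.getOutputFormatClass();
  }
}
```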
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index 211ab6c..35d60f8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -49,8 +49,6 @@
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
-import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
-import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat;
 import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
 import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -64,6 +62,7 @@
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 
 /**
@@ -85,10 +84,11 @@
    * These fields are all cached fields. The information comes from tTable.
    */
   private Deserializer deserializer;
-  private Class<? extends HiveOutputFormat> outputFormatClass;
+  private Class<? extends OutputFormat> outputFormatClass;
   private Class<? extends InputFormat> inputFormatClass;
   private Path path;
-  private HiveStorageHandler storageHandler;
+
+  private transient HiveStorageHandler storageHandler;
 
   /**
    * Used only for serialization.
@@ -222,7 +222,7 @@ public void setInputFormatClass(Class<? extends InputFormat> inputFormatClass) {
     tTable.getSd().setInputFormat(inputFormatClass.getName());
   }
 
-  public void setOutputFormatClass(Class<? extends HiveOutputFormat> outputFormatClass) {
+  public void setOutputFormatClass(Class<? extends OutputFormat> outputFormatClass) {
     this.outputFormatClass = outputFormatClass;
     tTable.getSd().setOutputFormat(outputFormatClass.getName());
   }
@@ -279,7 +279,7 @@ final public Deserializer getDeserializerFromMetaStore(boolean skipConfError) {
   }
 
   public HiveStorageHandler getStorageHandler() {
-    if (storageHandler != null) {
+    if (storageHandler != null || !isNonNative()) {
       return storageHandler;
     }
     try {
@@ -313,9 +313,7 @@ public HiveStorageHandler getStorageHandler() {
     return inputFormatClass;
   }
 
-  final public Class<? extends HiveOutputFormat> getOutputFormatClass() {
-    // Replace FileOutputFormat for backward compatibility
-    boolean storagehandler = false;
+  final public Class<? extends OutputFormat> getOutputFormatClass() {
     if (outputFormatClass == null) {
       try {
         String className = tTable.getSd().getOutputFormat();
@@ -326,34 +324,10 @@ public HiveStorageHandler getStorageHandler() {
         }
           c = getStorageHandler().getOutputFormatClass();
         } else {
-          // if HivePassThroughOutputFormat
-          if (className.equals(
-              HivePassThroughOutputFormat.HIVE_PASSTHROUGH_OF_CLASSNAME)) {
-            if (getStorageHandler() != null) {
-              // get the storage handler real output format class
-              c = getStorageHandler().getOutputFormatClass();
-            }
-            else {
-              //should not happen
-              return null;
-            }
-          }
-          else {
-            c = Class.forName(className, true,
-                Utilities.getSessionSpecifiedClassLoader());
-          }
-        }
-        if (!HiveOutputFormat.class.isAssignableFrom(c)) {
-          if (getStorageHandler() != null) {
-            storagehandler = true;
-          }
-          else {
-            storagehandler = false;
-          }
-          outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(c,storagehandler);
-        } else {
-          outputFormatClass = (Class<? extends HiveOutputFormat>)c;
+          c = Class.forName(className, true, Utilities.getSessionSpecifiedClassLoader());
         }
+        // Replace FileOutputFormat for backward compatibility
+        outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(c);
       } catch (ClassNotFoundException e) {
         throw new RuntimeException(e);
       }
@@ -708,8 +682,7 @@ public void setOutputFormatClass(String name) throws HiveException {
     }
     try {
       Class<?> origin = Class.forName(name, true, Utilities.getSessionSpecifiedClassLoader());
-      setOutputFormatClass(HiveFileFormatUtils
-          .getOutputFormatSubstitute(origin,false));
+      setOutputFormatClass(HiveFileFormatUtils.getOutputFormatSubstitute(origin));
     } catch (ClassNotFoundException e) {
       throw new HiveException("Class not found: " + name, e);
    }
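Note: Table.getOutputFormatClass loses the HIVE_PASSTHROUGH_OF_CLASSNAME branch and the storagehandler flag; every class, however obtained, goes through the same substitution. A hedged restatement of the three cases getOutputFormatSubstitute now covers, with its signature taken from this patch:

```java
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.mapred.OutputFormat;

public class ResolveSketch {
  static Class<? extends OutputFormat> resolve(Class<?> declared) {
    // 1. Already a HiveOutputFormat: returned unchanged ("hive native").
    // 2. Registered in the substitute map (text, sequence file): swapped.
    // 3. Anything else: returned as-is, to be pass-through wrapped later.
    return HiveFileFormatUtils.getOutputFormatSubstitute(declared);
  }
}
```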
@@ -462,8 +461,8 @@ private static void checkTable(Table table, CreateTableDesc tableDesc)
        */
       try {
         Class<?> origin = Class.forName(importedofc, true, Utilities.getSessionSpecifiedClassLoader());
-        Class<? extends HiveOutputFormat> replaced = HiveFileFormatUtils
-            .getOutputFormatSubstitute(origin,false);
+        Class<? extends OutputFormat> replaced = HiveFileFormatUtils
+            .getOutputFormatSubstitute(origin);
         if (replaced == null) {
           throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
             .getMsg());
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 016a6d8..9097529 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -20,7 +20,6 @@
 
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
 
-import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Function;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableList.Builder;
@@ -217,6 +216,7 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.OutputFormat;
 import org.eigenbase.rel.AggregateCall;
 import org.eigenbase.rel.AggregateRelBase;
 import org.eigenbase.rel.Aggregation;
@@ -1736,7 +1736,8 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException
         }
 
         Class<? extends OutputFormat> outputFormatClass = ts.tableHandle.getOutputFormatClass();
-        if (!HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
+        if (!ts.tableHandle.isNonNative() &&
+            !HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
           throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
               .getMsg(ast, "The class is " + outputFormatClass.toString()));
         }
@@ -12455,7 +12456,7 @@ private boolean isAcidTable(Table tab) {
     return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true");
   }
 
-  private boolean isAcidOutputFormat(Class<? extends HiveOutputFormat> of) {
+  private boolean isAcidOutputFormat(Class<? extends OutputFormat> of) {
     Class<?>[] interfaces = of.getInterfaces();
     for (Class<?> iface : interfaces) {
       if (iface.equals(AcidOutputFormat.class)) {
@@ -12473,7 +12474,7 @@ private boolean isAcidOutputFormat(Class<? extends HiveOutputFormat> of) {
       AcidUtils.Operation.INSERT);
   }
 
-  private AcidUtils.Operation getAcidType(Class<? extends HiveOutputFormat> of) {
+  private AcidUtils.Operation getAcidType(Class<? extends OutputFormat> of) {
     if (SessionState.get() == null || !SessionState.get().getTxnMgr().supportsAcid()) {
       return AcidUtils.Operation.NOT_ACID;
     } else if (isAcidOutputFormat(of)) {
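Note: getMetaData no longer rejects non-native targets whose output format is not a HiveOutputFormat, since such formats are now wrapped at write time. The relaxed predicate, isolated; the parameter names are stand-ins for the analyzer's locals:

```java
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;

public class OutputFormatCheckSketch {
  // A target is acceptable if it is non-native (storage handler backed) or
  // its output format already is a HiveOutputFormat.
  static boolean isValidTarget(boolean nonNative, Class<?> outputFormatClass) {
    return nonNative || HiveOutputFormat.class.isAssignableFrom(outputFormatClass);
  }
}
```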
@@ -420,14 +420,14 @@ public void validate(HiveConf conf)
       try {
         Class<?> origin = Class.forName(this.getOutputFormat(), true,
             Utilities.getSessionSpecifiedClassLoader());
-        Class<? extends HiveOutputFormat> replaced = HiveFileFormatUtils
-            .getOutputFormatSubstitute(origin,false);
-        if (replaced == null) {
+        Class<? extends OutputFormat> replaced = HiveFileFormatUtils
+            .getOutputFormatSubstitute(origin);
+        if (!HiveOutputFormat.class.isAssignableFrom(replaced)) {
           throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
             .getMsg());
         }
       } catch (ClassNotFoundException e) {
-        throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
+        throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg(), e);
       }
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
index 10c38d9..b77add4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
@@ -38,6 +38,7 @@
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.util.ReflectionUtils;
 
 /**
@@ -58,7 +59,7 @@
   private TableDesc tableDesc;
   private LinkedHashMap<String, String> partSpec;
   private Class<? extends InputFormat> inputFileFormatClass;
-  private Class<? extends HiveOutputFormat> outputFileFormatClass;
+  private Class<? extends OutputFormat> outputFileFormatClass;
   private Properties properties;
 
   private String baseFileName;
@@ -148,7 +149,7 @@ public void setInputFileFormatClass(
     }
   }
 
-  public Class<? extends HiveOutputFormat> getOutputFileFormatClass() {
+  public Class<? extends OutputFormat> getOutputFileFormatClass() {
     if (outputFileFormatClass == null && tableDesc != null) {
       setOutputFileFormatClass(tableDesc.getOutputFileFormatClass());
     }
@@ -156,8 +157,8 @@ public void setInputFileFormatClass(
   }
 
   public void setOutputFileFormatClass(final Class outputFileFormatClass) {
-    Class<? extends HiveOutputFormat> outputClass = outputFileFormatClass == null ? null :
-        HiveFileFormatUtils.getOutputFormatSubstitute(outputFileFormatClass,false);
+    Class<? extends OutputFormat> outputClass = outputFileFormatClass == null ? null :
+        HiveFileFormatUtils.getOutputFormatSubstitute(outputFileFormatClass);
     if (outputClass != null) {
       this.outputFileFormatClass = (Class)
           CLASS_INTERNER.intern(outputClass);
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index fdc1f62..ae99bd3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -42,9 +42,7 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
-import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat;
 import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
@@ -827,13 +825,6 @@ private static void configureJobPropertiesForStorageHandler(boolean input,
           "using configureTableJobProperties",e);
       storageHandler.configureTableJobProperties(tableDesc, jobProperties);
     }
-    if (tableDesc.getOutputFileFormatClass().getName()
-        == HivePassThroughOutputFormat.HIVE_PASSTHROUGH_OF_CLASSNAME) {
-      // get the real output format when we register this for the table
-      jobProperties.put(
-          HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY,
-          HiveFileFormatUtils.getRealOutputFormatClassName());
-    }
     // Job properties are only relevant for non-native tables, so
     // for native tables, leave it null to avoid cluttering up
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
index 408ddf5..374e8b6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
@@ -28,12 +28,11 @@
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
-import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
-import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.OutputFormat;
 
 /**
  * TableDesc.
@@ -42,7 +41,7 @@ public class TableDesc implements Serializable, Cloneable {
   private static final long serialVersionUID = 1L;
 
   private Class<? extends InputFormat> inputFileFormatClass;
-  private Class<? extends HiveOutputFormat> outputFileFormatClass;
+  private Class<? extends OutputFormat> outputFileFormatClass;
   private java.util.Properties properties;
   private Map<String, String> jobProperties;
 
@@ -59,7 +58,7 @@ public TableDesc(
       final Class<?> outputFormatClass, final Properties properties) {
     this.inputFileFormatClass = inputFormatClass;
     outputFileFormatClass = HiveFileFormatUtils
-        .getOutputFormatSubstitute(outputFormatClass, false);
+        .getOutputFormatSubstitute(outputFormatClass);
     this.properties = properties;
   }
 
@@ -94,13 +93,13 @@ public void setInputFileFormatClass(
     this.inputFileFormatClass = inputFileFormatClass;
   }
 
-  public Class<? extends HiveOutputFormat> getOutputFileFormatClass() {
+  public Class<? extends OutputFormat> getOutputFileFormatClass() {
     return outputFileFormatClass;
   }
 
-  public void setOutputFileFormatClass(final Class outputFileFormatClass) {
+  public void setOutputFileFormatClass(Class outputFileFormatClass) {
     this.outputFileFormatClass = HiveFileFormatUtils
-        .getOutputFormatSubstitute(outputFileFormatClass, false);
+        .getOutputFormatSubstitute(outputFileFormatClass);
   }
 
   @Explain(displayName = "properties", normalExplain = false)
@@ -142,12 +141,7 @@ public String getInputFileFormatClassName() {
 
   @Explain(displayName = "output format")
   public String getOutputFileFormatClassName() {
-    if (getOutputFileFormatClass().getName() == HivePassThroughOutputFormat.HIVE_PASSTHROUGH_OF_CLASSNAME) {
-      return HiveFileFormatUtils.getRealOutputFormatClassName();
-    }
-    else {
-      return getOutputFileFormatClass().getName();
-    }
+    return getOutputFileFormatClass().getName();
   }
 
   public boolean isNonNative() {
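Note: with HIVE_PASSTHROUGH_OF_CLASSNAME gone, getOutputFileFormatClassName is simply the stored class name, and serialized plans never see the wrapper. A small end-to-end check of the substitution rules this patch leaves in place, using the two registrations from HiveFileFormatUtils' static initializer; the assertion style is illustrative:

```java
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.lib.NullOutputFormat;

public class SubstituteDemo {
  public static void main(String[] args) {
    // Registered mapping: plain text output is swapped for the Hive flavor.
    Class<? extends OutputFormat> text =
        HiveFileFormatUtils.getOutputFormatSubstitute(IgnoreKeyTextOutputFormat.class);
    assert text == HiveIgnoreKeyTextOutputFormat.class;

    // Unregistered non-Hive format: returned unchanged; record-writer lookup
    // will wrap it in HivePassThroughOutputFormat when needed.
    Class<? extends OutputFormat> plain =
        HiveFileFormatUtils.getOutputFormatSubstitute(NullOutputFormat.class);
    assert plain == NullOutputFormat.class;
  }
}
```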