diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index 920bb1c..66ba0ef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -294,7 +294,10 @@ public int execute(DriverContext driverContext) {
           throw new HiveException(
               "addFiles: filesystem error in check phase", e);
         }
-        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
+
+        // handle file format check for table level
+        if (tbd.getPartitionSpec().isEmpty() &&
+            HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
           // Check if the file format of the file matches that of the table.
           boolean flag = HiveFileFormatUtils.checkInputFormat(
               srcFs, conf, tbd.getTable().getInputFileFormatClass(), files);
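The guard added above means the MoveTask-time check now fires only for table-level loads (empty partition spec); partitioned LOAD statements are instead validated at compile time by the LoadSemanticAnalyzer change further down. Both paths funnel into HiveFileFormatUtils.checkInputFormat, which depends on the ORC checker registration added in the next file. A minimal, self-contained sketch of that call follows; the class name is hypothetical, and the checkInputFormat parameter types are assumed from the two call sites in this patch, so verify them against your Hive version:

import java.util.ArrayList;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;

// Hypothetical helper, not part of the patch.
public class FormatCheckSketch {
  // Validates candidate files against a destination format before a load,
  // mirroring the MoveTask call above; ORC is just the example format here.
  public static boolean filesMatchOrc(HiveConf conf, Path dir) throws Exception {
    FileSystem fs = dir.getFileSystem(conf);
    ArrayList<FileStatus> files = new ArrayList<FileStatus>();
    for (FileStatus stat : fs.listStatus(dir)) {
      files.add(stat);
    }
    // False if any file fails the checker registered for OrcInputFormat.
    return HiveFileFormatUtils.checkInputFormat(fs, conf, OrcInputFormat.class, files);
  }
}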
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
index 06d3df7..a82e5e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
@@ -44,7 +44,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
 import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -121,26 +121,6 @@ public static void registerOutputFormatSubstitute(Class origin,
     return (Class) origin;
   }
 
-  /**
-   * get the final output path of a given FileOutputFormat.
-   *
-   * @param parent
-   *          parent dir of the expected final output path
-   * @param jc
-   *          job configuration
-   * @deprecated
-   */
-  @Deprecated
-  public static Path getOutputFormatFinalPath(Path parent, String taskId, JobConf jc,
-      HiveOutputFormat hiveOutputFormat, boolean isCompressed,
-      Path defaultFinalPath) throws IOException {
-    if (hiveOutputFormat instanceof HiveIgnoreKeyTextOutputFormat) {
-      return new Path(parent, taskId
-          + Utilities.getFileExtension(jc, isCompressed));
-    }
-    return defaultFinalPath;
-  }
-
   static {
     inputFormatCheckerMap =
         new HashMap<Class<? extends InputFormat>, Class<? extends InputFormatChecker>>();
@@ -148,6 +128,8 @@ public static Path getOutputFormatFinalPath(Path parent, String taskId, JobConf
         SequenceFileInputFormat.class, SequenceFileInputFormatChecker.class);
     HiveFileFormatUtils.registerInputFormatChecker(RCFileInputFormat.class,
         RCFileInputFormat.class);
+    HiveFileFormatUtils.registerInputFormatChecker(OrcInputFormat.class,
+        OrcInputFormat.class);
     inputFormatCheckerInstanceCache =
         new HashMap<Class<? extends InputFormatChecker>, InputFormatChecker>();
   }
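Registering OrcInputFormat as its own checker works because the class implements Hive's InputFormatChecker interface: its validateInput opens each file with the ORC reader, which fails for files lacking a valid ORC footer. For reference, a paraphrased sketch of the checker contract; see org.apache.hadoop.hive.ql.io.InputFormatChecker for the authoritative definition, since the collection type of the file list varies across Hive versions:

import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;

// Paraphrased contract, not the verbatim source.
public interface InputFormatChecker {
  // Returns true only if every listed file is readable by the associated
  // InputFormat; checkInputFormat instantiates the registered checker
  // for the table's InputFormat and delegates to this method.
  boolean validateInput(FileSystem fs, HiveConf conf,
      ArrayList<FileStatus> files) throws IOException;
}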
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index c488029..7b6ab0b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -41,9 +41,7 @@ import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.io.FileFormatException;
-import org.apache.hadoop.hive.ql.io.orc.OrcFile;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
@@ -52,6 +50,8 @@ import org.apache.hadoop.hive.ql.plan.StatsWork;
 
 import org.apache.hadoop.mapred.InputFormat;
 
+import com.google.common.collect.Lists;
+
 /**
  * LoadSemanticAnalyzer.
  *
@@ -128,7 +128,7 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti
     return new URI(fromScheme, fromAuthority, path, null, null);
   }
 
-  private FileStatus[] applyConstraintsAndGetFiles(URI fromURI, URI toURI, Tree ast,
+  private List<FileStatus> applyConstraintsAndGetFiles(URI fromURI, Tree ast,
       boolean isLocal) throws SemanticException {
 
     FileStatus[] srcs = null;
@@ -159,7 +159,7 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti
       throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e);
     }
 
-    return srcs;
+    return Lists.newArrayList(srcs);
   }
 
   @Override
@@ -209,9 +206,6 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
       throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
     }
 
-    URI toURI = ((ts.partHandle != null) ? ts.partHandle.getDataLocation()
-        : ts.tableHandle.getDataLocation()).toUri();
-
     List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
     if ((parts != null && parts.size() > 0)
         && (ts.partSpec == null || ts.partSpec.size() == 0)) {
@@ -219,11 +216,11 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
     }
 
     // make sure the arguments make sense
-    FileStatus[] files = applyConstraintsAndGetFiles(fromURI, toURI, fromTree, isLocal);
+    List<FileStatus> files = applyConstraintsAndGetFiles(fromURI, fromTree, isLocal);
 
     // for managed tables, make sure the file formats match
     if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType())) {
-      ensureFileFormatsMatch(ts, files);
+      ensureFileFormatsMatch(ts, files, fromURI);
     }
     inputs.add(toReadEntity(new Path(fromURI)));
     Task<? extends Serializable> rTask = null;
@@ -317,7 +314,9 @@ else if (statTask != null) {
     }
   }
 
-  private void ensureFileFormatsMatch(TableSpec ts, FileStatus[] fileStatuses) throws SemanticException {
+  private void ensureFileFormatsMatch(TableSpec ts, List<FileStatus> fileStatuses,
+      final URI fromURI)
+      throws SemanticException {
     final Class<? extends InputFormat> destInputFormat;
     try {
       if (ts.getPartSpec() == null || ts.getPartSpec().isEmpty()) {
@@ -329,23 +328,16 @@ private void ensureFileFormatsMatch(TableSpec ts, FileStatus[] fileStatuses) thr
       throw new SemanticException(e);
     }
 
-    // Other file formats should do similar check to make sure file formats match
-    // when doing LOAD DATA .. INTO TABLE
-    if (OrcInputFormat.class.equals(destInputFormat)) {
-      for (FileStatus fileStatus : fileStatuses) {
-        try {
-          Path filePath = fileStatus.getPath();
-          FileSystem fs = FileSystem.get(filePath.toUri(), conf);
-          // just creating orc reader is going to do sanity checks to make sure its valid ORC file
-          OrcFile.createReader(fs, filePath);
-        } catch (FileFormatException e) {
-          throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
-              " table is stored as ORC but the file being loaded is not a valid ORC file."));
-        } catch (IOException e) {
-          throw new SemanticException("Unable to load data to destination table." +
-              " Error: " + e.getMessage());
-        }
+    try {
+      FileSystem fs = FileSystem.get(fromURI, conf);
+      boolean validFormat = HiveFileFormatUtils.checkInputFormat(fs, conf, destInputFormat,
+          fileStatuses);
+      if (!validFormat) {
+        throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg());
       }
+    } catch (Exception e) {
+      throw new SemanticException("Unable to load data to destination table." +
+          " Error: " + e.getMessage());
     }
   }
 }
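One behavioral consequence is visible in the expected test outputs below: the ErrorMsg-based SemanticException for an invalid format is now thrown inside the new try block, so the blanket catch (Exception e) re-wraps it and the "[Error 30019]" code disappears from the reported message. A minimal, self-contained demonstration of that re-wrapping (toy SemanticException class, not Hive's):

// For illustration only: shows why the .q.out files below lose the
// "[Error 30019]" prefix after this patch.
public class RewrapDemo {
  static class SemanticException extends Exception {
    SemanticException(String msg) { super(msg); }
  }

  public static void main(String[] args) {
    try {
      try {
        boolean validFormat = false; // pretend checkInputFormat returned false
        if (!validFormat) {
          throw new SemanticException("The file that you are trying to load does not"
              + " match the file format of the destination table.");
        }
      } catch (Exception e) {
        // The inner SemanticException is caught here and re-wrapped, losing
        // its association with the canonical error code.
        throw new SemanticException("Unable to load data to destination table."
            + " Error: " + e.getMessage());
      }
    } catch (SemanticException e) {
      // Prints the same FAILED line the updated expected outputs assert.
      System.out.println("FAILED: SemanticException " + e.getMessage());
    }
  }
}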
diff --git a/ql/src/test/results/clientnegative/load_orc_negative1.q.out b/ql/src/test/results/clientnegative/load_orc_negative1.q.out
index ca15a30..d103546 100644
--- a/ql/src/test/results/clientnegative/load_orc_negative1.q.out
+++ b/ql/src/test/results/clientnegative/load_orc_negative1.q.out
@@ -6,4 +6,4 @@ POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype d
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orc_test
-FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file.
+FAILED: SemanticException Unable to load data to destination table. Error: The file that you are trying to load does not match the file format of the destination table.
diff --git a/ql/src/test/results/clientnegative/load_orc_negative2.q.out b/ql/src/test/results/clientnegative/load_orc_negative2.q.out
index 77fb50e..9b0cb69 100644
--- a/ql/src/test/results/clientnegative/load_orc_negative2.q.out
+++ b/ql/src/test/results/clientnegative/load_orc_negative2.q.out
@@ -22,4 +22,4 @@ POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype d
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orc_test
-FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file.
+FAILED: SemanticException Unable to load data to destination table. Error: The file that you are trying to load does not match the file format of the destination table.
diff --git a/ql/src/test/results/clientnegative/load_orc_negative3.q.out b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
index 77fb50e..9b0cb69 100644
--- a/ql/src/test/results/clientnegative/load_orc_negative3.q.out
+++ b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
@@ -22,4 +22,4 @@ POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype d
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orc_test
-FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file.
+FAILED: SemanticException Unable to load data to destination table. Error: The file that you are trying to load does not match the file format of the destination table.
diff --git a/ql/src/test/results/clientnegative/load_orc_negative_part.q.out b/ql/src/test/results/clientnegative/load_orc_negative_part.q.out
index 32dd627..2e8068d 100644
--- a/ql/src/test/results/clientnegative/load_orc_negative_part.q.out
+++ b/ql/src/test/results/clientnegative/load_orc_negative_part.q.out
@@ -49,4 +49,4 @@ POSTHOOK: query: alter table orc_test add partition(ds='11')
 POSTHOOK: type: ALTERTABLE_ADDPARTS
 POSTHOOK: Output: default@orc_test
 POSTHOOK: Output: default@orc_test@ds=11
-FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file.
+FAILED: SemanticException Unable to load data to destination table. Error: The file that you are trying to load does not match the file format of the destination table.