diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index 85fa9c9..9d2702f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -128,9 +128,11 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti
     return new URI(fromScheme, fromAuthority, path, null, null);
   }
 
-  private void applyConstraints(URI fromURI, URI toURI, Tree ast,
+  private FileStatus[] applyConstraintsAndGetFiles(URI fromURI, URI toURI, Tree ast,
       boolean isLocal) throws SemanticException {
 
+    FileStatus[] srcs = null;
+
     // local mode implies that scheme should be "file"
     // we can change this going forward
     if (isLocal && !fromURI.getScheme().equals("file")) {
@@ -139,7 +141,7 @@ private void applyConstraints(URI fromURI, URI toURI, Tree ast,
     }
 
     try {
-      FileStatus[] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
+      srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
       if (srcs == null || srcs.length == 0) {
         throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
             "No files matching path " + fromURI));
@@ -168,6 +170,8 @@ private void applyConstraints(URI fromURI, URI toURI, Tree ast,
           + "\"hive.metastore.warehouse.dir\" do not conflict.";
       throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason));
     }
+
+    return srcs;
   }
 
   @Override
@@ -227,11 +231,11 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
     }
 
     // make sure the arguments make sense
-    applyConstraints(fromURI, toURI, fromTree, isLocal);
+    FileStatus[] files = applyConstraintsAndGetFiles(fromURI, toURI, fromTree, isLocal);
 
     // for managed tables, make sure the file formats match
     if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType())) {
-      ensureFileFormatsMatch(ts, fromURI);
+      ensureFileFormatsMatch(ts, files);
     }
     inputs.add(toReadEntity(new Path(fromURI)));
     Task rTask = null;
@@ -325,7 +329,7 @@ else if (statTask != null) {
     }
   }
 
-  private void ensureFileFormatsMatch(TableSpec ts, URI fromURI) throws SemanticException {
+  private void ensureFileFormatsMatch(TableSpec ts, FileStatus[] fileStatuses) throws SemanticException {
     final Class destInputFormat;
     try {
       if (ts.getPartSpec() == null || ts.getPartSpec().isEmpty()) {
@@ -340,17 +344,19 @@ private void ensureFileFormatsMatch(TableSpec ts, URI fromURI) throws SemanticEx
     // Other file formats should do similar check to make sure file formats match
     // when doing LOAD DATA .. INTO TABLE
     if (OrcInputFormat.class.equals(destInputFormat)) {
-      Path inputFilePath = new Path(fromURI);
-      try {
-        FileSystem fs = FileSystem.get(fromURI, conf);
-        // just creating orc reader is going to do sanity checks to make sure its valid ORC file
-        OrcFile.createReader(fs, inputFilePath);
-      } catch (FileFormatException e) {
-        throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
-            " table is stored as ORC but the file being loaded is not a valid ORC file."));
-      } catch (IOException e) {
-        throw new SemanticException("Unable to load data to destination table." +
-            " Error: " + e.getMessage());
+      for (FileStatus fileStatus : fileStatuses) {
+        try {
+          Path filePath = fileStatus.getPath();
+          FileSystem fs = FileSystem.get(filePath.toUri(), conf);
+          // just creating orc reader is going to do sanity checks to make sure its valid ORC file
+          OrcFile.createReader(fs, filePath);
+        } catch (FileFormatException e) {
+          throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
+              " table is stored as ORC but the file being loaded is not a valid ORC file."));
+        } catch (IOException e) {
+          throw new SemanticException("Unable to load data to destination table." +
+              " Error: " + e.getMessage());
+        }
       }
     }
   }
diff --git a/ql/src/test/queries/clientnegative/load_orc_negative3.q b/ql/src/test/queries/clientnegative/load_orc_negative3.q
new file mode 100644
index 0000000..9a4116e
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/load_orc_negative3.q
@@ -0,0 +1,6 @@
+create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
+load data local inpath '../../data/files/kv1.txt' into table text_test;
+
+set hive.default.fileformat=ORC;
+create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
+load data inpath '${hiveconf:hive.metastore.warehouse.dir}/text_test/' into table orc_test;
diff --git a/ql/src/test/queries/clientpositive/load_orc_part.q b/ql/src/test/queries/clientpositive/load_orc_part.q
index 0927ea4..2ff884d 100644
--- a/ql/src/test/queries/clientpositive/load_orc_part.q
+++ b/ql/src/test/queries/clientpositive/load_orc_part.q
@@ -9,6 +9,10 @@ load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/orc_split
 load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10');
 dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;
 
+load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging;
+load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/' overwrite into table orc_test partition (ds='10');
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;
+
 alter table orc_test add partition(ds='11');
 alter table orc_test partition(ds='11') set fileformat textfile;
 load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11');
diff --git a/ql/src/test/results/clientnegative/load_orc_negative3.q.out b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
new file mode 100644
index 0000000..77fb50e
--- /dev/null
+++ b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
@@ -0,0 +1,25 @@
+PREHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@text_test
+POSTHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@text_test
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@text_test
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@text_test
+PREHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_test
+POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_test
+FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file.
diff --git a/ql/src/test/results/clientpositive/load_orc_part.q.out b/ql/src/test/results/clientpositive/load_orc_part.q.out
index 34ca493..2e02c2e 100644
--- a/ql/src/test/results/clientpositive/load_orc_part.q.out
+++ b/ql/src/test/results/clientpositive/load_orc_part.q.out
@@ -42,6 +42,24 @@ POSTHOOK: type: LOAD
 POSTHOOK: Output: default@orc_test@ds=10
 Found 2 items
 #### A masked pattern was here ####
+PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_staging
+POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_staging
+#### A masked pattern was here ####
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_test@ds=10
+#### A masked pattern was here ####
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_test@ds=10
+Found 1 items
+#### A masked pattern was here ####
 PREHOOK: query: alter table orc_test add partition(ds='11')
 PREHOOK: type: ALTERTABLE_ADDPARTS
 PREHOOK: Output: default@orc_test
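
For reference, the per-file check introduced above can be read in isolation as the sketch below. It is illustrative only and not part of the patch; the class and method names (OrcLoadCheck, allReadableAsOrc) are hypothetical, while FileSystem, Path, OrcFile.createReader and FileFormatException are the same APIs the patched ensureFileFormatsMatch uses.

// Illustrative sketch only -- mirrors the per-file ORC sanity check that the
// patched ensureFileFormatsMatch() performs for every FileStatus returned by
// applyConstraintsAndGetFiles(). Class and method names are hypothetical.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.FileFormatException;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;

public class OrcLoadCheck {
  /**
   * Returns true if every file can be opened by the ORC reader.
   * Opening the reader is enough: it validates the ORC footer/postscript and
   * throws FileFormatException for non-ORC input.
   */
  static boolean allReadableAsOrc(Configuration conf, FileStatus[] files) throws IOException {
    for (FileStatus file : files) {
      Path path = file.getPath();
      FileSystem fs = FileSystem.get(path.toUri(), conf);
      try {
        OrcFile.createReader(fs, path);   // sanity-checks the file format
      } catch (FileFormatException e) {
        return false;                     // not a valid ORC file
      }
    }
    return true;
  }
}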