diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index 550fe5075e..2b88ea651b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -62,6 +62,7 @@ import org.apache.hadoop.mapred.InputFormat; import com.google.common.collect.Lists; +import org.apache.hadoop.mapred.TextInputFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -172,8 +173,6 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti if (oneSrc.isDir()) { reparseAndSuperAnalyze(table, fromURI); return null; -/* throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, - "source contains directory: " + oneSrc.getPath().toString()));*/ } } validateAcidFiles(table, srcs, fileSystem); @@ -449,8 +448,9 @@ private void reparseAndSuperAnalyze(Table table, URI fromURI) throws SemanticExc // wipe out partition columns tempTableObj.setPartCols(new ArrayList<>()); - // Set data location + // Set data location and input format; the input format must be text tempTableObj.setDataLocation(new Path(fromURI)); + tempTableObj.setInputFormatClass(TextInputFormat.class); // Step 2 : create the Insert query StringBuilder rewrittenQueryStr = new StringBuilder(); diff --git a/ql/src/test/queries/clientpositive/load_data_using_job.q b/ql/src/test/queries/clientpositive/load_data_using_job.q index dceca48a93..3928f1fa07 100644 --- a/ql/src/test/queries/clientpositive/load_data_using_job.q +++ b/ql/src/test/queries/clientpositive/load_data_using_job.q @@ -81,3 +81,10 @@ explain load data local inpath '../../data/files/load_data_job/partitions/load_d load data local inpath '../../data/files/load_data_job/partitions/load_data_2_partitions.txt' INTO TABLE srcbucket_mapjoin; select * from srcbucket_mapjoin; drop table srcbucket_mapjoin; + +-- Load into ORC table using text files +CREATE 
TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) STORED AS ORC; +explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin; +load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin; +select * from srcbucket_mapjoin; +drop table srcbucket_mapjoin; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out index 80773103e0..116630c237 100644 --- a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out +++ b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out @@ -2767,3 +2767,211 @@ POSTHOOK: query: drop table srcbucket_mapjoin POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@srcbucket_mapjoin POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: QUERY +POSTHOOK: query: explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: 
srcbucket_mapjoin__temp_table_for_load_data__ + Statistics: Num rows: 68 Data size: 24552 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 68 Data size: 24552 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 68 Data size: 24552 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcbucket_mapjoin + Execution mode: vectorized, llap + LLAP IO: no inputs + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcbucket_mapjoin + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin__TEMP_TABLE_FOR_LOAD_DATA__ +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin__TEMP_TABLE_FOR_LOAD_DATA__ +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Lineage: srcbucket_mapjoin PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin__TEMP_TABLE_FOR_LOAD_DATA__)srcbucket_mapjoin__temp_table_for_load_data__.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: srcbucket_mapjoin PARTITION(ds=2008-04-08).value SIMPLE 
[(srcbucket_mapjoin__TEMP_TABLE_FOR_LOAD_DATA__)srcbucket_mapjoin__temp_table_for_load_data__.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +#### A masked pattern was here #### +165 val_165 2008-04-08 +484 val_484 2008-04-08 +150 val_150 2008-04-08 +224 val_224 2008-04-08 +66 val_66 2008-04-08 +213 val_213 2008-04-08 +374 val_374 2008-04-08 +495 val_495 2008-04-08 +37 val_37 2008-04-08 +327 val_327 2008-04-08 +15 val_15 2008-04-08 +338 val_338 2008-04-08 +459 val_459 2008-04-08 +466 val_466 2008-04-08 +396 val_396 2008-04-08 +309 val_309 2008-04-08 +367 val_367 2008-04-08 +0 val_0 2008-04-08 +455 val_455 2008-04-08 +316 val_316 2008-04-08 +345 val_345 2008-04-08 +129 val_129 2008-04-08 +378 val_378 2008-04-08 +4 val_4 2008-04-08 +356 val_356 2008-04-08 +169 val_169 2008-04-08 +125 val_125 2008-04-08 +437 val_437 2008-04-08 +286 val_286 2008-04-08 +187 val_187 2008-04-08 +176 val_176 2008-04-08 +459 val_459 2008-04-08 +51 val_51 2008-04-08 +103 val_103 2008-04-08 +239 val_239 2008-04-08 +213 val_213 2008-04-08 +176 val_176 2008-04-08 +275 val_275 2008-04-08 +260 val_260 2008-04-08 +404 val_404 2008-04-08 +217 val_217 2008-04-08 +84 val_84 2008-04-08 +466 val_466 2008-04-08 +8 val_8 2008-04-08 +411 val_411 2008-04-08 +172 val_172 2008-04-08 +129 val_129 2008-04-08 +158 val_158 2008-04-08 +0 val_0 2008-04-08 +26 val_26 2008-04-08 +165 val_165 2008-04-08 +327 val_327 2008-04-08 +51 val_51 2008-04-08 +404 val_404 2008-04-08 +95 val_95 2008-04-08 +282 val_282 2008-04-08 +187 val_187 2008-04-08 +316 val_316 2008-04-08 +169 val_169 2008-04-08 +77 val_77 2008-04-08 +0 val_0 2008-04-08 +118 
val_118 2008-04-08 +282 val_282 2008-04-08 +419 val_419 2008-04-08 +15 val_15 2008-04-08 +118 val_118 2008-04-08 +19 val_19 2008-04-08 +224 val_224 2008-04-08 +309 val_309 2008-04-08 +389 val_389 2008-04-08 +327 val_327 2008-04-08 +242 val_242 2008-04-08 +392 val_392 2008-04-08 +242 val_242 2008-04-08 +396 val_396 2008-04-08 +95 val_95 2008-04-08 +11 val_11 2008-04-08 +143 val_143 2008-04-08 +228 val_228 2008-04-08 +33 val_33 2008-04-08 +103 val_103 2008-04-08 +367 val_367 2008-04-08 +239 val_239 2008-04-08 +480 val_480 2008-04-08 +202 val_202 2008-04-08 +316 val_316 2008-04-08 +235 val_235 2008-04-08 +80 val_80 2008-04-08 +44 val_44 2008-04-08 +466 val_466 2008-04-08 +257 val_257 2008-04-08 +190 val_190 2008-04-08 +114 val_114 2008-04-08 +396 val_396 2008-04-08 +217 val_217 2008-04-08 +125 val_125 2008-04-08 +187 val_187 2008-04-08 +480 val_480 2008-04-08 +491 val_491 2008-04-08 +305 val_305 2008-04-08 +444 val_444 2008-04-08 +169 val_169 2008-04-08 +323 val_323 2008-04-08 +480 val_480 2008-04-08 +136 val_136 2008-04-08 +172 val_172 2008-04-08 +462 val_462 2008-04-08 +26 val_26 2008-04-08 +462 val_462 2008-04-08 +341 val_341 2008-04-08 +183 val_183 2008-04-08 +84 val_84 2008-04-08 +37 val_37 2008-04-08 +448 val_448 2008-04-08 +194 val_194 2008-04-08 +477 val_477 2008-04-08 +169 val_169 2008-04-08 +400 val_400 2008-04-08 +PREHOOK: query: drop table srcbucket_mapjoin +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: drop table srcbucket_mapjoin +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@srcbucket_mapjoin