diff --git a/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 new file mode 100644 index 0000000000000000000000000000000000000000..020bdcc239ce40849ba3b69557448df173de8a92 GIT binary patch literal 501 zcmV;07e022LJ#-0RRU8073x32LJ#=Q$-B`0A2wP!VkcN001q^!3uyd3N{9g^gn$w}T+GhFu0kL}f(IzZ57f%Z raX?<+ki0~beE@^Q1U~=>jsXw>7=VBfA_4#mC{Zv5g7gCePf|k|Jt#UN(#1I^^* zI3h1_L|)>Ed;kMG1A`xf2sZG3X6C^ z42Qq77@{vO)j+{|CI%THP(Mi*1%+@+OX;M^x($S!qvm%xpIdYKa$eVyOIR`S1 z9y%y-AmQlYgBnMUDJm%_DKb6jEHu#KEZA73@@z>Mm!k%!-hv$h0dv~BZ(806<#6%X zz()); + // Reset table params, we only need bucketing version to perform the task + Map params = new HashMap<>(); + params.put(hive_metastoreConstants.TABLE_BUCKETING_VERSION, Integer.toString(table.getBucketingVersion())); + tempTableObj.setParameters(params); + // Set data location and input format, it must be text tempTableObj.setDataLocation(new Path(fromURI)); if (inputFormatClassName != null && serDeClassName != null) { diff --git a/ql/src/test/queries/clientpositive/load_data_using_job.q b/ql/src/test/queries/clientpositive/load_data_using_job.q index b760d9bc7e..970a7521f5 100644 --- a/ql/src/test/queries/clientpositive/load_data_using_job.q +++ b/ql/src/test/queries/clientpositive/load_data_using_job.q @@ -91,4 +91,20 @@ load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'; select * from srcbucket_mapjoin_n8; -drop table srcbucket_mapjoin_n8; \ No newline at end of file +drop table srcbucket_mapjoin_n8; + +-- Load into ACID table using ORC files +set hive.mapred.mode=nonstrict; +set hive.optimize.ppd=true; +set hive.optimize.index.filter=true; +set hive.tez.bucket.pruning=true; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true'); +explain load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn; +load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn; + +select * from orc_test_txn; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out index 21fd9334ea..58db2c57de 100644 --- a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out +++ b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out @@ -960,16 +960,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_mapjoin_n8__temp_table_for_load_data__ - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -979,10 +979,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2983,3 +2983,103 @@ POSTHOOK: query: drop table srcbucket_mapjoin_n8 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@srcbucket_mapjoin_n8 POSTHOOK: Output: default@srcbucket_mapjoin_n8 +PREHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_test_txn +POSTHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_test_txn +#### A masked pattern was here #### +PREHOOK: type: QUERY +#### A masked pattern was here #### +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_test_txn__temp_table_for_load_data__ + Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), name (type: string), dept (type: string), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_test_txn + Write Type: INSERT + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + year + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_test_txn + Write Type: INSERT + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +#### A masked pattern was here #### +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_test_txn__temp_table_for_load_data__ +PREHOOK: Output: default@orc_test_txn +#### A masked pattern was here #### +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_test_txn__temp_table_for_load_data__ +POSTHOOK: Output: default@orc_test_txn@year=2016 +POSTHOOK: Output: default@orc_test_txn@year=2017 +POSTHOOK: Output: default@orc_test_txn@year=2018 +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: select * from orc_test_txn +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_test_txn +PREHOOK: Input: default@orc_test_txn@year=2016 +PREHOOK: Input: default@orc_test_txn@year=2017 +PREHOOK: Input: default@orc_test_txn@year=2018 +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_test_txn +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_test_txn +POSTHOOK: Input: default@orc_test_txn@year=2016 +POSTHOOK: Input: default@orc_test_txn@year=2017 +POSTHOOK: Input: default@orc_test_txn@year=2018 +#### A masked pattern was here #### +9 Harris CSE 2017 +8 Henry CSE 2016 +10 Haley CSE 2018