diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 8c948a98cc98d7997c86310d0fe32c61f72cc744..a84123d08391ff73ab6ea00877ea3fae6848b772 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -1400,7 +1400,7 @@ public Partition loadPartition(Path loadPath, Table tbl,
     } else {
       newFiles = new ArrayList<>();
       FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
-      Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles);
+      Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles, tbl);
     }
 
     boolean forceCreate = (!holdDDLTime) ? true : false;
@@ -1642,7 +1642,7 @@ public void loadTable(Path loadPath, String tableName, boolean replace,
     FileSystem fs;
     try {
       fs = tbl.getDataLocation().getFileSystem(sessionConf);
-      copyFiles(sessionConf, loadPath, tbl.getPath(), fs, isSrcLocal, isAcid, newFiles);
+      copyFiles(sessionConf, loadPath, tbl.getPath(), fs, isSrcLocal, isAcid, newFiles, tbl);
     } catch (IOException e) {
       throw new HiveException("addFiles: filesystem error in check phase", e);
     }
@@ -2665,7 +2665,7 @@ public static boolean moveFile(HiveConf conf, Path srcf, Path destf,
    * @throws HiveException
    */
   static protected void copyFiles(HiveConf conf, Path srcf, Path destf,
-      FileSystem fs, boolean isSrcLocal, boolean isAcid, List<Path> newFiles) throws HiveException {
+      FileSystem fs, boolean isSrcLocal, boolean isAcid, List<Path> newFiles, Table tbl) throws HiveException {
     boolean inheritPerms = HiveConf.getBoolVar(conf,
         HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
     try {
@@ -2701,10 +2701,18 @@ static protected void copyFiles(HiveConf conf, Path srcf, Path destf,
     } else {
       // check that source and target paths exist
       List<List<Path[]>> result = checkPaths(conf, fs, srcs, srcFs, destf, false);
+      boolean isEnforceBucketing = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEENFORCEBUCKETING)
+          && tbl.getNumBuckets() > 0;
       // move it, move it
       try {
         for (List<Path[]> sdpairs : result) {
           for (Path[] sdpair : sdpairs) {
+            if (isEnforceBucketing && sdpair[0] != null && sdpair[1] != null && !sdpair[0].getName().equalsIgnoreCase(sdpair[1].getName())) {
+              LOG.error(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE.
+                  getMsg("Table: " + tbl.getTableName()));
+              throw new HiveException(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE.
+                  getMsg("Table: " + tbl.getTableName()));
+            }
             if (!moveFile(conf, sdpair[0], sdpair[1], fs, false, isSrcLocal)) {
               throw new IOException("Cannot move " + sdpair[0] + " to "
                   + sdpair[1]);
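For reference, the guard added to copyFiles() above only fires when hive.enforce.bucketing is enabled and the target table declares buckets; in that case each staged file must move onto a destination file of the same name, otherwise the load fails with ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE, since a name mismatch means the data would land beside, rather than replace, an existing bucket file. A minimal, self-contained sketch of that predicate is shown below; the class and method names are illustrative only and are not part of this patch.

    import org.apache.hadoop.fs.Path;

    // Illustrative only: mirrors the condition added to Hive.copyFiles(), not Hive's API.
    public final class BucketMoveCheck {

      /**
       * Returns true when the move should be rejected: bucketing is enforced,
       * the table is bucketed, and the staged file would be written to a
       * differently named target file.
       */
      static boolean rejectsMove(boolean enforceBucketing, int numBuckets, Path src, Path dest) {
        boolean isEnforceBucketing = enforceBucketing && numBuckets > 0;
        return isEnforceBucketing && src != null && dest != null
            && !src.getName().equalsIgnoreCase(dest.getName());
      }

      public static void main(String[] args) {
        // A second INSERT INTO typically renames the target to 000000_0_copy_1,
        // so the names differ and the move is refused (prints "true").
        System.out.println(rejectsMove(true, 2,
            new Path("/tmp/hive-staging/000000_0"),
            new Path("/user/hive/warehouse/buckettest1/state=MA/000000_0_copy_1")));
      }
    }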
+ getMsg("Table: " + tlb.getTableName())); + } if (!moveFile(conf, sdpair[0], sdpair[1], fs, false, isSrcLocal)) { throw new IOException("Cannot move " + sdpair[0] + " to " + sdpair[1]); diff --git a/ql/src/test/queries/clientnegative/insertinto_nonemptybucket.q b/ql/src/test/queries/clientnegative/insertinto_nonemptybucket.q new file mode 100644 index 0000000000000000000000000000000000000000..f6364f1b133df7bb2652057f36237a2c0623feb7 --- /dev/null +++ b/ql/src/test/queries/clientnegative/insertinto_nonemptybucket.q @@ -0,0 +1,8 @@ +drop table if exists buckettest1; +create table if not exists buckettest1 (data int) partitioned by (state string) clustered by (data) into 2 buckets; +set hive.enforce.bucketing = true; +set hive.enforce.sorting=true; +set hive.exec.dynamic.partition = true; +set hive.exec.dynamic.partition.mode = nonstrict; +insert into table buckettest1 partition(state) select key, 'MA' from src where key < 100; +insert into table buckettest1 partition(state) select key, 'MA' from src where key > 100 and key < 200; diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q b/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q index 78816ae94cd903a1ea966878021f5480b52b8a24..ac7ae0a2b9cd1f42e296ad89cfc6ca43e50d3d22 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q @@ -68,6 +68,27 @@ insert overwrite table over1k_part_limit_orc partition(ds="foo", t) select si,i, insert overwrite table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27; insert overwrite table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27; +drop table over1k_part_buck_orc; +drop table over1k_part_buck_sort_orc; + +create table over1k_part_buck_orc( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) into 4 buckets stored as orc; + +create table over1k_part_buck_sort_orc( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) + sorted by (f) into 4 buckets stored as orc; + + set hive.enforce.bucketing=true; set hive.enforce.sorting=true; diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q index e4595838d58c7849dadb889b1a8e8cf83dc1c20b..7741cfaa051e799ebf41314a39997ce34a7b6690 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q @@ -62,6 +62,25 @@ insert overwrite table over1k_part_limit partition(ds="foo", t) select si,i,b,f, insert overwrite table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27; insert overwrite table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27; +drop table over1k_part_buck; +drop table over1k_part_buck_sort; +create table over1k_part_buck( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) into 4 buckets; + +create table over1k_part_buck_sort( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) + sorted by (f) into 4 buckets; + set hive.enforce.bucketing=true; set hive.enforce.sorting=true; diff --git a/ql/src/test/queries/clientpositive/insert_into_with_schema2.q b/ql/src/test/queries/clientpositive/insert_into_with_schema2.q index 
--- a/ql/src/test/queries/clientpositive/insert_into_with_schema2.q
+++ b/ql/src/test/queries/clientpositive/insert_into_with_schema2.q
@@ -12,12 +12,26 @@ insert into student_acid(age) select * from studenttab10k;
 
 select * from student_acid;
 
+drop table student_acid;
+create table student_acid (age int, grade int)
+  clustered by (age) into 1 buckets;
+
 insert into student_acid(grade, age) select 3 g, * from studenttab10k;
 
 select * from student_acid;
 
+drop table student_acid;
+create table student_acid (age int, grade int)
+  clustered by (age) into 1 buckets;
+
 insert into student_acid(grade, age) values(20, 2);
 
+select * from student_acid;
+
+drop table student_acid;
+create table student_acid (age int, grade int)
+  clustered by (age) into 1 buckets;
+
 insert into student_acid(age) values(22);
 
 select * from student_acid;
diff --git a/ql/src/test/results/clientnegative/insertinto_nonemptybucket.q.out b/ql/src/test/results/clientnegative/insertinto_nonemptybucket.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..a0b605583b170b36659f5f5cea4ae33cbb30da15
--- /dev/null
+++ b/ql/src/test/results/clientnegative/insertinto_nonemptybucket.q.out
@@ -0,0 +1,27 @@
+PREHOOK: query: drop table if exists buckettest1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists buckettest1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table if not exists buckettest1 (data int) partitioned by (state string) clustered by (data) into 2 buckets
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@buckettest1
+POSTHOOK: query: create table if not exists buckettest1 (data int) partitioned by (state string) clustered by (data) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@buckettest1
+PREHOOK: query: insert into table buckettest1 partition(state) select key, 'MA' from src where key < 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@buckettest1
+POSTHOOK: query: insert into table buckettest1 partition(state) select key, 'MA' from src where key < 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@buckettest1@state=MA
+POSTHOOK: Lineage: buckettest1 PARTITION(state=MA).data EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: insert into table buckettest1 partition(state) select key, 'MA' from src where key > 100 and key < 200
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@buckettest1
+Failed with exception Bucketized tables do not support INSERT INTO: Table: buckettest1
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
index d000a4c9fac4173d146446acd15ed4385214c8a4..acd7d4cd0d808711ccfa1386fc115e6726e13499 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
@@ -508,6 +508,64 @@ POSTHOOK: Lineage: over1k_part_buck_sort_orc PARTITION(t=__HIVE_DEFAULT_PARTITIO
 POSTHOOK: Lineage: over1k_part_buck_sort_orc PARTITION(t=__HIVE_DEFAULT_PARTITION__).f SIMPLE [(over1k_orc)over1k_orc.FieldSchema(name:f, type:float, comment:null), ]
 POSTHOOK: Lineage: over1k_part_buck_sort_orc
PARTITION(t=__HIVE_DEFAULT_PARTITION__).i SIMPLE [(over1k_orc)over1k_orc.FieldSchema(name:i, type:int, comment:null), ] POSTHOOK: Lineage: over1k_part_buck_sort_orc PARTITION(t=__HIVE_DEFAULT_PARTITION__).si SIMPLE [(over1k_orc)over1k_orc.FieldSchema(name:si, type:smallint, comment:null), ] +PREHOOK: query: drop table over1k_part_buck_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@over1k_part_buck_orc +PREHOOK: Output: default@over1k_part_buck_orc +POSTHOOK: query: drop table over1k_part_buck_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@over1k_part_buck_orc +POSTHOOK: Output: default@over1k_part_buck_orc +PREHOOK: query: drop table over1k_part_buck_sort_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@over1k_part_buck_sort_orc +PREHOOK: Output: default@over1k_part_buck_sort_orc +POSTHOOK: query: drop table over1k_part_buck_sort_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@over1k_part_buck_sort_orc +POSTHOOK: Output: default@over1k_part_buck_sort_orc +PREHOOK: query: create table over1k_part_buck_orc( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) into 4 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part_buck_orc +POSTHOOK: query: create table over1k_part_buck_orc( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) into 4 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part_buck_orc +PREHOOK: query: create table over1k_part_buck_sort_orc( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) + sorted by (f) into 4 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part_buck_sort_orc +POSTHOOK: query: create table over1k_part_buck_sort_orc( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) + sorted by (f) into 4 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part_buck_sort_orc PREHOOK: query: -- map-reduce jobs modified by hive.optimize.sort.dynamic.partition optimization explain insert into table over1k_part_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by si PREHOOK: type: QUERY @@ -1068,10 +1126,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 32 - rawDataSize 640 - totalSize 4644 + numFiles 4 + numRows 16 + rawDataSize 320 + totalSize 2322 #### A masked pattern was here #### # Storage Information @@ -1111,10 +1169,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 6 - rawDataSize 120 - totalSize 2262 + numFiles 4 + numRows 3 + rawDataSize 60 + totalSize 1131 #### A masked pattern was here #### # Storage Information @@ -1154,10 +1212,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 32 - rawDataSize 640 - totalSize 4630 + numFiles 4 + numRows 16 + rawDataSize 320 + totalSize 2315 #### A masked pattern was here #### # Storage Information @@ -1197,10 +1255,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 6 - rawDataSize 120 
- totalSize 2262 + numFiles 4 + numRows 3 + rawDataSize 60 + totalSize 1131 #### A masked pattern was here #### # Storage Information @@ -1251,7 +1309,7 @@ POSTHOOK: Input: default@over1k_part_buck_orc POSTHOOK: Input: default@over1k_part_buck_orc@t=27 POSTHOOK: Input: default@over1k_part_buck_orc@t=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### -38 +19 PREHOOK: query: select count(*) from over1k_part_buck_sort_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort_orc @@ -1264,7 +1322,7 @@ POSTHOOK: Input: default@over1k_part_buck_sort_orc POSTHOOK: Input: default@over1k_part_buck_sort_orc@t=27 POSTHOOK: Input: default@over1k_part_buck_sort_orc@t=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### -38 +19 PREHOOK: query: -- tests for HIVE-6883 create table over1k_part2_orc( si smallint, diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out index 9e947bbe2589ea15f25d7f6058afee2b23788dfe..00fc5827760f9db835c02f7ceb7d55588e4ddf91 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out @@ -439,6 +439,64 @@ POSTHOOK: Lineage: over1k_part_buck_sort PARTITION(t=__HIVE_DEFAULT_PARTITION__) POSTHOOK: Lineage: over1k_part_buck_sort PARTITION(t=__HIVE_DEFAULT_PARTITION__).f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] POSTHOOK: Lineage: over1k_part_buck_sort PARTITION(t=__HIVE_DEFAULT_PARTITION__).i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] POSTHOOK: Lineage: over1k_part_buck_sort PARTITION(t=__HIVE_DEFAULT_PARTITION__).si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] +PREHOOK: query: drop table over1k_part_buck +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@over1k_part_buck +PREHOOK: Output: default@over1k_part_buck +POSTHOOK: query: drop table over1k_part_buck +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@over1k_part_buck +POSTHOOK: Output: default@over1k_part_buck +PREHOOK: query: drop table over1k_part_buck_sort +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@over1k_part_buck_sort +PREHOOK: Output: default@over1k_part_buck_sort +POSTHOOK: query: drop table over1k_part_buck_sort +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@over1k_part_buck_sort +POSTHOOK: Output: default@over1k_part_buck_sort +PREHOOK: query: create table over1k_part_buck( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) into 4 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part_buck +POSTHOOK: query: create table over1k_part_buck( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) into 4 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part_buck +PREHOOK: query: create table over1k_part_buck_sort( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) + sorted by (f) into 4 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part_buck_sort +POSTHOOK: query: create table over1k_part_buck_sort( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) + sorted by (f) into 4 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@over1k_part_buck_sort PREHOOK: query: -- map-reduce jobs modified by hive.optimize.sort.dynamic.partition optimization explain insert into table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -973,10 +1031,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 32 - rawDataSize 830 - totalSize 862 + numFiles 4 + numRows 16 + rawDataSize 415 + totalSize 431 #### A masked pattern was here #### # Storage Information @@ -1016,10 +1074,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 6 - rawDataSize 156 - totalSize 162 + numFiles 4 + numRows 3 + rawDataSize 78 + totalSize 81 #### A masked pattern was here #### # Storage Information @@ -1059,10 +1117,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 32 - rawDataSize 830 - totalSize 862 + numFiles 4 + numRows 16 + rawDataSize 415 + totalSize 431 #### A masked pattern was here #### # Storage Information @@ -1102,10 +1160,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 6 - rawDataSize 156 - totalSize 162 + numFiles 4 + numRows 3 + rawDataSize 78 + totalSize 81 #### A masked pattern was here #### # Storage Information @@ -1156,7 +1214,7 @@ POSTHOOK: Input: default@over1k_part_buck POSTHOOK: Input: default@over1k_part_buck@t=27 POSTHOOK: Input: default@over1k_part_buck@t=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### -38 +19 PREHOOK: query: select count(*) from over1k_part_buck_sort PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort @@ -1169,7 +1227,7 @@ POSTHOOK: Input: default@over1k_part_buck_sort POSTHOOK: Input: default@over1k_part_buck_sort@t=27 POSTHOOK: Input: default@over1k_part_buck_sort@t=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### -38 +19 PREHOOK: query: -- tests for HIVE-6883 create table over1k_part2( si smallint, diff --git a/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out b/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out index 32e6e92bba3cdf8bd411371ec4e6d1c955c33bdb..6638494cc5b0f99aa4ab627984978034905319cb 100644 --- a/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out +++ b/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out @@ -44,6 +44,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@student_acid #### A masked pattern was here #### 1 NULL +PREHOOK: query: drop table student_acid +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@student_acid +PREHOOK: Output: default@student_acid +POSTHOOK: query: drop table student_acid +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@student_acid +POSTHOOK: Output: default@student_acid +PREHOOK: query: create table student_acid (age int, grade int) + clustered by (age) into 1 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@student_acid +POSTHOOK: query: create table student_acid (age int, grade int) + clustered by (age) into 1 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@student_acid PREHOOK: query: insert into student_acid(grade, age) select 3 g, * from studenttab10k PREHOOK: type: QUERY PREHOOK: Input: default@studenttab10k @@ -63,7 +81,24 @@ POSTHOOK: type: QUERY POSTHOOK: 
Input: default@student_acid #### A masked pattern was here #### 1 3 -1 NULL +PREHOOK: query: drop table student_acid +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@student_acid +PREHOOK: Output: default@student_acid +POSTHOOK: query: drop table student_acid +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@student_acid +POSTHOOK: Output: default@student_acid +PREHOOK: query: create table student_acid (age int, grade int) + clustered by (age) into 1 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@student_acid +POSTHOOK: query: create table student_acid (age int, grade int) + clustered by (age) into 1 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@student_acid PREHOOK: query: insert into student_acid(grade, age) values(20, 2) PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__2 @@ -74,6 +109,33 @@ POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@student_acid POSTHOOK: Lineage: student_acid.age EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] POSTHOOK: Lineage: student_acid.grade EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from student_acid +PREHOOK: type: QUERY +PREHOOK: Input: default@student_acid +#### A masked pattern was here #### +POSTHOOK: query: select * from student_acid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@student_acid +#### A masked pattern was here #### +2 20 +PREHOOK: query: drop table student_acid +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@student_acid +PREHOOK: Output: default@student_acid +POSTHOOK: query: drop table student_acid +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@student_acid +POSTHOOK: Output: default@student_acid +PREHOOK: query: create table student_acid (age int, grade int) + clustered by (age) into 1 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@student_acid +POSTHOOK: query: create table student_acid (age int, grade int) + clustered by (age) into 1 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@student_acid PREHOOK: query: insert into student_acid(age) values(22) PREHOOK: type: QUERY PREHOOK: Input: default@values__tmp__table__3 @@ -92,9 +154,6 @@ POSTHOOK: query: select * from student_acid POSTHOOK: type: QUERY POSTHOOK: Input: default@student_acid #### A masked pattern was here #### -1 3 -1 NULL -2 20 22 NULL PREHOOK: query: drop table if exists acid_partitioned PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out index 04927b82398bc15462e00e88a8862c883f12e97d..e67f1198f8b44518b539c84505868829659116d4 100644 --- a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out +++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out @@ -534,6 +534,64 @@ POSTHOOK: Lineage: over1k_part_buck_sort_orc PARTITION(t=__HIVE_DEFAULT_PARTITIO POSTHOOK: Lineage: over1k_part_buck_sort_orc PARTITION(t=__HIVE_DEFAULT_PARTITION__).f SIMPLE [(over1k_orc)over1k_orc.FieldSchema(name:f, type:float, comment:null), ] POSTHOOK: Lineage: over1k_part_buck_sort_orc PARTITION(t=__HIVE_DEFAULT_PARTITION__).i SIMPLE [(over1k_orc)over1k_orc.FieldSchema(name:i, type:int, 
comment:null), ] POSTHOOK: Lineage: over1k_part_buck_sort_orc PARTITION(t=__HIVE_DEFAULT_PARTITION__).si SIMPLE [(over1k_orc)over1k_orc.FieldSchema(name:si, type:smallint, comment:null), ] +PREHOOK: query: drop table over1k_part_buck_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@over1k_part_buck_orc +PREHOOK: Output: default@over1k_part_buck_orc +POSTHOOK: query: drop table over1k_part_buck_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@over1k_part_buck_orc +POSTHOOK: Output: default@over1k_part_buck_orc +PREHOOK: query: drop table over1k_part_buck_sort_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@over1k_part_buck_sort_orc +PREHOOK: Output: default@over1k_part_buck_sort_orc +POSTHOOK: query: drop table over1k_part_buck_sort_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@over1k_part_buck_sort_orc +POSTHOOK: Output: default@over1k_part_buck_sort_orc +PREHOOK: query: create table over1k_part_buck_orc( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) into 4 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part_buck_orc +POSTHOOK: query: create table over1k_part_buck_orc( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) into 4 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part_buck_orc +PREHOOK: query: create table over1k_part_buck_sort_orc( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) + sorted by (f) into 4 buckets stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part_buck_sort_orc +POSTHOOK: query: create table over1k_part_buck_sort_orc( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) + sorted by (f) into 4 buckets stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part_buck_sort_orc PREHOOK: query: -- map-reduce jobs modified by hive.optimize.sort.dynamic.partition optimization explain insert into table over1k_part_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by si PREHOOK: type: QUERY @@ -1120,10 +1178,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 32 - rawDataSize 640 - totalSize 4644 + numFiles 4 + numRows 16 + rawDataSize 320 + totalSize 2322 #### A masked pattern was here #### # Storage Information @@ -1163,10 +1221,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 6 - rawDataSize 120 - totalSize 2262 + numFiles 4 + numRows 3 + rawDataSize 60 + totalSize 1131 #### A masked pattern was here #### # Storage Information @@ -1206,10 +1264,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 32 - rawDataSize 640 - totalSize 4630 + numFiles 4 + numRows 16 + rawDataSize 320 + totalSize 2315 #### A masked pattern was here #### # Storage Information @@ -1249,10 +1307,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 6 - rawDataSize 120 - totalSize 2262 + numFiles 4 + numRows 3 + rawDataSize 60 + totalSize 1131 #### A masked pattern was 
here #### # Storage Information @@ -1303,7 +1361,7 @@ POSTHOOK: Input: default@over1k_part_buck_orc POSTHOOK: Input: default@over1k_part_buck_orc@t=27 POSTHOOK: Input: default@over1k_part_buck_orc@t=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### -38 +19 PREHOOK: query: select count(*) from over1k_part_buck_sort_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort_orc @@ -1316,7 +1374,7 @@ POSTHOOK: Input: default@over1k_part_buck_sort_orc POSTHOOK: Input: default@over1k_part_buck_sort_orc@t=27 POSTHOOK: Input: default@over1k_part_buck_sort_orc@t=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### -38 +19 PREHOOK: query: -- tests for HIVE-6883 create table over1k_part2_orc( si smallint, diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out index 8a16645547abf8dc993d0bbc79e241e09fbc2dd8..513f4a74d99c6eecca96f86d5aaeed04744d4c1a 100644 --- a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out @@ -469,6 +469,64 @@ POSTHOOK: Lineage: over1k_part_buck_sort PARTITION(t=__HIVE_DEFAULT_PARTITION__) POSTHOOK: Lineage: over1k_part_buck_sort PARTITION(t=__HIVE_DEFAULT_PARTITION__).f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] POSTHOOK: Lineage: over1k_part_buck_sort PARTITION(t=__HIVE_DEFAULT_PARTITION__).i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] POSTHOOK: Lineage: over1k_part_buck_sort PARTITION(t=__HIVE_DEFAULT_PARTITION__).si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] +PREHOOK: query: drop table over1k_part_buck +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@over1k_part_buck +PREHOOK: Output: default@over1k_part_buck +POSTHOOK: query: drop table over1k_part_buck +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@over1k_part_buck +POSTHOOK: Output: default@over1k_part_buck +PREHOOK: query: drop table over1k_part_buck_sort +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@over1k_part_buck_sort +PREHOOK: Output: default@over1k_part_buck_sort +POSTHOOK: query: drop table over1k_part_buck_sort +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@over1k_part_buck_sort +POSTHOOK: Output: default@over1k_part_buck_sort +PREHOOK: query: create table over1k_part_buck( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) into 4 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part_buck +POSTHOOK: query: create table over1k_part_buck( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) into 4 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part_buck +PREHOOK: query: create table over1k_part_buck_sort( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) + sorted by (f) into 4 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part_buck_sort +POSTHOOK: query: create table over1k_part_buck_sort( + si smallint, + i int, + b bigint, + f float) + partitioned by (t tinyint) + clustered by (si) + sorted by (f) into 4 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part_buck_sort PREHOOK: query: -- map-reduce jobs modified by 
hive.optimize.sort.dynamic.partition optimization explain insert into table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -1033,10 +1091,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 32 - rawDataSize 830 - totalSize 862 + numFiles 4 + numRows 16 + rawDataSize 415 + totalSize 431 #### A masked pattern was here #### # Storage Information @@ -1076,10 +1134,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 6 - rawDataSize 156 - totalSize 162 + numFiles 4 + numRows 3 + rawDataSize 78 + totalSize 81 #### A masked pattern was here #### # Storage Information @@ -1119,10 +1177,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 32 - rawDataSize 830 - totalSize 862 + numFiles 4 + numRows 16 + rawDataSize 415 + totalSize 431 #### A masked pattern was here #### # Storage Information @@ -1162,10 +1220,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 8 - numRows 6 - rawDataSize 156 - totalSize 162 + numFiles 4 + numRows 3 + rawDataSize 78 + totalSize 81 #### A masked pattern was here #### # Storage Information @@ -1216,7 +1274,7 @@ POSTHOOK: Input: default@over1k_part_buck POSTHOOK: Input: default@over1k_part_buck@t=27 POSTHOOK: Input: default@over1k_part_buck@t=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### -38 +19 PREHOOK: query: select count(*) from over1k_part_buck_sort PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort @@ -1229,7 +1287,7 @@ POSTHOOK: Input: default@over1k_part_buck_sort POSTHOOK: Input: default@over1k_part_buck_sort@t=27 POSTHOOK: Input: default@over1k_part_buck_sort@t=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### -38 +19 PREHOOK: query: -- tests for HIVE-6883 create table over1k_part2( si smallint,