diff --git cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java index 7f06e76..4239392 100644 --- cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java +++ cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java @@ -32,12 +32,12 @@ import java.util.Set; import jline.ArgumentCompletor; -import jline.ArgumentCompletor.AbstractArgumentDelimiter; -import jline.ArgumentCompletor.ArgumentDelimiter; import jline.Completor; import jline.ConsoleReader; import jline.History; import jline.SimpleCompletor; +import jline.ArgumentCompletor.AbstractArgumentDelimiter; +import jline.ArgumentCompletor.ArgumentDelimiter; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; @@ -398,18 +398,6 @@ public void handle(Signal signal) { int lastRet = 0, ret = 0; String command = ""; - String cmdLine = ""; - for (String cmd : line.split("\n")) { - String[] cmdArr = cmd.split("-- "); - if (cmdArr.length > 2) { - console.printInfo("Cannot have nested comments"); - return -2; - } - - cmdLine += cmdArr[0] + "\n"; - } - - line = cmdLine; for (String oneCmd : line.split(";")) { if (StringUtils.endsWith(oneCmd, "\\")) { @@ -763,9 +751,6 @@ public static int run(String[] args) throws Exception { if (!prefix.equals("")) { prefix += '\n'; } - if (prefix.equals("") && line.startsWith("--")) { - continue; - } if (line.trim().endsWith(";") && !line.trim().endsWith("\\;")) { line = prefix + line; ret = cli.processLine(line, true); diff --git ql/src/test/queries/clientpositive/alter_table_serde.q ql/src/test/queries/clientpositive/alter_table_serde.q index 67b27ac..a80693e 100644 --- ql/src/test/queries/clientpositive/alter_table_serde.q +++ ql/src/test/queries/clientpositive/alter_table_serde.q @@ -10,7 +10,7 @@ describe extended test_table; drop table test_table; --- test partitioned table +--- test partitioned table create table test_table (id int, query string, name string) partitioned by (dt string); alter table test_table add partition (dt = '2011'); diff --git ql/src/test/queries/clientpositive/comments.q ql/src/test/queries/clientpositive/comments.q deleted file mode 100644 index d0c7300..0000000 --- ql/src/test/queries/clientpositive/comments.q +++ /dev/null @@ -1,15 +0,0 @@ -select * from src1; -- this is a comment --- this is a comment; -set hive.auto.join.convert=true; - -select "--" from src1; -- this is a comment -select * from src1 -- this is a comment; --- this is a comment; --- this is a comment; -; -select "ke --- -y" from src1; - -select * from src1; - diff --git ql/src/test/results/clientpositive/add_part_exist.q.out ql/src/test/results/clientpositive/add_part_exist.q.out index b879138..e64cc3c 100644 --- ql/src/test/results/clientpositive/add_part_exist.q.out +++ ql/src/test/results/clientpositive/add_part_exist.q.out @@ -83,9 +83,11 @@ src_thrift srcbucket srcbucket2 srcpart -PREHOOK: query: CREATE DATABASE add_part_test_db +PREHOOK: query: -- Test ALTER TABLE ADD PARTITION in non-default Database +CREATE DATABASE add_part_test_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE add_part_test_db +POSTHOOK: query: -- Test ALTER TABLE ADD PARTITION in non-default Database +CREATE DATABASE add_part_test_db POSTHOOK: type: CREATEDATABASE PREHOOK: query: USE add_part_test_db PREHOOK: type: SWITCHDATABASE diff --git ql/src/test/results/clientpositive/add_partition_no_whitelist.q.out ql/src/test/results/clientpositive/add_partition_no_whitelist.q.out index 9ec2e44..84ee355 100644 --- ql/src/test/results/clientpositive/add_partition_no_whitelist.q.out +++ ql/src/test/results/clientpositive/add_partition_no_whitelist.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE part_nowhitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) +PREHOOK: query: -- Test with no partition name whitelist pattern + +CREATE TABLE part_nowhitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part_nowhitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) +POSTHOOK: query: -- Test with no partition name whitelist pattern + +CREATE TABLE part_nowhitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@part_nowhitelist_test PREHOOK: query: SHOW PARTITIONS part_nowhitelist_test diff --git ql/src/test/results/clientpositive/add_partition_with_whitelist.q.out ql/src/test/results/clientpositive/add_partition_with_whitelist.q.out index b9dc949..75c60ba 100644 --- ql/src/test/results/clientpositive/add_partition_with_whitelist.q.out +++ ql/src/test/results/clientpositive/add_partition_with_whitelist.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE part_whitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) +PREHOOK: query: -- This pattern matches only letters. + +CREATE TABLE part_whitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part_whitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) +POSTHOOK: query: -- This pattern matches only letters. + +CREATE TABLE part_whitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@part_whitelist_test PREHOOK: query: SHOW PARTITIONS part_whitelist_test diff --git ql/src/test/results/clientpositive/alias_casted_column.q.out ql/src/test/results/clientpositive/alias_casted_column.q.out index 4a19a81..89853f9 100644 --- ql/src/test/results/clientpositive/alias_casted_column.q.out +++ ql/src/test/results/clientpositive/alias_casted_column.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: explain select key from (select cast(key as int) from src )t +PREHOOK: query: -- HIVE-2477 Use name of original expression for name of CAST output +explain select key from (select cast(key as int) from src )t PREHOOK: type: QUERY -POSTHOOK: query: explain select key from (select cast(key as int) from src )t +POSTHOOK: query: -- HIVE-2477 Use name of original expression for name of CAST output +explain select key from (select cast(key as int) from src )t POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) diff --git ql/src/test/results/clientpositive/allcolref_in_udf.q.out ql/src/test/results/clientpositive/allcolref_in_udf.q.out index d39b33b..af7fc5e 100644 --- ql/src/test/results/clientpositive/allcolref_in_udf.q.out +++ ql/src/test/results/clientpositive/allcolref_in_udf.q.out @@ -60,12 +60,18 @@ POSTHOOK: Input: default@src 17val_17 ["17","val_17"] 0val_0 ["0","val_0"] 57val_57 ["57","val_57"] -PREHOOK: query: explain +PREHOOK: query: -- The order of columns is decided by row schema of prev operator +-- Like join which has two or more aliases, it's from left most aias to right aliases. + +explain select stack(2, *) as (e1,e2,e3) from ( select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*) from src a join src b on a.key+1=b.key where a.key < 100) x limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The order of columns is decided by row schema of prev operator +-- Like join which has two or more aliases, it's from left most aias to right aliases. + +explain select stack(2, *) as (e1,e2,e3) from ( select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*) from src a join src b on a.key+1=b.key where a.key < 100) x limit 10 diff --git ql/src/test/results/clientpositive/alter1.q.out ql/src/test/results/clientpositive/alter1.q.out index dc2818e..4e5bbdc 100644 --- ql/src/test/results/clientpositive/alter1.q.out +++ ql/src/test/results/clientpositive/alter1.q.out @@ -174,11 +174,13 @@ POSTHOOK: type: DESCTABLE a int None b int None c string None -PREHOOK: query: DROP TABLE alter1 +PREHOOK: query: -- Cleanup +DROP TABLE alter1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@alter1 PREHOOK: Output: default@alter1 -POSTHOOK: query: DROP TABLE alter1 +POSTHOOK: query: -- Cleanup +DROP TABLE alter1 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@alter1 POSTHOOK: Output: default@alter1 @@ -194,9 +196,13 @@ src_thrift srcbucket srcbucket2 srcpart -PREHOOK: query: CREATE DATABASE alter1_db +PREHOOK: query: -- With non-default Database + +CREATE DATABASE alter1_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE alter1_db +POSTHOOK: query: -- With non-default Database + +CREATE DATABASE alter1_db POSTHOOK: type: CREATEDATABASE PREHOOK: query: USE alter1_db PREHOOK: type: SWITCHDATABASE diff --git ql/src/test/results/clientpositive/alter2.q.out ql/src/test/results/clientpositive/alter2.q.out index 5ebff6b..9923ab8 100644 --- ql/src/test/results/clientpositive/alter2.q.out +++ ql/src/test/results/clientpositive/alter2.q.out @@ -170,11 +170,13 @@ POSTHOOK: query: show partitions alter2 POSTHOOK: type: SHOWPARTITIONS insertdate=2008-01-01 insertdate=2008-01-02 -PREHOOK: query: DROP TABLE alter2 +PREHOOK: query: -- Cleanup +DROP TABLE alter2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@alter2 PREHOOK: Output: default@alter2 -POSTHOOK: query: DROP TABLE alter2 +POSTHOOK: query: -- Cleanup +DROP TABLE alter2 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@alter2 POSTHOOK: Output: default@alter2 @@ -190,9 +192,13 @@ src_thrift srcbucket srcbucket2 srcpart -PREHOOK: query: CREATE DATABASE alter2_db +PREHOOK: query: -- Using non-default Database + +CREATE DATABASE alter2_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE alter2_db +POSTHOOK: query: -- Using non-default Database + +CREATE DATABASE alter2_db POSTHOOK: type: CREATEDATABASE PREHOOK: query: USE alter2_db PREHOOK: type: SWITCHDATABASE diff --git ql/src/test/results/clientpositive/alter3.q.out ql/src/test/results/clientpositive/alter3.q.out index 7e19201..9312118 100644 --- ql/src/test/results/clientpositive/alter3.q.out +++ ql/src/test/results/clientpositive/alter3.q.out @@ -152,11 +152,13 @@ pcol1 string None pcol2 string None #### A masked pattern was here #### -PREHOOK: query: DROP TABLE alter3_src +PREHOOK: query: -- Cleanup +DROP TABLE alter3_src PREHOOK: type: DROPTABLE PREHOOK: Input: default@alter3_src PREHOOK: Output: default@alter3_src -POSTHOOK: query: DROP TABLE alter3_src +POSTHOOK: query: -- Cleanup +DROP TABLE alter3_src POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@alter3_src POSTHOOK: Output: default@alter3_src @@ -196,9 +198,13 @@ src_thrift srcbucket srcbucket2 srcpart -PREHOOK: query: CREATE DATABASE alter3_db +PREHOOK: query: -- With non-default Database + +CREATE DATABASE alter3_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE alter3_db +POSTHOOK: query: -- With non-default Database + +CREATE DATABASE alter3_db POSTHOOK: type: CREATEDATABASE POSTHOOK: Lineage: alter3 PARTITION(pcol1=test_part:,pcol2=test_part:).col1 SIMPLE [(alter3_src)alter3_src.FieldSchema(name:col1, type:string, comment:null), ] POSTHOOK: Lineage: alter3_like PARTITION(pcol1=test_part:,pcol2=test_part:).col1 SIMPLE [(alter3_src)alter3_src.FieldSchema(name:col1, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/alter4.q.out ql/src/test/results/clientpositive/alter4.q.out index 4c79150..d618fa6 100644 --- ql/src/test/results/clientpositive/alter4.q.out +++ ql/src/test/results/clientpositive/alter4.q.out @@ -31,11 +31,13 @@ key int None value string None #### A masked pattern was here #### -PREHOOK: query: DROP TABLE set_bucketing_test +PREHOOK: query: -- Cleanup +DROP TABLE set_bucketing_test PREHOOK: type: DROPTABLE PREHOOK: Input: default@set_bucketing_test PREHOOK: Output: default@set_bucketing_test -POSTHOOK: query: DROP TABLE set_bucketing_test +POSTHOOK: query: -- Cleanup +DROP TABLE set_bucketing_test POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@set_bucketing_test POSTHOOK: Output: default@set_bucketing_test @@ -51,9 +53,13 @@ src_thrift srcbucket srcbucket2 srcpart -PREHOOK: query: CREATE DATABASE alter4_db +PREHOOK: query: -- with non-default Database + +CREATE DATABASE alter4_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE alter4_db +POSTHOOK: query: -- with non-default Database + +CREATE DATABASE alter4_db POSTHOOK: type: CREATEDATABASE PREHOOK: query: USE alter4_db PREHOOK: type: SWITCHDATABASE diff --git ql/src/test/results/clientpositive/alter5.q.out ql/src/test/results/clientpositive/alter5.q.out index b8596eb..fc808d3 100644 --- ql/src/test/results/clientpositive/alter5.q.out +++ ql/src/test/results/clientpositive/alter5.q.out @@ -1,11 +1,11 @@ PREHOOK: query: -- - +-- Added to validate the fix for HIVE-2117 - explicit partition location -- create table alter5_src ( col1 string ) stored as textfile PREHOOK: type: CREATETABLE POSTHOOK: query: -- - +-- Added to validate the fix for HIVE-2117 - explicit partition location -- create table alter5_src ( col1 string ) stored as textfile @@ -23,15 +23,15 @@ POSTHOOK: query: create table alter5 ( col1 string ) partitioned by (dt string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@alter5 PREHOOK: query: -- - - +-- Here's the interesting bit for HIVE-2117 - partition subdir should be +-- named "parta". -- alter table alter5 add partition (dt='a') location 'parta' PREHOOK: type: ALTERTABLE_ADDPARTS PREHOOK: Input: default@alter5 POSTHOOK: query: -- - - +-- Here's the interesting bit for HIVE-2117 - partition subdir should be +-- named "parta". -- alter table alter5 add partition (dt='a') location 'parta' POSTHOOK: type: ALTERTABLE_ADDPARTS @@ -94,11 +94,13 @@ dt string None dt string None #### A masked pattern was here #### -PREHOOK: query: DROP TABLE alter5_src +PREHOOK: query: -- Cleanup +DROP TABLE alter5_src PREHOOK: type: DROPTABLE PREHOOK: Input: default@alter5_src PREHOOK: Output: default@alter5_src -POSTHOOK: query: DROP TABLE alter5_src +POSTHOOK: query: -- Cleanup +DROP TABLE alter5_src POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@alter5_src POSTHOOK: Output: default@alter5_src @@ -125,9 +127,13 @@ src_thrift srcbucket srcbucket2 srcpart -PREHOOK: query: CREATE DATABASE alter5_db +PREHOOK: query: -- With non-default Database + +CREATE DATABASE alter5_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE alter5_db +POSTHOOK: query: -- With non-default Database + +CREATE DATABASE alter5_db POSTHOOK: type: CREATEDATABASE POSTHOOK: Lineage: alter5 PARTITION(dt=a).col1 SIMPLE [(alter5_src)alter5_src.FieldSchema(name:col1, type:string, comment:null), ] PREHOOK: query: USE alter5_db diff --git ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out index dbed989..de8affa 100644 --- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out +++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out @@ -95,11 +95,15 @@ Bucket Columns: [key] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table tst1 clustered by (key) into 12 buckets +PREHOOK: query: -- Test changing bucket number + +alter table tst1 clustered by (key) into 12 buckets PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: alter table tst1 clustered by (key) into 12 buckets +POSTHOOK: query: -- Test changing bucket number + +alter table tst1 clustered by (key) into 12 buckets POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -203,10 +207,14 @@ Bucket Columns: [key] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table tst1 into 4 buckets +PREHOOK: query: -- Test changing bucket number of (table/partition) + +alter table tst1 into 4 buckets PREHOOK: type: ALTERTABLE_BUCKETNUM PREHOOK: Input: default@tst1 -POSTHOOK: query: alter table tst1 into 4 buckets +POSTHOOK: query: -- Test changing bucket number of (table/partition) + +alter table tst1 into 4 buckets POSTHOOK: type: ALTERTABLE_BUCKETNUM POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -399,11 +407,15 @@ Bucket Columns: [key] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table tst1 clustered by (key) sorted by (key asc) into 12 buckets +PREHOOK: query: -- Test adding sort order + +alter table tst1 clustered by (key) sorted by (key asc) into 12 buckets PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: alter table tst1 clustered by (key) sorted by (key asc) into 12 buckets +POSTHOOK: query: -- Test adding sort order + +alter table tst1 clustered by (key) sorted by (key asc) into 12 buckets POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -455,11 +467,15 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table tst1 clustered by (key) sorted by (value desc) into 12 buckets +PREHOOK: query: -- Test changing sort order + +alter table tst1 clustered by (key) sorted by (value desc) into 12 buckets PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: alter table tst1 clustered by (key) sorted by (value desc) into 12 buckets +POSTHOOK: query: -- Test changing sort order + +alter table tst1 clustered by (key) sorted by (value desc) into 12 buckets POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -511,11 +527,15 @@ Bucket Columns: [key] Sort Columns: [Order(col:value, order:0)] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table tst1 clustered by (value) into 12 buckets +PREHOOK: query: -- Test removing test order + +alter table tst1 clustered by (value) into 12 buckets PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: alter table tst1 clustered by (value) into 12 buckets +POSTHOOK: query: -- Test removing test order + +alter table tst1 clustered by (value) into 12 buckets POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -567,11 +587,15 @@ Bucket Columns: [value] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table tst1 not clustered +PREHOOK: query: -- Test removing buckets + +alter table tst1 not clustered PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: alter table tst1 not clustered +POSTHOOK: query: -- Test removing buckets + +alter table tst1 not clustered POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 diff --git ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2.q.out ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2.q.out index 50cec61..ea900c1 100644 --- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2.q.out +++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE TABLE tst1(key STRING, value STRING) PARTITIONED BY (ds STRING) +PREHOOK: query: -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata +-- the partition metadata is updated as well. + +CREATE TABLE tst1(key STRING, value STRING) PARTITIONED BY (ds STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE tst1(key STRING, value STRING) PARTITIONED BY (ds STRING) +POSTHOOK: query: -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata +-- the partition metadata is updated as well. + +CREATE TABLE tst1(key STRING, value STRING) PARTITIONED BY (ds STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tst1 PREHOOK: query: DESCRIBE FORMATTED tst1 @@ -87,11 +93,13 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: ALTER TABLE tst1 CLUSTERED BY (key) INTO 8 BUCKETS +PREHOOK: query: -- Test an unbucketed partition gets converted to bucketed +ALTER TABLE tst1 CLUSTERED BY (key) INTO 8 BUCKETS PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: ALTER TABLE tst1 CLUSTERED BY (key) INTO 8 BUCKETS +POSTHOOK: query: -- Test an unbucketed partition gets converted to bucketed +ALTER TABLE tst1 CLUSTERED BY (key) INTO 8 BUCKETS POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -193,11 +201,13 @@ Bucket Columns: [key] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: ALTER TABLE tst1 CLUSTERED BY (key) SORTED BY (key DESC) INTO 8 BUCKETS +PREHOOK: query: -- Test an unsorted partition gets converted to sorted +ALTER TABLE tst1 CLUSTERED BY (key) SORTED BY (key DESC) INTO 8 BUCKETS PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: ALTER TABLE tst1 CLUSTERED BY (key) SORTED BY (key DESC) INTO 8 BUCKETS +POSTHOOK: query: -- Test an unsorted partition gets converted to sorted +ALTER TABLE tst1 CLUSTERED BY (key) SORTED BY (key DESC) INTO 8 BUCKETS POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -307,11 +317,13 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:0)] Storage Desc Params: serialization.format 1 -PREHOOK: query: ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (key DESC) INTO 8 BUCKETS +PREHOOK: query: -- Test changing the bucket columns +ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (key DESC) INTO 8 BUCKETS PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (key DESC) INTO 8 BUCKETS +POSTHOOK: query: -- Test changing the bucket columns +ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (key DESC) INTO 8 BUCKETS POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -429,11 +441,13 @@ Bucket Columns: [value] Sort Columns: [Order(col:key, order:0)] Storage Desc Params: serialization.format 1 -PREHOOK: query: ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (key DESC) INTO 4 BUCKETS +PREHOOK: query: -- Test changing the number of buckets +ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (key DESC) INTO 4 BUCKETS PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (key DESC) INTO 4 BUCKETS +POSTHOOK: query: -- Test changing the number of buckets +ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (key DESC) INTO 4 BUCKETS POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -559,11 +573,13 @@ Bucket Columns: [value] Sort Columns: [Order(col:key, order:0)] Storage Desc Params: serialization.format 1 -PREHOOK: query: ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (value DESC) INTO 4 BUCKETS +PREHOOK: query: -- Test changing the sort columns +ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (value DESC) INTO 4 BUCKETS PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (value DESC) INTO 4 BUCKETS +POSTHOOK: query: -- Test changing the sort columns +ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (value DESC) INTO 4 BUCKETS POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -697,11 +713,13 @@ Bucket Columns: [value] Sort Columns: [Order(col:value, order:0)] Storage Desc Params: serialization.format 1 -PREHOOK: query: ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (value ASC) INTO 4 BUCKETS +PREHOOK: query: -- Test changing the sort order +ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (value ASC) INTO 4 BUCKETS PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (value ASC) INTO 4 BUCKETS +POSTHOOK: query: -- Test changing the sort order +ALTER TABLE tst1 CLUSTERED BY (value) SORTED BY (value ASC) INTO 4 BUCKETS POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -843,11 +861,13 @@ Bucket Columns: [value] Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: ALTER TABLE tst1 CLUSTERED BY (value) INTO 4 BUCKETS +PREHOOK: query: -- Test a sorted partition gets converted to unsorted +ALTER TABLE tst1 CLUSTERED BY (value) INTO 4 BUCKETS PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: ALTER TABLE tst1 CLUSTERED BY (value) INTO 4 BUCKETS +POSTHOOK: query: -- Test a sorted partition gets converted to unsorted +ALTER TABLE tst1 CLUSTERED BY (value) INTO 4 BUCKETS POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 @@ -997,11 +1017,13 @@ Bucket Columns: [value] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: ALTER TABLE tst1 NOT CLUSTERED +PREHOOK: query: -- Test a bucketed partition gets converted to unbucketed +ALTER TABLE tst1 NOT CLUSTERED PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: ALTER TABLE tst1 NOT CLUSTERED +POSTHOOK: query: -- Test a bucketed partition gets converted to unbucketed +ALTER TABLE tst1 NOT CLUSTERED POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 diff --git ql/src/test/results/clientpositive/alter_partition_clusterby_sortby.q.out ql/src/test/results/clientpositive/alter_partition_clusterby_sortby.q.out index e49e95d..0d785b1 100644 --- ql/src/test/results/clientpositive/alter_partition_clusterby_sortby.q.out +++ ql/src/test/results/clientpositive/alter_partition_clusterby_sortby.q.out @@ -10,11 +10,15 @@ POSTHOOK: query: alter table alter_table_partition_clusterby_sortby add partitio POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Input: default@alter_table_partition_clusterby_sortby POSTHOOK: Output: default@alter_table_partition_clusterby_sortby@c=abc -PREHOOK: query: alter table alter_table_partition_clusterby_sortby partition(c='abc') not sorted +PREHOOK: query: -- Turn off sorting for a partition + +alter table alter_table_partition_clusterby_sortby partition(c='abc') not sorted PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@alter_table_partition_clusterby_sortby PREHOOK: Output: default@alter_table_partition_clusterby_sortby@c=abc -POSTHOOK: query: alter table alter_table_partition_clusterby_sortby partition(c='abc') not sorted +POSTHOOK: query: -- Turn off sorting for a partition + +alter table alter_table_partition_clusterby_sortby partition(c='abc') not sorted POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@alter_table_partition_clusterby_sortby POSTHOOK: Input: default@alter_table_partition_clusterby_sortby@c=abc @@ -53,11 +57,15 @@ Bucket Columns: [a, b] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table alter_table_partition_clusterby_sortby partition(c='abc') clustered by (b) sorted by (b desc) into 4 buckets +PREHOOK: query: -- Modify clustering for a partition + +alter table alter_table_partition_clusterby_sortby partition(c='abc') clustered by (b) sorted by (b desc) into 4 buckets PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@alter_table_partition_clusterby_sortby PREHOOK: Output: default@alter_table_partition_clusterby_sortby@c=abc -POSTHOOK: query: alter table alter_table_partition_clusterby_sortby partition(c='abc') clustered by (b) sorted by (b desc) into 4 buckets +POSTHOOK: query: -- Modify clustering for a partition + +alter table alter_table_partition_clusterby_sortby partition(c='abc') clustered by (b) sorted by (b desc) into 4 buckets POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@alter_table_partition_clusterby_sortby POSTHOOK: Input: default@alter_table_partition_clusterby_sortby@c=abc @@ -96,11 +104,15 @@ Bucket Columns: [b] Sort Columns: [Order(col:b, order:0)] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table alter_table_partition_clusterby_sortby partition(c='abc') not clustered +PREHOOK: query: -- Turn off clustering for a partition + +alter table alter_table_partition_clusterby_sortby partition(c='abc') not clustered PREHOOK: type: ALTERTABLE_CLUSTER_SORT PREHOOK: Input: default@alter_table_partition_clusterby_sortby PREHOOK: Output: default@alter_table_partition_clusterby_sortby@c=abc -POSTHOOK: query: alter table alter_table_partition_clusterby_sortby partition(c='abc') not clustered +POSTHOOK: query: -- Turn off clustering for a partition + +alter table alter_table_partition_clusterby_sortby partition(c='abc') not clustered POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@alter_table_partition_clusterby_sortby POSTHOOK: Input: default@alter_table_partition_clusterby_sortby@c=abc @@ -139,9 +151,13 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: desc formatted alter_table_partition_clusterby_sortby +PREHOOK: query: -- Table properties should be unchanged + +desc formatted alter_table_partition_clusterby_sortby PREHOOK: type: DESCTABLE -POSTHOOK: query: desc formatted alter_table_partition_clusterby_sortby +POSTHOOK: query: -- Table properties should be unchanged + +desc formatted alter_table_partition_clusterby_sortby POSTHOOK: type: DESCTABLE # col_name data_type comment diff --git ql/src/test/results/clientpositive/alter_partition_coltype.q.out ql/src/test/results/clientpositive/alter_partition_coltype.q.out index f23cc0a..2a8d185 100644 --- ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -1,13 +1,17 @@ -PREHOOK: query: create table alter_coltype(key string, value string) partitioned by (dt string, ts string) +PREHOOK: query: -- create testing table. +create table alter_coltype(key string, value string) partitioned by (dt string, ts string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table alter_coltype(key string, value string) partitioned by (dt string, ts string) +POSTHOOK: query: -- create testing table. +create table alter_coltype(key string, value string) partitioned by (dt string, ts string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@alter_coltype -PREHOOK: query: insert overwrite table alter_coltype partition(dt='100x', ts='6:30pm') select * from src1 +PREHOOK: query: -- insert and create a partition. +insert overwrite table alter_coltype partition(dt='100x', ts='6:30pm') select * from src1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Output: default@alter_coltype@dt=100x/ts=6%3A30pm -POSTHOOK: query: insert overwrite table alter_coltype partition(dt='100x', ts='6:30pm') select * from src1 +POSTHOOK: query: -- insert and create a partition. +insert overwrite table alter_coltype partition(dt='100x', ts='6:30pm') select * from src1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src1 POSTHOOK: Output: default@alter_coltype@dt=100x/ts=6%3A30pm @@ -31,12 +35,14 @@ ts string None dt string None ts string None -PREHOOK: query: select count(*) from alter_coltype where dt = '100x' +PREHOOK: query: -- select with paritition predicate. +select count(*) from alter_coltype where dt = '100x' PREHOOK: type: QUERY PREHOOK: Input: default@alter_coltype PREHOOK: Input: default@alter_coltype@dt=100x/ts=6%3A30pm #### A masked pattern was here #### -POSTHOOK: query: select count(*) from alter_coltype where dt = '100x' +POSTHOOK: query: -- select with paritition predicate. +select count(*) from alter_coltype where dt = '100x' POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_coltype POSTHOOK: Input: default@alter_coltype@dt=100x/ts=6%3A30pm @@ -44,20 +50,24 @@ POSTHOOK: Input: default@alter_coltype@dt=100x/ts=6%3A30pm POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] 25 -PREHOOK: query: alter table alter_coltype partition column (dt int) +PREHOOK: query: -- alter partition key column data type for dt column. +alter table alter_coltype partition column (dt int) PREHOOK: type: null PREHOOK: Input: default@alter_coltype -POSTHOOK: query: alter table alter_coltype partition column (dt int) +POSTHOOK: query: -- alter partition key column data type for dt column. +alter table alter_coltype partition column (dt int) POSTHOOK: type: null POSTHOOK: Input: default@alter_coltype POSTHOOK: Output: default@alter_coltype POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table alter_coltype partition(dt=10, ts='3.0') select * from src1 +PREHOOK: query: -- load a new partition using new data type. +insert overwrite table alter_coltype partition(dt=10, ts='3.0') select * from src1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Output: default@alter_coltype@dt=10/ts=3.0 -POSTHOOK: query: insert overwrite table alter_coltype partition(dt=10, ts='3.0') select * from src1 +POSTHOOK: query: -- load a new partition using new data type. +insert overwrite table alter_coltype partition(dt=10, ts='3.0') select * from src1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src1 POSTHOOK: Output: default@alter_coltype@dt=10/ts=3.0 @@ -65,12 +75,14 @@ POSTHOOK: Lineage: alter_coltype PARTITION(dt=10,ts=3.0).key SIMPLE [(src1)src1. POSTHOOK: Lineage: alter_coltype PARTITION(dt=10,ts=3.0).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select count(*) from alter_coltype where dt = '100x' +PREHOOK: query: -- make sure the partition predicate still works. +select count(*) from alter_coltype where dt = '100x' PREHOOK: type: QUERY PREHOOK: Input: default@alter_coltype PREHOOK: Input: default@alter_coltype@dt=100x/ts=6%3A30pm #### A masked pattern was here #### -POSTHOOK: query: select count(*) from alter_coltype where dt = '100x' +POSTHOOK: query: -- make sure the partition predicate still works. +select count(*) from alter_coltype where dt = '100x' POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_coltype POSTHOOK: Input: default@alter_coltype@dt=100x/ts=6%3A30pm @@ -218,10 +230,12 @@ POSTHOOK: Lineage: alter_coltype PARTITION(dt=10,ts=3.0).value SIMPLE [(src1)src POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] 0 -PREHOOK: query: alter table alter_coltype partition column (ts double) +PREHOOK: query: -- alter partition key column data type for ts column. +alter table alter_coltype partition column (ts double) PREHOOK: type: null PREHOOK: Input: default@alter_coltype -POSTHOOK: query: alter table alter_coltype partition column (ts double) +POSTHOOK: query: -- alter partition key column data type for ts column. +alter table alter_coltype partition column (ts double) POSTHOOK: type: null POSTHOOK: Input: default@alter_coltype POSTHOOK: Output: default@alter_coltype @@ -229,11 +243,13 @@ POSTHOOK: Lineage: alter_coltype PARTITION(dt=10,ts=3.0).key SIMPLE [(src1)src1. POSTHOOK: Lineage: alter_coltype PARTITION(dt=10,ts=3.0).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table alter_coltype partition(dt='100x', ts=3.0) select * from src1 +PREHOOK: query: -- load a new partition using new data type. +insert overwrite table alter_coltype partition(dt='100x', ts=3.0) select * from src1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Output: default@alter_coltype@dt=100x/ts=3.0 -POSTHOOK: query: insert overwrite table alter_coltype partition(dt='100x', ts=3.0) select * from src1 +POSTHOOK: query: -- load a new partition using new data type. +insert overwrite table alter_coltype partition(dt='100x', ts=3.0) select * from src1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src1 POSTHOOK: Output: default@alter_coltype@dt=100x/ts=3.0 @@ -243,12 +259,14 @@ POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=3.0).key SIMPLE [(src1)src POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=3.0).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select count(*) from alter_coltype where ts = '6:30pm' +PREHOOK: query: -- validate partition key column predicate can still work. +select count(*) from alter_coltype where ts = '6:30pm' PREHOOK: type: QUERY PREHOOK: Input: default@alter_coltype PREHOOK: Input: default@alter_coltype@dt=100x/ts=6%3A30pm #### A masked pattern was here #### -POSTHOOK: query: select count(*) from alter_coltype where ts = '6:30pm' +POSTHOOK: query: -- validate partition key column predicate can still work. +select count(*) from alter_coltype where ts = '6:30pm' POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_coltype POSTHOOK: Input: default@alter_coltype@dt=100x/ts=6%3A30pm @@ -385,14 +403,18 @@ STAGE PLANS: limit: -1 -PREHOOK: query: select count(*) from alter_coltype where ts = 3.0 and dt=10 +PREHOOK: query: -- validate partition key column predicate on two different partition column data type +-- can still work. +select count(*) from alter_coltype where ts = 3.0 and dt=10 PREHOOK: type: QUERY PREHOOK: Input: default@alter_coltype PREHOOK: Input: default@alter_coltype@dt=10/ts=3.0 PREHOOK: Input: default@alter_coltype@dt=100x/ts=3.0 PREHOOK: Input: default@alter_coltype@dt=100x/ts=6%3A30pm #### A masked pattern was here #### -POSTHOOK: query: select count(*) from alter_coltype where ts = 3.0 and dt=10 +POSTHOOK: query: -- validate partition key column predicate on two different partition column data type +-- can still work. +select count(*) from alter_coltype where ts = 3.0 and dt=10 POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_coltype POSTHOOK: Input: default@alter_coltype@dt=10/ts=3.0 @@ -630,14 +652,16 @@ STAGE PLANS: limit: -1 -PREHOOK: query: select key, value, dt, ts from alter_coltype where dt is not null +PREHOOK: query: -- query where multiple partition values (of different datatypes) are being selected +select key, value, dt, ts from alter_coltype where dt is not null PREHOOK: type: QUERY PREHOOK: Input: default@alter_coltype PREHOOK: Input: default@alter_coltype@dt=10/ts=3.0 PREHOOK: Input: default@alter_coltype@dt=100x/ts=3.0 PREHOOK: Input: default@alter_coltype@dt=100x/ts=6%3A30pm #### A masked pattern was here #### -POSTHOOK: query: select key, value, dt, ts from alter_coltype where dt is not null +POSTHOOK: query: -- query where multiple partition values (of different datatypes) are being selected +select key, value, dt, ts from alter_coltype where dt is not null POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_coltype POSTHOOK: Input: default@alter_coltype@dt=10/ts=3.0 @@ -951,14 +975,16 @@ POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=3.0).value SIMPLE [(src1)s POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: alter_coltype PARTITION(dt=100x,ts=6:30pm).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] 50 -PREHOOK: query: select count(*) from alter_coltype where dt = '100x' or dt = '10' +PREHOOK: query: -- make sure the partition predicate still works. +select count(*) from alter_coltype where dt = '100x' or dt = '10' PREHOOK: type: QUERY PREHOOK: Input: default@alter_coltype PREHOOK: Input: default@alter_coltype@dt=10/ts=3.0 PREHOOK: Input: default@alter_coltype@dt=100x/ts=3.0 PREHOOK: Input: default@alter_coltype@dt=100x/ts=6%3A30pm #### A masked pattern was here #### -POSTHOOK: query: select count(*) from alter_coltype where dt = '100x' or dt = '10' +POSTHOOK: query: -- make sure the partition predicate still works. +select count(*) from alter_coltype where dt = '100x' or dt = '10' POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_coltype POSTHOOK: Input: default@alter_coltype@dt=10/ts=3.0 diff --git ql/src/test/results/clientpositive/alter_partition_protect_mode.q.out ql/src/test/results/clientpositive/alter_partition_protect_mode.q.out index f7e3092..45cfe20 100644 --- ql/src/test/results/clientpositive/alter_partition_protect_mode.q.out +++ ql/src/test/results/clientpositive/alter_partition_protect_mode.q.out @@ -1,12 +1,16 @@ -PREHOOK: query: create table if not exists alter_part_protect_mode(key string, value string ) partitioned by (year string, month string) stored as textfile +PREHOOK: query: -- Create table +create table if not exists alter_part_protect_mode(key string, value string ) partitioned by (year string, month string) stored as textfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table if not exists alter_part_protect_mode(key string, value string ) partitioned by (year string, month string) stored as textfile +POSTHOOK: query: -- Create table +create table if not exists alter_part_protect_mode(key string, value string ) partitioned by (year string, month string) stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@alter_part_protect_mode -PREHOOK: query: load data local inpath '../data/files/T1.txt' overwrite into table alter_part_protect_mode partition (year='1996', month='10') +PREHOOK: query: -- Load data +load data local inpath '../data/files/T1.txt' overwrite into table alter_part_protect_mode partition (year='1996', month='10') PREHOOK: type: LOAD PREHOOK: Output: default@alter_part_protect_mode -POSTHOOK: query: load data local inpath '../data/files/T1.txt' overwrite into table alter_part_protect_mode partition (year='1996', month='10') +POSTHOOK: query: -- Load data +load data local inpath '../data/files/T1.txt' overwrite into table alter_part_protect_mode partition (year='1996', month='10') POSTHOOK: type: LOAD POSTHOOK: Output: default@alter_part_protect_mode POSTHOOK: Output: default@alter_part_protect_mode@year=1996/month=10 @@ -31,12 +35,14 @@ POSTHOOK: query: load data local inpath '../data/files/T1.txt' overwrite into ta POSTHOOK: type: LOAD POSTHOOK: Output: default@alter_part_protect_mode POSTHOOK: Output: default@alter_part_protect_mode@year=1994/month=07 -PREHOOK: query: alter table alter_part_protect_mode partition (year='1996') disable offline +PREHOOK: query: -- offline +alter table alter_part_protect_mode partition (year='1996') disable offline PREHOOK: type: ALTERPARTITION_PROTECTMODE PREHOOK: Input: default@alter_part_protect_mode PREHOOK: Output: default@alter_part_protect_mode@year=1996/month=10 PREHOOK: Output: default@alter_part_protect_mode@year=1996/month=12 -POSTHOOK: query: alter table alter_part_protect_mode partition (year='1996') disable offline +POSTHOOK: query: -- offline +alter table alter_part_protect_mode partition (year='1996') disable offline POSTHOOK: type: ALTERPARTITION_PROTECTMODE POSTHOOK: Input: default@alter_part_protect_mode POSTHOOK: Input: default@alter_part_protect_mode@year=1996/month=10 @@ -101,12 +107,14 @@ POSTHOOK: Input: default@alter_part_protect_mode@year=1995/month=09 7 17 1995 09 8 18 1995 09 8 28 1995 09 -PREHOOK: query: alter table alter_part_protect_mode partition (year='1996') enable no_drop +PREHOOK: query: -- no_drop +alter table alter_part_protect_mode partition (year='1996') enable no_drop PREHOOK: type: ALTERPARTITION_PROTECTMODE PREHOOK: Input: default@alter_part_protect_mode PREHOOK: Output: default@alter_part_protect_mode@year=1996/month=10 PREHOOK: Output: default@alter_part_protect_mode@year=1996/month=12 -POSTHOOK: query: alter table alter_part_protect_mode partition (year='1996') enable no_drop +POSTHOOK: query: -- no_drop +alter table alter_part_protect_mode partition (year='1996') enable no_drop POSTHOOK: type: ALTERPARTITION_PROTECTMODE POSTHOOK: Input: default@alter_part_protect_mode POSTHOOK: Input: default@alter_part_protect_mode@year=1996/month=10 @@ -147,12 +155,14 @@ POSTHOOK: query: alter table alter_part_protect_mode drop partition (year='1994' POSTHOOK: type: ALTERTABLE_DROPPARTS POSTHOOK: Input: default@alter_part_protect_mode POSTHOOK: Output: default@alter_part_protect_mode@year=1994/month=07 -PREHOOK: query: alter table alter_part_protect_mode partition (year='1996') disable no_drop +PREHOOK: query: -- Cleanup +alter table alter_part_protect_mode partition (year='1996') disable no_drop PREHOOK: type: ALTERPARTITION_PROTECTMODE PREHOOK: Input: default@alter_part_protect_mode PREHOOK: Output: default@alter_part_protect_mode@year=1996/month=10 PREHOOK: Output: default@alter_part_protect_mode@year=1996/month=12 -POSTHOOK: query: alter table alter_part_protect_mode partition (year='1996') disable no_drop +POSTHOOK: query: -- Cleanup +alter table alter_part_protect_mode partition (year='1996') disable no_drop POSTHOOK: type: ALTERPARTITION_PROTECTMODE POSTHOOK: Input: default@alter_part_protect_mode POSTHOOK: Input: default@alter_part_protect_mode@year=1996/month=10 diff --git ql/src/test/results/clientpositive/alter_partition_with_whitelist.q.out ql/src/test/results/clientpositive/alter_partition_with_whitelist.q.out index 0e8674d..0982dc1 100644 --- ql/src/test/results/clientpositive/alter_partition_with_whitelist.q.out +++ ql/src/test/results/clientpositive/alter_partition_with_whitelist.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE part_whitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) +PREHOOK: query: -- This pattern matches only letters. + +CREATE TABLE part_whitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part_whitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) +POSTHOOK: query: -- This pattern matches only letters. + +CREATE TABLE part_whitelist_test (key STRING, value STRING) PARTITIONED BY (ds STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@part_whitelist_test PREHOOK: query: SHOW PARTITIONS part_whitelist_test diff --git ql/src/test/results/clientpositive/alter_rename_partition.q.out ql/src/test/results/clientpositive/alter_rename_partition.q.out index 2290dc2..c6fa3f9 100644 --- ql/src/test/results/clientpositive/alter_rename_partition.q.out +++ ql/src/test/results/clientpositive/alter_rename_partition.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: DROP TABLE alter_rename_partition_src +PREHOOK: query: -- Cleanup +DROP TABLE alter_rename_partition_src PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE alter_rename_partition_src +POSTHOOK: query: -- Cleanup +DROP TABLE alter_rename_partition_src POSTHOOK: type: DROPTABLE PREHOOK: query: DROP TABLE alter_rename_partition PREHOOK: type: DROPTABLE @@ -103,11 +105,13 @@ POSTHOOK: Lineage: alter_rename_partition PARTITION(pcol1=old_part1:,pcol2=old_p 4 new_part1: new_part2: 5 new_part1: new_part2: 6 new_part1: new_part2: -PREHOOK: query: DROP TABLE alter_rename_partition_src +PREHOOK: query: -- Cleanup +DROP TABLE alter_rename_partition_src PREHOOK: type: DROPTABLE PREHOOK: Input: default@alter_rename_partition_src PREHOOK: Output: default@alter_rename_partition_src -POSTHOOK: query: DROP TABLE alter_rename_partition_src +POSTHOOK: query: -- Cleanup +DROP TABLE alter_rename_partition_src POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@alter_rename_partition_src POSTHOOK: Output: default@alter_rename_partition_src @@ -134,9 +138,13 @@ src_thrift srcbucket srcbucket2 srcpart -PREHOOK: query: CREATE DATABASE alter_rename_partition_db +PREHOOK: query: -- With non-default Database + +CREATE DATABASE alter_rename_partition_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE alter_rename_partition_db +POSTHOOK: query: -- With non-default Database + +CREATE DATABASE alter_rename_partition_db POSTHOOK: type: CREATEDATABASE POSTHOOK: Lineage: alter_rename_partition PARTITION(pcol1=old_part1:,pcol2=old_part2:).col1 SIMPLE [(alter_rename_partition_src)alter_rename_partition_src.FieldSchema(name:col1, type:string, comment:null), ] PREHOOK: query: USE alter_rename_partition_db diff --git ql/src/test/results/clientpositive/alter_rename_partition_authorization.q.out ql/src/test/results/clientpositive/alter_rename_partition_authorization.q.out index a738797..4262b7c 100644 --- ql/src/test/results/clientpositive/alter_rename_partition_authorization.q.out +++ ql/src/test/results/clientpositive/alter_rename_partition_authorization.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: create table src_auth_tmp as select * from src +PREHOOK: query: -- SORT_BEFORE_DIFF + +create table src_auth_tmp as select * from src PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src -POSTHOOK: query: create table src_auth_tmp as select * from src +POSTHOOK: query: -- SORT_BEFORE_DIFF + +create table src_auth_tmp as select * from src POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: default@src_auth_tmp @@ -24,10 +28,12 @@ PREHOOK: Output: default@src_auth_tmp POSTHOOK: query: grant select on table src_auth_tmp to user hive_test_user POSTHOOK: type: GRANT_PRIVILEGE POSTHOOK: Output: default@src_auth_tmp -PREHOOK: query: grant Create on table authorization_part to user hive_test_user +PREHOOK: query: -- column grant to user +grant Create on table authorization_part to user hive_test_user PREHOOK: type: GRANT_PRIVILEGE PREHOOK: Output: default@authorization_part -POSTHOOK: query: grant Create on table authorization_part to user hive_test_user +POSTHOOK: query: -- column grant to user +grant Create on table authorization_part to user hive_test_user POSTHOOK: type: GRANT_PRIVILEGE POSTHOOK: Output: default@authorization_part PREHOOK: query: grant Update on table authorization_part to user hive_test_user diff --git ql/src/test/results/clientpositive/alter_table_serde.q.out ql/src/test/results/clientpositive/alter_table_serde.q.out index 8ac1d2e..0c80720 100644 --- ql/src/test/results/clientpositive/alter_table_serde.q.out +++ ql/src/test/results/clientpositive/alter_table_serde.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: create table test_table (id int, query string, name string) +PREHOOK: query: -- test table +create table test_table (id int, query string, name string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table test_table (id int, query string, name string) +POSTHOOK: query: -- test table +create table test_table (id int, query string, name string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table PREHOOK: query: describe extended test_table @@ -60,9 +62,11 @@ POSTHOOK: query: drop table test_table POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@test_table POSTHOOK: Output: default@test_table -PREHOOK: query: create table test_table (id int, query string, name string) partitioned by (dt string) +PREHOOK: query: --- test partitioned table +create table test_table (id int, query string, name string) partitioned by (dt string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table test_table (id int, query string, name string) partitioned by (dt string) +POSTHOOK: query: --- test partitioned table +create table test_table (id int, query string, name string) partitioned by (dt string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table PREHOOK: query: alter table test_table add partition (dt = '2011') @@ -139,11 +143,15 @@ dt string None dt string None #### A masked pattern was here #### -PREHOOK: query: alter table test_table partition(dt='2011') set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +PREHOOK: query: -- test partitions + +alter table test_table partition(dt='2011') set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' PREHOOK: type: ALTERPARTITION_SERIALIZER PREHOOK: Input: default@test_table PREHOOK: Output: default@test_table@dt=2011 -POSTHOOK: query: alter table test_table partition(dt='2011') set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +POSTHOOK: query: -- test partitions + +alter table test_table partition(dt='2011') set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' POSTHOOK: type: ALTERPARTITION_SERIALIZER POSTHOOK: Input: default@test_table POSTHOOK: Input: default@test_table@dt=2011 diff --git ql/src/test/results/clientpositive/alter_table_serde2.q.out ql/src/test/results/clientpositive/alter_table_serde2.q.out index 71350f6..b4c7d42 100644 --- ql/src/test/results/clientpositive/alter_table_serde2.q.out +++ ql/src/test/results/clientpositive/alter_table_serde2.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE TABLE tst1(key STRING, value STRING) PARTITIONED BY (ds STRING) +PREHOOK: query: -- Tests that when overwriting a partition in a table after altering the serde properties +-- the partition metadata is updated as well. + +CREATE TABLE tst1(key STRING, value STRING) PARTITIONED BY (ds STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE tst1(key STRING, value STRING) PARTITIONED BY (ds STRING) +POSTHOOK: query: -- Tests that when overwriting a partition in a table after altering the serde properties +-- the partition metadata is updated as well. + +CREATE TABLE tst1(key STRING, value STRING) PARTITIONED BY (ds STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tst1 PREHOOK: query: DESCRIBE FORMATTED tst1 @@ -87,11 +93,15 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: ALTER TABLE tst1 SET SERDEPROPERTIES ('field.delim' = ',') +PREHOOK: query: -- Test altering the serde properties + +ALTER TABLE tst1 SET SERDEPROPERTIES ('field.delim' = ',') PREHOOK: type: ALTERTABLE_SERDEPROPERTIES PREHOOK: Input: default@tst1 PREHOOK: Output: default@tst1 -POSTHOOK: query: ALTER TABLE tst1 SET SERDEPROPERTIES ('field.delim' = ',') +POSTHOOK: query: -- Test altering the serde properties + +ALTER TABLE tst1 SET SERDEPROPERTIES ('field.delim' = ',') POSTHOOK: type: ALTERTABLE_SERDEPROPERTIES POSTHOOK: Input: default@tst1 POSTHOOK: Output: default@tst1 diff --git ql/src/test/results/clientpositive/ambiguous_col.q.out ql/src/test/results/clientpositive/ambiguous_col.q.out index cba465e..d0c92b0 100644 --- ql/src/test/results/clientpositive/ambiguous_col.q.out +++ ql/src/test/results/clientpositive/ambiguous_col.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: explain select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +PREHOOK: query: -- TOK_ALLCOLREF +explain select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t PREHOOK: type: QUERY -POSTHOOK: query: explain select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +POSTHOOK: query: -- TOK_ALLCOLREF +explain select * from (select a.key, a.* from (select * from src) a join (select * from src1) b on (a.key = b.key)) t POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) @@ -82,9 +84,11 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +PREHOOK: query: -- DOT +explain select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t PREHOOK: type: QUERY -POSTHOOK: query: explain select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +POSTHOOK: query: -- DOT +explain select * from (select a.key, a.`[k].*` from (select * from src) a join (select * from src1) b on (a.key = b.key)) t POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) `[k].*`))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) @@ -160,9 +164,11 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +PREHOOK: query: -- EXPRESSION +explain select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t PREHOOK: type: QUERY -POSTHOOK: query: explain select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t +POSTHOOK: query: -- EXPRESSION +explain select * from (select a.key, a.key from (select * from src) a join (select * from src1) b on (a.key = b.key)) t POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) diff --git ql/src/test/results/clientpositive/archive_multi.q.out ql/src/test/results/clientpositive/archive_multi.q.out index 601b7b4..7a5838f 100644 --- ql/src/test/results/clientpositive/archive_multi.q.out +++ ql/src/test/results/clientpositive/archive_multi.q.out @@ -104,14 +104,18 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpar POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 PREHOOK: type: QUERY PREHOOK: Input: default@tstsrcpart PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 POSTHOOK: type: QUERY POSTHOOK: Input: default@tstsrcpart diff --git ql/src/test/results/clientpositive/authorization_1.q.out ql/src/test/results/clientpositive/authorization_1.q.out index 2e0691a..0970ef6 100644 --- ql/src/test/results/clientpositive/authorization_1.q.out +++ ql/src/test/results/clientpositive/authorization_1.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: create table src_autho_test as select * from src +PREHOOK: query: -- SORT_BEFORE_DIFF + +create table src_autho_test as select * from src PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src -POSTHOOK: query: create table src_autho_test as select * from src +POSTHOOK: query: -- SORT_BEFORE_DIFF + +create table src_autho_test as select * from src POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: default@src_autho_test @@ -416,9 +420,11 @@ PREHOOK: Output: default@src_autho_test POSTHOOK: query: revoke select on table src_autho_test from role src_role POSTHOOK: type: REVOKE_PRIVILEGE POSTHOOK: Output: default@src_autho_test -PREHOOK: query: drop role src_role +PREHOOK: query: -- drop role +drop role src_role PREHOOK: type: DROPROLE -POSTHOOK: query: drop role src_role +POSTHOOK: query: -- drop role +drop role src_role POSTHOOK: type: DROPROLE PREHOOK: query: drop table src_autho_test PREHOOK: type: DROPTABLE diff --git ql/src/test/results/clientpositive/authorization_2.q.out ql/src/test/results/clientpositive/authorization_2.q.out index 60d7f7c..c934a2a 100644 --- ql/src/test/results/clientpositive/authorization_2.q.out +++ ql/src/test/results/clientpositive/authorization_2.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table authorization_part (key int, value string) partitioned by (ds string) +PREHOOK: query: -- SORT_BEFORE_DIFF + +create table authorization_part (key int, value string) partitioned by (ds string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table authorization_part (key int, value string) partitioned by (ds string) +POSTHOOK: query: -- SORT_BEFORE_DIFF + +create table authorization_part (key int, value string) partitioned by (ds string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@authorization_part PREHOOK: query: create table src_auth_tmp as select * from src @@ -18,10 +22,12 @@ POSTHOOK: query: ALTER TABLE authorization_part SET TBLPROPERTIES ("PARTITION_LE POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@authorization_part POSTHOOK: Output: default@authorization_part -PREHOOK: query: grant Create on table authorization_part to user hive_test_user +PREHOOK: query: -- column grant to user +grant Create on table authorization_part to user hive_test_user PREHOOK: type: GRANT_PRIVILEGE PREHOOK: Output: default@authorization_part -POSTHOOK: query: grant Create on table authorization_part to user hive_test_user +POSTHOOK: query: -- column grant to user +grant Create on table authorization_part to user hive_test_user POSTHOOK: type: GRANT_PRIVILEGE POSTHOOK: Output: default@authorization_part PREHOOK: query: grant Update on table authorization_part to user hive_test_user @@ -281,9 +287,11 @@ POSTHOOK: Input: default@authorization_part POSTHOOK: Output: default@authorization_part@ds=2010 POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).key EXPRESSION [(src_auth_tmp)src_auth_tmp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).value SIMPLE [(src_auth_tmp)src_auth_tmp.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: show grant user hive_test_user on table authorization_part +PREHOOK: query: -- table grant to user +show grant user hive_test_user on table authorization_part PREHOOK: type: SHOW_GRANT -POSTHOOK: query: show grant user hive_test_user on table authorization_part +POSTHOOK: query: -- table grant to user +show grant user hive_test_user on table authorization_part POSTHOOK: type: SHOW_GRANT POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).key EXPRESSION [(src_auth_tmp)src_auth_tmp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).value SIMPLE [(src_auth_tmp)src_auth_tmp.FieldSchema(name:value, type:string, comment:null), ] @@ -678,9 +686,13 @@ POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).key EXPRESSION [(src_au POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).value SIMPLE [(src_auth_tmp)src_auth_tmp.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).key EXPRESSION [(src_auth_tmp)src_auth_tmp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).value SIMPLE [(src_auth_tmp)src_auth_tmp.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: show grant group hive_test_group1 on table authorization_part +PREHOOK: query: -- column grant to group + +show grant group hive_test_group1 on table authorization_part PREHOOK: type: SHOW_GRANT -POSTHOOK: query: show grant group hive_test_group1 on table authorization_part +POSTHOOK: query: -- column grant to group + +show grant group hive_test_group1 on table authorization_part POSTHOOK: type: SHOW_GRANT POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).key EXPRESSION [(src_auth_tmp)src_auth_tmp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).value SIMPLE [(src_auth_tmp)src_auth_tmp.FieldSchema(name:value, type:string, comment:null), ] @@ -930,9 +942,11 @@ POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).key EXPRESSION [(src_au POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).value SIMPLE [(src_auth_tmp)src_auth_tmp.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).key EXPRESSION [(src_auth_tmp)src_auth_tmp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).value SIMPLE [(src_auth_tmp)src_auth_tmp.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: show grant group hive_test_group1 on table authorization_part +PREHOOK: query: -- table grant to group +show grant group hive_test_group1 on table authorization_part PREHOOK: type: SHOW_GRANT -POSTHOOK: query: show grant group hive_test_group1 on table authorization_part +POSTHOOK: query: -- table grant to group +show grant group hive_test_group1 on table authorization_part POSTHOOK: type: SHOW_GRANT POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).key EXPRESSION [(src_auth_tmp)src_auth_tmp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: authorization_part PARTITION(ds=2010).value SIMPLE [(src_auth_tmp)src_auth_tmp.FieldSchema(name:value, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/authorization_3.q.out ql/src/test/results/clientpositive/authorization_3.q.out index 5441ff3..4555892 100644 --- ql/src/test/results/clientpositive/authorization_3.q.out +++ ql/src/test/results/clientpositive/authorization_3.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: create table src_autho_test as select * from src +PREHOOK: query: -- SORT_BEFORE_DIFF + +create table src_autho_test as select * from src PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src -POSTHOOK: query: create table src_autho_test as select * from src +POSTHOOK: query: -- SORT_BEFORE_DIFF + +create table src_autho_test as select * from src POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: default@src_autho_test diff --git ql/src/test/results/clientpositive/authorization_4.q.out ql/src/test/results/clientpositive/authorization_4.q.out index 9dded39..f80b517 100644 --- ql/src/test/results/clientpositive/authorization_4.q.out +++ ql/src/test/results/clientpositive/authorization_4.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: create table src_autho_test as select * from src +PREHOOK: query: -- SORT_BEFORE_DIFF + +create table src_autho_test as select * from src PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src -POSTHOOK: query: create table src_autho_test as select * from src +POSTHOOK: query: -- SORT_BEFORE_DIFF + +create table src_autho_test as select * from src POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: default@src_autho_test diff --git ql/src/test/results/clientpositive/authorization_5.q.out ql/src/test/results/clientpositive/authorization_5.q.out index ed6dd89..11effa7 100644 --- ql/src/test/results/clientpositive/authorization_5.q.out +++ ql/src/test/results/clientpositive/authorization_5.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE DATABASE IF NOT EXISTS test_db COMMENT 'Hive test database' +PREHOOK: query: -- SORT_BEFORE_DIFF + +CREATE DATABASE IF NOT EXISTS test_db COMMENT 'Hive test database' PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE IF NOT EXISTS test_db COMMENT 'Hive test database' +POSTHOOK: query: -- SORT_BEFORE_DIFF + +CREATE DATABASE IF NOT EXISTS test_db COMMENT 'Hive test database' POSTHOOK: type: CREATEDATABASE PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES diff --git ql/src/test/results/clientpositive/authorization_6.q.out ql/src/test/results/clientpositive/authorization_6.q.out index 28c94da..b8483ca 100644 --- ql/src/test/results/clientpositive/authorization_6.q.out +++ ql/src/test/results/clientpositive/authorization_6.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: create table src_auth_tmp as select * from src +PREHOOK: query: -- SORT_BEFORE_DIFF + +create table src_auth_tmp as select * from src PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src -POSTHOOK: query: create table src_auth_tmp as select * from src +POSTHOOK: query: -- SORT_BEFORE_DIFF + +create table src_auth_tmp as select * from src POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: default@src_auth_tmp @@ -24,10 +28,12 @@ PREHOOK: Output: default@src_auth_tmp POSTHOOK: query: grant select on table src_auth_tmp to user hive_test_user POSTHOOK: type: GRANT_PRIVILEGE POSTHOOK: Output: default@src_auth_tmp -PREHOOK: query: grant Create on table authorization_part to user hive_test_user +PREHOOK: query: -- column grant to user +grant Create on table authorization_part to user hive_test_user PREHOOK: type: GRANT_PRIVILEGE PREHOOK: Output: default@authorization_part -POSTHOOK: query: grant Create on table authorization_part to user hive_test_user +POSTHOOK: query: -- column grant to user +grant Create on table authorization_part to user hive_test_user POSTHOOK: type: GRANT_PRIVILEGE POSTHOOK: Output: default@authorization_part PREHOOK: query: grant Update on table authorization_part to user hive_test_user diff --git ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out index 4b56199..cf97745 100644 --- ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out +++ ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 PREHOOK: query: explain diff --git ql/src/test/results/clientpositive/auto_join25.q.out ql/src/test/results/clientpositive/auto_join25.q.out index 9bb6053..2ccded2 100644 --- ql/src/test/results/clientpositive/auto_join25.q.out +++ ql/src/test/results/clientpositive/auto_join25.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: query: -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: query: -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) diff --git ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out index 2c3a828..ac89111 100644 --- ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out @@ -34,12 +34,14 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -129,7 +131,8 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 22 -PREHOOK: query: explain +PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select key, count(*) from @@ -139,7 +142,8 @@ select count(*) from group by key ) subq2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select key, count(*) from @@ -293,7 +297,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 6 -PREHOOK: query: explain +PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. +explain select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -311,7 +317,9 @@ join on src1.key = src2.key order by src1.key, src1.cnt1, src2.cnt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. +explain select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -621,14 +629,18 @@ POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:stri 5 9 9 8 1 1 9 1 1 -PREHOOK: query: explain +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -733,7 +745,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain select count(*) from ( select * from @@ -745,7 +759,9 @@ select count(*) from join tbl2 b on subq2.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain select count(*) from ( select * from @@ -865,7 +881,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain select count(*) from ( select * from @@ -884,7 +902,9 @@ select count(*) from ) subq4 on subq2.key = subq4.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain select count(*) from ( select * from @@ -1023,14 +1043,20 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. +explain select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. +explain select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join @@ -1135,14 +1161,18 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side +-- join should be performed +explain select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side +-- join should be performed +explain select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join @@ -1278,12 +1308,16 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 22 -PREHOOK: query: explain +PREHOOK: query: -- One of the tables is a sub-query and the other is not. +-- It should be converted to a sort-merge join. +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- One of the tables is a sub-query and the other is not. +-- It should be converted to a sort-merge join. +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key @@ -1382,7 +1416,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to to a sort-merge join +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -1392,7 +1428,9 @@ select count(*) from (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 on (subq1.key = subq3.key) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to to a sort-merge join +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -1509,7 +1547,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 56 -PREHOOK: query: explain +PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. +-- The join should be converted to a sort-merge join +explain select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -1522,7 +1562,9 @@ select count(*) from ( join tbl2 b on subq2.key = b.key) a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. +-- The join should be converted to a sort-merge join +explain select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -1663,14 +1705,18 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: -- The join is followed by a multi-table insert. It should be converted to +-- a sort-merge join +explain from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, val1, val2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The join is followed by a multi-table insert. It should be converted to +-- a sort-merge join +explain from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -2009,14 +2055,18 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer. +-- It should be converted to a sort-merge join +explain from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 insert overwrite table dest1 select key, val1 insert overwrite table dest2 select key, count(*) group by key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer. +-- It should be converted to a sort-merge join +explain from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out index 4116d0f..53c7c21 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket + +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket + +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') @@ -71,9 +75,11 @@ PREHOOK: Output: default@bucket_big@ds=2008-04-09 POSTHOOK: query: load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') POSTHOOK: type: LOAD POSTHOOK: Output: default@bucket_big@ds=2008-04-09 -PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out index 8fbe341..da375f6 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out @@ -34,7 +34,8 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: -- One of the subqueries contains a union, so it should not be converted to a sort-merge join. +explain select count(*) from ( select * from @@ -46,7 +47,8 @@ select count(*) from (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- One of the subqueries contains a union, so it should not be converted to a sort-merge join. +explain select count(*) from ( select * from @@ -262,14 +264,16 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 40 -PREHOOK: query: explain +PREHOOK: query: -- One of the subqueries contains a groupby, so it should not be converted to a sort-merge join. +explain select count(*) from (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- One of the subqueries contains a groupby, so it should not be converted to a sort-merge join. +explain select count(*) from (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 join diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out index 26194e4..b5b0e81 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 1 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 1 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') @@ -59,9 +61,11 @@ PREHOOK: Output: default@bucket_big@ds=2008-04-09 POSTHOOK: query: load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') POSTHOOK: type: LOAD POSTHOOK: Output: default@bucket_big@ds=2008-04-09 -PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: query: -- Since the leftmost table is assumed as the big table, arrange the tables in the join accordingly +explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: query: -- Since the leftmost table is assumed as the big table, arrange the tables in the join accordingly +explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_big) a) (TOK_TABREF (TOK_TABNAME bucket_small) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out index a6d7037..169b159 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 2 part, 2 bucket & big 1 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 2 part, 2 bucket & big 1 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') @@ -59,9 +61,11 @@ PREHOOK: Output: default@bucket_big@ds=2008-04-08 POSTHOOK: query: load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') POSTHOOK: type: LOAD POSTHOOK: Output: default@bucket_big@ds=2008-04-08 -PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out index 49262ea..d69be67 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 2 part, 4 bucket & big 1 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 2 part, 4 bucket & big 1 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') @@ -71,9 +73,11 @@ PREHOOK: Output: default@bucket_big@ds=2008-04-08 POSTHOOK: query: load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') POSTHOOK: type: LOAD POSTHOOK: Output: default@bucket_big@ds=2008-04-08 -PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out index 16894b6..3edd6c4 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small no part, 4 bucket & big no part, 2 bucket +CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small no part, 4 bucket & big no part, 2 bucket +CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small @@ -44,9 +46,11 @@ PREHOOK: Output: default@bucket_big POSTHOOK: query: load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big POSTHOOK: type: LOAD POSTHOOK: Output: default@bucket_big -PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out index 840a931..36a9c74 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out @@ -70,9 +70,11 @@ POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value +PREHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on a different key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value +POSTHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on a different key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value POSTHOOK: type: QUERY POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -207,9 +209,11 @@ POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 2654 -PREHOOK: query: explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key +PREHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key +POSTHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key POSTHOOK: type: QUERY POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -581,9 +585,11 @@ POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 2654 -PREHOOK: query: explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key +PREHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key PREHOOK: type: QUERY -POSTHOOK: query: explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key +POSTHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key POSTHOOK: type: QUERY POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -679,9 +685,11 @@ POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 2654 -PREHOOK: query: explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value +PREHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on a different key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value +POSTHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on a different key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value POSTHOOK: type: QUERY POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out index c979d1b..a4da4a5 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 2 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 2 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') @@ -84,9 +86,11 @@ PREHOOK: Output: default@bucket_big@ds=2008-04-09 POSTHOOK: query: load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') POSTHOOK: type: LOAD POSTHOOK: Output: default@bucket_big@ds=2008-04-09 -PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out index 3b17991..a63a131 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 2 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 2 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') @@ -84,9 +86,11 @@ PREHOOK: Output: default@bucket_big@ds=2008-04-09 POSTHOOK: query: load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') POSTHOOK: type: LOAD POSTHOOK: Output: default@bucket_big@ds=2008-04-09 -PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out index e105db6..80aaff8 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out @@ -34,12 +34,14 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -129,7 +131,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 22 -PREHOOK: query: explain +PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +-- Add a order by at the end to make the results deterministic. +explain select key, count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key @@ -137,7 +141,9 @@ select key, count(*) from group by key order by key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +-- Add a order by at the end to make the results deterministic. +explain select key, count(*) from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key @@ -285,7 +291,8 @@ POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:stri 5 9 8 1 9 1 -PREHOOK: query: explain +PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select key, count(*) from @@ -295,7 +302,8 @@ select count(*) from group by key ) subq2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select key, count(*) from @@ -449,7 +457,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 6 -PREHOOK: query: explain +PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. +explain select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -467,7 +477,9 @@ join on src1.key = src2.key order by src1.key, src1.cnt1, src2.cnt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. +explain select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from @@ -889,14 +901,18 @@ POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:stri 5 9 9 8 1 1 9 1 1 -PREHOOK: query: explain +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -1001,7 +1017,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain select count(*) from ( select * from @@ -1013,7 +1031,9 @@ select count(*) from join tbl2 b on subq2.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain select count(*) from ( select * from @@ -1133,7 +1153,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain select count(*) from ( select * from @@ -1152,7 +1174,9 @@ select count(*) from ) subq4 on subq2.key = subq4.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain select count(*) from ( select * from @@ -1291,14 +1315,20 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. +explain select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. +explain select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join @@ -1403,14 +1433,18 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized mapside +-- join should be performed +explain select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized mapside +-- join should be performed +explain select count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join @@ -1551,12 +1585,16 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 22 -PREHOOK: query: explain +PREHOOK: query: -- The left table is a sub-query and the right table is not. +-- It should be converted to a sort-merge join. +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The left table is a sub-query and the right table is not. +-- It should be converted to a sort-merge join. +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key @@ -1655,13 +1693,17 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- The right table is a sub-query and the left table is not. +-- It should be converted to a sort-merge join. +explain select count(*) from tbl1 a join (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 on a.key = subq1.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The right table is a sub-query and the left table is not. +-- It should be converted to a sort-merge join. +explain select count(*) from tbl1 a join (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 @@ -1754,7 +1796,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to to a sort-merge join +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -1764,7 +1808,9 @@ select count(*) from (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 on (subq1.key = subq3.key) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to to a sort-merge join +explain select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -1881,7 +1927,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 56 -PREHOOK: query: explain +PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. +-- The join should be converted to a sort-merge join +explain select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -1894,7 +1942,9 @@ select count(*) from ( join tbl2 b on subq2.key = b.key) a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. +-- The join should be converted to a sort-merge join +explain select count(*) from ( select subq2.key as key, subq2.value as value1, b.value as value2 from ( diff --git ql/src/test/results/clientpositive/autogen_colalias.q.out ql/src/test/results/clientpositive/autogen_colalias.q.out index e6a19af..889494d 100644 --- ql/src/test/results/clientpositive/autogen_colalias.q.out +++ ql/src/test/results/clientpositive/autogen_colalias.q.out @@ -88,7 +88,9 @@ POSTHOOK: type: DESCTABLE # col_name data_type comment key string None -PREHOOK: query: DROP TEMPORARY FUNCTION test_max +PREHOOK: query: -- Drop the temporary function at the end till HIVE-3160 gets fixed +DROP TEMPORARY FUNCTION test_max PREHOOK: type: DROPFUNCTION -POSTHOOK: query: DROP TEMPORARY FUNCTION test_max +POSTHOOK: query: -- Drop the temporary function at the end till HIVE-3160 gets fixed +DROP TEMPORARY FUNCTION test_max POSTHOOK: type: DROPFUNCTION diff --git ql/src/test/results/clientpositive/avro_change_schema.q.out ql/src/test/results/clientpositive/avro_change_schema.q.out index 56d1276..87edc83 100644 --- ql/src/test/results/clientpositive/avro_change_schema.q.out +++ ql/src/test/results/clientpositive/avro_change_schema.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: CREATE TABLE avro2 +PREHOOK: query: -- verify that we can update the table properties +CREATE TABLE avro2 ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS @@ -12,7 +13,8 @@ TBLPROPERTIES ('avro.schema.literal'='{ "namespace": "org.apache.hive", { "name":"string2", "type":"string" } ] }') PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE avro2 +POSTHOOK: query: -- verify that we can update the table properties +CREATE TABLE avro2 ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS diff --git ql/src/test/results/clientpositive/avro_compression_enabled.q.out ql/src/test/results/clientpositive/avro_compression_enabled.q.out index 6a4c508..eefcfe7 100644 --- ql/src/test/results/clientpositive/avro_compression_enabled.q.out +++ ql/src/test/results/clientpositive/avro_compression_enabled.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: CREATE TABLE doctors4 +PREHOOK: query: -- verify that new joins bring in correct schemas (including evolved schemas) + +CREATE TABLE doctors4 ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS @@ -33,7 +35,9 @@ TBLPROPERTIES ('avro.schema.literal'='{ ] }') PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE doctors4 +POSTHOOK: query: -- verify that new joins bring in correct schemas (including evolved schemas) + +CREATE TABLE doctors4 ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS diff --git ql/src/test/results/clientpositive/avro_evolved_schemas.q.out ql/src/test/results/clientpositive/avro_evolved_schemas.q.out index 39d6234..40d700d 100644 --- ql/src/test/results/clientpositive/avro_evolved_schemas.q.out +++ ql/src/test/results/clientpositive/avro_evolved_schemas.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: CREATE TABLE doctors_with_new_field +PREHOOK: query: -- verify that new fields in schema get propagated to table scans +CREATE TABLE doctors_with_new_field ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS @@ -33,7 +34,8 @@ TBLPROPERTIES ('avro.schema.literal'='{ ] }') PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE doctors_with_new_field +POSTHOOK: query: -- verify that new fields in schema get propagated to table scans +CREATE TABLE doctors_with_new_field ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS diff --git ql/src/test/results/clientpositive/avro_joins.q.out ql/src/test/results/clientpositive/avro_joins.q.out index 9cd5702..db98485 100644 --- ql/src/test/results/clientpositive/avro_joins.q.out +++ ql/src/test/results/clientpositive/avro_joins.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: CREATE TABLE doctors4 +PREHOOK: query: -- verify that new joins bring in correct schemas (including evolved schemas) + +CREATE TABLE doctors4 ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS @@ -33,7 +35,9 @@ TBLPROPERTIES ('avro.schema.literal'='{ ] }') PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE doctors4 +POSTHOOK: query: -- verify that new joins bring in correct schemas (including evolved schemas) + +CREATE TABLE doctors4 ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS diff --git ql/src/test/results/clientpositive/avro_nullable_fields.q.out ql/src/test/results/clientpositive/avro_nullable_fields.q.out index e23e088..a0a89c6 100644 --- ql/src/test/results/clientpositive/avro_nullable_fields.q.out +++ ql/src/test/results/clientpositive/avro_nullable_fields.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: CREATE TABLE test_serializer(string1 STRING, +PREHOOK: query: -- Verify that nullable fields properly work +CREATE TABLE test_serializer(string1 STRING, int1 INT, tinyint1 TINYINT, smallint1 SMALLINT, @@ -16,7 +17,8 @@ PREHOOK: query: CREATE TABLE test_serializer(string1 STRING, ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n' STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_serializer(string1 STRING, +POSTHOOK: query: -- Verify that nullable fields properly work +CREATE TABLE test_serializer(string1 STRING, int1 INT, tinyint1 TINYINT, smallint1 SMALLINT, diff --git ql/src/test/results/clientpositive/avro_sanity_test.q.out ql/src/test/results/clientpositive/avro_sanity_test.q.out index 2e1e074..e625a4c 100644 --- ql/src/test/results/clientpositive/avro_sanity_test.q.out +++ ql/src/test/results/clientpositive/avro_sanity_test.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: CREATE TABLE doctors +PREHOOK: query: -- verify that we can actually read avro files +CREATE TABLE doctors ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS @@ -27,7 +28,8 @@ TBLPROPERTIES ('avro.schema.literal'='{ ] }') PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE doctors +POSTHOOK: query: -- verify that we can actually read avro files +CREATE TABLE doctors ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS diff --git ql/src/test/results/clientpositive/avro_schema_error_message.q.out ql/src/test/results/clientpositive/avro_schema_error_message.q.out index f12dfb5..b699181 100644 --- ql/src/test/results/clientpositive/avro_schema_error_message.q.out +++ ql/src/test/results/clientpositive/avro_schema_error_message.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: CREATE TABLE avro_with_no_schema +PREHOOK: query: -- verify we get the sentinel schema if we don't provide one + +CREATE TABLE avro_with_no_schema ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE avro_with_no_schema +POSTHOOK: query: -- verify we get the sentinel schema if we don't provide one + +CREATE TABLE avro_with_no_schema ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' STORED AS diff --git ql/src/test/results/clientpositive/ba_table1.q.out ql/src/test/results/clientpositive/ba_table1.q.out index 33c626b..2b98c59 100644 --- ql/src/test/results/clientpositive/ba_table1.q.out +++ ql/src/test/results/clientpositive/ba_table1.q.out @@ -2,9 +2,13 @@ PREHOOK: query: drop table ba_test PREHOOK: type: DROPTABLE POSTHOOK: query: drop table ba_test POSTHOOK: type: DROPTABLE -PREHOOK: query: create table ba_test (ba_key binary, ba_val binary) +PREHOOK: query: -- This query tests a) binary type works correctly in grammar b) string can be cast into binary c) binary can be stored in a table d) binary data can be loaded back again and queried d) order-by on a binary key + +create table ba_test (ba_key binary, ba_val binary) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table ba_test (ba_key binary, ba_val binary) +POSTHOOK: query: -- This query tests a) binary type works correctly in grammar b) string can be cast into binary c) binary can be stored in a table d) binary data can be loaded back again and queried d) order-by on a binary key + +create table ba_test (ba_key binary, ba_val binary) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@ba_test PREHOOK: query: describe extended ba_test diff --git ql/src/test/results/clientpositive/ba_table2.q.out ql/src/test/results/clientpositive/ba_table2.q.out index bbb473e..8942cb9 100644 --- ql/src/test/results/clientpositive/ba_table2.q.out +++ ql/src/test/results/clientpositive/ba_table2.q.out @@ -2,9 +2,13 @@ PREHOOK: query: drop table ba_test PREHOOK: type: DROPTABLE POSTHOOK: query: drop table ba_test POSTHOOK: type: DROPTABLE -PREHOOK: query: create table ba_test (ba_key binary, ba_val binary) +PREHOOK: query: -- All the test in ba_test1.q + using LazyBinarySerde instead of LazySimpleSerde + +create table ba_test (ba_key binary, ba_val binary) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table ba_test (ba_key binary, ba_val binary) +POSTHOOK: query: -- All the test in ba_test1.q + using LazyBinarySerde instead of LazySimpleSerde + +create table ba_test (ba_key binary, ba_val binary) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@ba_test PREHOOK: query: alter table ba_test set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' diff --git ql/src/test/results/clientpositive/ba_table3.q.out ql/src/test/results/clientpositive/ba_table3.q.out index 2a0d6ea..eaa8051 100644 --- ql/src/test/results/clientpositive/ba_table3.q.out +++ ql/src/test/results/clientpositive/ba_table3.q.out @@ -2,9 +2,13 @@ PREHOOK: query: drop table ba_test PREHOOK: type: DROPTABLE POSTHOOK: query: drop table ba_test POSTHOOK: type: DROPTABLE -PREHOOK: query: create table ba_test (ba_key binary, ba_val binary) +PREHOOK: query: -- All the tests of ba_table1.q + test for a group-by and aggregation on a binary key. + +create table ba_test (ba_key binary, ba_val binary) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table ba_test (ba_key binary, ba_val binary) +POSTHOOK: query: -- All the tests of ba_table1.q + test for a group-by and aggregation on a binary key. + +create table ba_test (ba_key binary, ba_val binary) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@ba_test PREHOOK: query: from src insert overwrite table ba_test select cast (src.key as binary), cast (src.value as binary) diff --git ql/src/test/results/clientpositive/ba_table_udfs.q.out ql/src/test/results/clientpositive/ba_table_udfs.q.out index a54d6fe..b97c33d 100644 --- ql/src/test/results/clientpositive/ba_table_udfs.q.out +++ ql/src/test/results/clientpositive/ba_table_udfs.q.out @@ -2,7 +2,9 @@ PREHOOK: query: USE default PREHOOK: type: SWITCHDATABASE POSTHOOK: query: USE default POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: SELECT +PREHOOK: query: -- this query tests all the udfs provided to work with binary types + +SELECT key, value, LENGTH(CAST(src.key AS BINARY)), @@ -17,7 +19,9 @@ LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- this query tests all the udfs provided to work with binary types + +SELECT key, value, LENGTH(CAST(src.key AS BINARY)), diff --git ql/src/test/results/clientpositive/ba_table_union.q.out ql/src/test/results/clientpositive/ba_table_union.q.out index cfc0358..85ea093 100644 --- ql/src/test/results/clientpositive/ba_table_union.q.out +++ ql/src/test/results/clientpositive/ba_table_union.q.out @@ -2,9 +2,11 @@ PREHOOK: query: drop table ba_test PREHOOK: type: DROPTABLE POSTHOOK: query: drop table ba_test POSTHOOK: type: DROPTABLE -PREHOOK: query: create table ba_test (ba_key binary, ba_val binary) +PREHOOK: query: -- this query tests ba_table1.q + nested queries with multiple operations on binary data types + union on binary types +create table ba_test (ba_key binary, ba_val binary) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table ba_test (ba_key binary, ba_val binary) +POSTHOOK: query: -- this query tests ba_table1.q + nested queries with multiple operations on binary data types + union on binary types +create table ba_test (ba_key binary, ba_val binary) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@ba_test PREHOOK: query: describe extended ba_test diff --git ql/src/test/results/clientpositive/binary_output_format.q.out ql/src/test/results/clientpositive/binary_output_format.q.out index ffc0d31..46122da 100644 --- ql/src/test/results/clientpositive/binary_output_format.q.out +++ ql/src/test/results/clientpositive/binary_output_format.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: CREATE TABLE dest1(mydata STRING) +PREHOOK: query: -- Create a table with binary output format +CREATE TABLE dest1(mydata STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( @@ -8,7 +9,8 @@ STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveBinaryOutputFormat' PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(mydata STRING) +POSTHOOK: query: -- Create a table with binary output format +CREATE TABLE dest1(mydata STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( @@ -19,7 +21,8 @@ STORED AS OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveBinaryOutputFormat' POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Insert into that table using transform +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT TRANSFORM(*) USING 'cat' @@ -32,7 +35,8 @@ SELECT TRANSFORM(*) RECORDREADER 'org.apache.hadoop.hive.ql.exec.BinaryRecordReader' FROM src PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Insert into that table using transform +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT TRANSFORM(*) USING 'cat' @@ -368,11 +372,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.mydata SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT * FROM dest1 +PREHOOK: query: -- Test the result +SELECT * FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM dest1 +POSTHOOK: query: -- Test the result +SELECT * FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/binary_table_bincolserde.q.out ql/src/test/results/clientpositive/binary_table_bincolserde.q.out index 324f41f..2b90acd 100644 --- ql/src/test/results/clientpositive/binary_table_bincolserde.q.out +++ ql/src/test/results/clientpositive/binary_table_bincolserde.q.out @@ -2,9 +2,13 @@ PREHOOK: query: drop table ba_test PREHOOK: type: DROPTABLE POSTHOOK: query: drop table ba_test POSTHOOK: type: DROPTABLE -PREHOOK: query: create table ba_test (ba_key binary, ba_val binary) stored as rcfile +PREHOOK: query: -- Tests everything in binary_table_colserde.q + uses LazyBinaryColumnarSerde + +create table ba_test (ba_key binary, ba_val binary) stored as rcfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table ba_test (ba_key binary, ba_val binary) stored as rcfile +POSTHOOK: query: -- Tests everything in binary_table_colserde.q + uses LazyBinaryColumnarSerde + +create table ba_test (ba_key binary, ba_val binary) stored as rcfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@ba_test PREHOOK: query: alter table ba_test set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' diff --git ql/src/test/results/clientpositive/binary_table_colserde.q.out ql/src/test/results/clientpositive/binary_table_colserde.q.out index 7c0e0e2..e747129 100644 --- ql/src/test/results/clientpositive/binary_table_colserde.q.out +++ ql/src/test/results/clientpositive/binary_table_colserde.q.out @@ -2,9 +2,13 @@ PREHOOK: query: drop table ba_test PREHOOK: type: DROPTABLE POSTHOOK: query: drop table ba_test POSTHOOK: type: DROPTABLE -PREHOOK: query: create table ba_test (ba_key binary, ba_val binary) stored as rcfile +PREHOOK: query: -- Everything in ba_table1.q + columnar serde in RCFILE. + +create table ba_test (ba_key binary, ba_val binary) stored as rcfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table ba_test (ba_key binary, ba_val binary) stored as rcfile +POSTHOOK: query: -- Everything in ba_table1.q + columnar serde in RCFILE. + +create table ba_test (ba_key binary, ba_val binary) stored as rcfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@ba_test PREHOOK: query: describe extended ba_test diff --git ql/src/test/results/clientpositive/bucket_groupby.q.out ql/src/test/results/clientpositive/bucket_groupby.q.out index e6f76ad..0ff90d1 100644 --- ql/src/test/results/clientpositive/bucket_groupby.q.out +++ ql/src/test/results/clientpositive/bucket_groupby.q.out @@ -1180,11 +1180,13 @@ STAGE PLANS: limit: -1 -PREHOOK: query: drop table clustergroupby +PREHOOK: query: -- number of buckets cannot be changed, so drop the table +drop table clustergroupby PREHOOK: type: DROPTABLE PREHOOK: Input: default@clustergroupby PREHOOK: Output: default@clustergroupby -POSTHOOK: query: drop table clustergroupby +POSTHOOK: query: -- number of buckets cannot be changed, so drop the table +drop table clustergroupby POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@clustergroupby POSTHOOK: Output: default@clustergroupby @@ -1583,11 +1585,13 @@ POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).value SIMPLE [(src)src.Field 111 1 113 2 114 1 -PREHOOK: query: drop table clustergroupby +PREHOOK: query: -- number of buckets cannot be changed, so drop the table +drop table clustergroupby PREHOOK: type: DROPTABLE PREHOOK: Input: default@clustergroupby PREHOOK: Output: default@clustergroupby -POSTHOOK: query: drop table clustergroupby +POSTHOOK: query: -- number of buckets cannot be changed, so drop the table +drop table clustergroupby POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@clustergroupby POSTHOOK: Output: default@clustergroupby diff --git ql/src/test/results/clientpositive/bucket_map_join_1.q.out ql/src/test/results/clientpositive/bucket_map_join_1.q.out index 700a00d..56131b0 100644 --- ql/src/test/results/clientpositive/bucket_map_join_1.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_1.q.out @@ -32,10 +32,18 @@ PREHOOK: Output: default@table2 POSTHOOK: query: load data local inpath '../data/files/SortCol2Col1.txt' overwrite into table table2 POSTHOOK: type: LOAD POSTHOOK: Output: default@table2 -PREHOOK: query: explain extended +PREHOOK: query: -- The tables are bucketed in same columns in different order, +-- but sorted in different column orders +-- Neither bucketed map-join, nor sort-merge join should be performed + +explain extended select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: -- The tables are bucketed in same columns in different order, +-- but sorted in different column orders +-- Neither bucketed map-join, nor sort-merge join should be performed + +explain extended select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/bucket_map_join_2.q.out ql/src/test/results/clientpositive/bucket_map_join_2.q.out index c044516..1e7bea5 100644 --- ql/src/test/results/clientpositive/bucket_map_join_2.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_2.q.out @@ -32,10 +32,18 @@ PREHOOK: Output: default@table2 POSTHOOK: query: load data local inpath '../data/files/SortCol2Col1.txt' overwrite into table table2 POSTHOOK: type: LOAD POSTHOOK: Output: default@table2 -PREHOOK: query: explain extended +PREHOOK: query: -- The tables are bucketed in same columns in different order, +-- but sorted in different column orders +-- Neither bucketed map-join, nor sort-merge join should be performed + +explain extended select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: -- The tables are bucketed in same columns in different order, +-- but sorted in different column orders +-- Neither bucketed map-join, nor sort-merge join should be performed + +explain extended select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/bucketcontext_1.q.out ql/src/test/results/clientpositive/bucketcontext_1.q.out index 467e35f..43e34ce 100644 --- ql/src/test/results/clientpositive/bucketcontext_1.q.out +++ ql/src/test/results/clientpositive/bucketcontext_1.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') diff --git ql/src/test/results/clientpositive/bucketcontext_2.q.out ql/src/test/results/clientpositive/bucketcontext_2.q.out index 3f12b2c..ab44de5 100644 --- ql/src/test/results/clientpositive/bucketcontext_2.q.out +++ ql/src/test/results/clientpositive/bucketcontext_2.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 1 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 1 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') diff --git ql/src/test/results/clientpositive/bucketcontext_3.q.out ql/src/test/results/clientpositive/bucketcontext_3.q.out index b26a68f..592765a 100644 --- ql/src/test/results/clientpositive/bucketcontext_3.q.out +++ ql/src/test/results/clientpositive/bucketcontext_3.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 2 part, 2 bucket & big 1 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 2 part, 2 bucket & big 1 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') diff --git ql/src/test/results/clientpositive/bucketcontext_4.q.out ql/src/test/results/clientpositive/bucketcontext_4.q.out index 8a2ad68..6fc94a7 100644 --- ql/src/test/results/clientpositive/bucketcontext_4.q.out +++ ql/src/test/results/clientpositive/bucketcontext_4.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 2 part, 4 bucket & big 1 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 2 part, 4 bucket & big 1 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') diff --git ql/src/test/results/clientpositive/bucketcontext_5.q.out ql/src/test/results/clientpositive/bucketcontext_5.q.out index ec4bd86..8eb9a71 100644 --- ql/src/test/results/clientpositive/bucketcontext_5.q.out +++ ql/src/test/results/clientpositive/bucketcontext_5.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small no part, 4 bucket & big no part, 2 bucket +CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small no part, 4 bucket & big no part, 2 bucket +CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_small diff --git ql/src/test/results/clientpositive/bucketcontext_6.q.out ql/src/test/results/clientpositive/bucketcontext_6.q.out index 2caf71f..8271292 100644 --- ql/src/test/results/clientpositive/bucketcontext_6.q.out +++ ql/src/test/results/clientpositive/bucketcontext_6.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small no part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small no part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_small diff --git ql/src/test/results/clientpositive/bucketcontext_7.q.out ql/src/test/results/clientpositive/bucketcontext_7.q.out index 897346f..db9bb1d 100644 --- ql/src/test/results/clientpositive/bucketcontext_7.q.out +++ ql/src/test/results/clientpositive/bucketcontext_7.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 2 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 2 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') diff --git ql/src/test/results/clientpositive/bucketcontext_8.q.out ql/src/test/results/clientpositive/bucketcontext_8.q.out index 87ad861..21b5dc5 100644 --- ql/src/test/results/clientpositive/bucketcontext_8.q.out +++ ql/src/test/results/clientpositive/bucketcontext_8.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: query: -- small 2 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: query: -- small 2 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucket_small PREHOOK: query: load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') diff --git ql/src/test/results/clientpositive/bucketmapjoin1.q.out ql/src/test/results/clientpositive/bucketmapjoin1.q.out index 0704ea9..4bbd35f 100644 --- ql/src/test/results/clientpositive/bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin1.q.out @@ -13,12 +13,14 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -PREHOOK: query: explain extended +PREHOOK: query: -- empty partitions (HIVE-3205) +explain extended select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key where b.ds="2008-04-08" PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: -- empty partitions (HIVE-3205) +explain extended select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key where b.ds="2008-04-08" diff --git ql/src/test/results/clientpositive/bucketmapjoin10.q.out ql/src/test/results/clientpositive/bucketmapjoin10.q.out index d0f2d65..3466e6d 100644 --- ql/src/test/results/clientpositive/bucketmapjoin10.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin10.q.out @@ -100,12 +100,16 @@ POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@srcbucket_mapjoin_part_2 POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL diff --git ql/src/test/results/clientpositive/bucketmapjoin11.q.out ql/src/test/results/clientpositive/bucketmapjoin11.q.out index 59aa52b..1c12c09 100644 --- ql/src/test/results/clientpositive/bucketmapjoin11.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin11.q.out @@ -104,12 +104,18 @@ PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') POSTHOOK: type: LOAD POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The table and partition bucketing metadata doesn't match but the bucket numbers of all partitions is +-- a power of 2 and the bucketing columns match so bucket map join should be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The table and partition bucketing metadata doesn't match but the bucket numbers of all partitions is +-- a power of 2 and the bucketing columns match so bucket map join should be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL diff --git ql/src/test/results/clientpositive/bucketmapjoin12.q.out ql/src/test/results/clientpositive/bucketmapjoin12.q.out index 3d81e4a..abf9783 100644 --- ql/src/test/results/clientpositive/bucketmapjoin12.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin12.q.out @@ -74,12 +74,16 @@ POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_3 CLUSTERED BY (key) INTO 2 POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@srcbucket_mapjoin_part_3 POSTHOOK: Output: default@srcbucket_mapjoin_part_3 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The partition bucketing metadata match but one table is not bucketed, bucket map join should still be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The partition bucketing metadata match but one table is not bucketed, bucket map join should still be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' @@ -262,12 +266,16 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_2 POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 #### A masked pattern was here #### 464 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The table bucketing metadata match but one partition is not bucketed, bucket map join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b ON a.key = b.key AND a.part = '1' and b.part = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The table bucketing metadata match but one partition is not bucketed, bucket map join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b ON a.key = b.key AND a.part = '1' and b.part = '1' diff --git ql/src/test/results/clientpositive/bucketmapjoin13.q.out ql/src/test/results/clientpositive/bucketmapjoin13.q.out index 86972e2..870cb35 100644 --- ql/src/test/results/clientpositive/bucketmapjoin13.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin13.q.out @@ -5,12 +5,14 @@ POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) P CLUSTERED BY (value) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@srcbucket_mapjoin_part_1 -PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -27,12 +29,14 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_1 POSTHOOK: Output: default@srcbucket_mapjoin_part_1 POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +PREHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') SELECT * FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 -POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +POSTHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') SELECT * FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -52,12 +56,14 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(s POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_2 is bucketed by 'key' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_2 is bucketed by 'key' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -68,12 +74,16 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(s POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value' +-- and it is also being joined. So, bucketed map-join cannot be performed +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value' +-- and it is also being joined. So, bucketed map-join cannot be performed +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key @@ -310,12 +320,16 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 2056 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key and a.part = '2' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key and a.part = '2' @@ -524,12 +538,16 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(s POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key @@ -738,12 +756,18 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(s POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +-- The fact that the table is being bucketed by 'value' does not matter +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +-- The fact that the table is being bucketed by 'value' does not matter +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key diff --git ql/src/test/results/clientpositive/bucketmapjoin2.q.out ql/src/test/results/clientpositive/bucketmapjoin2.q.out index 68b04d5..7f3fb3e 100644 --- ql/src/test/results/clientpositive/bucketmapjoin2.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin2.q.out @@ -1167,10 +1167,12 @@ POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_pa POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] 0 0 0 -PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: query: -- HIVE-3210 +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') PREHOOK: type: LOAD PREHOOK: Output: default@srcbucket_mapjoin_part_2 -POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: query: -- HIVE-3210 +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') POSTHOOK: type: LOAD POSTHOOK: Output: default@srcbucket_mapjoin_part_2 POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 diff --git ql/src/test/results/clientpositive/bucketmapjoin8.q.out ql/src/test/results/clientpositive/bucketmapjoin8.q.out index c436411..2a5a5d5 100644 --- ql/src/test/results/clientpositive/bucketmapjoin8.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin8.q.out @@ -46,12 +46,16 @@ POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@srcbucket_mapjoin_part_2 POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The partition bucketing metadata match but the tables have different numbers of buckets, bucket map join should still be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The partition bucketing metadata match but the tables have different numbers of buckets, bucket map join should still be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' @@ -242,12 +246,16 @@ POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@srcbucket_mapjoin_part_2 POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The partition bucketing metadata match but the tables are bucketed on different columns, bucket map join should still be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The partition bucketing metadata match but the tables are bucketed on different columns, bucket map join should still be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' diff --git ql/src/test/results/clientpositive/bucketmapjoin9.q.out ql/src/test/results/clientpositive/bucketmapjoin9.q.out index 78126a7..c2db270 100644 --- ql/src/test/results/clientpositive/bucketmapjoin9.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin9.q.out @@ -52,12 +52,16 @@ POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@srcbucket_mapjoin_part_2 POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The table bucketing metadata matches but the partitions have different numbers of buckets, bucket map join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The table bucketing metadata matches but the partitions have different numbers of buckets, bucket map join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' and b.part = '1' @@ -269,12 +273,16 @@ POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 POSTHOOK: type: ALTERTABLE_CLUSTER_SORT POSTHOOK: Input: default@srcbucket_mapjoin_part_2 POSTHOOK: Output: default@srcbucket_mapjoin_part_2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The table bucketing metadata matches but the partitions are bucketed on different columns, bucket map join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' AND b.part = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The table bucketing metadata matches but the partitions are bucketed on different columns, bucket map join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' AND b.part = '1' diff --git ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out index 28175f1..2230fd1 100644 --- ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out @@ -106,9 +106,11 @@ PREHOOK: Output: default@test4 POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE test4 POSTHOOK: type: LOAD POSTHOOK: Output: default@test4 -PREHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key=R.key AND L.value=R.value +PREHOOK: query: -- should be allowed +explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key=R.key AND L.value=R.value PREHOOK: type: QUERY -POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key=R.key AND L.value=R.value +POSTHOOK: query: -- should be allowed +explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key=R.key AND L.value=R.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test1) L) (TOK_TABREF (TOK_TABNAME test1) R) (AND (= (. (TOK_TABLE_OR_COL L) key) (. (TOK_TABLE_OR_COL R) key)) (= (. (TOK_TABLE_OR_COL L) value) (. (TOK_TABLE_OR_COL R) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST R))) (TOK_SELEXPR TOK_ALLCOLREF)))) @@ -404,9 +406,11 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key+L.key=R.key +PREHOOK: query: -- should not apply bucket mapjoin +explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key+L.key=R.key PREHOOK: type: QUERY -POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key+L.key=R.key +POSTHOOK: query: -- should not apply bucket mapjoin +explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key+L.key=R.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test1) L) (TOK_TABREF (TOK_TABNAME test1) R) (= (+ (. (TOK_TABLE_OR_COL L) key) (. (TOK_TABLE_OR_COL L) key)) (. (TOK_TABLE_OR_COL R) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST R))) (TOK_SELEXPR TOK_ALLCOLREF)))) diff --git ql/src/test/results/clientpositive/column_access_stats.q.out ql/src/test/results/clientpositive/column_access_stats.q.out index eb72572..ad232b7 100644 --- ql/src/test/results/clientpositive/column_access_stats.q.out +++ ql/src/test/results/clientpositive/column_access_stats.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: query: -- This test is used for testing the ColumnAccessAnalyzer + +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PREHOOK: type: LOAD @@ -7,7 +9,8 @@ PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -PREHOOK: query: SELECT key FROM T1 ORDER BY key +PREHOOK: query: -- Simple select queries +SELECT key FROM T1 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### @@ -43,7 +46,8 @@ PREHOOK: Input: default@t1 1 1 1 -PREHOOK: query: EXPLAIN SELECT key FROM (SELECT key, val FROM T1) subq1 ORDER BY key +PREHOOK: query: -- More complicated select queries +EXPLAIN SELECT key FROM (SELECT key, val FROM T1) subq1 ORDER BY key PREHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) @@ -183,7 +187,8 @@ Columns:key,val 24.0 26.0 36.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Work with union +EXPLAIN SELECT * FROM ( SELECT key as c FROM T1 UNION ALL @@ -385,7 +390,8 @@ Columns:key 8 8 8 -PREHOOK: query: FROM T1 +PREHOOK: query: -- Work with insert overwrite +FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE T3 SELECT key, sum(val) GROUP BY key PREHOOK: type: QUERY @@ -395,7 +401,8 @@ PREHOOK: Output: default@t3 Table:default@t1 Columns:key,val -PREHOOK: query: SELECT * +PREHOOK: query: -- Simple joins +SELECT * FROM T1 JOIN T2 ON T1.key = T2.key ORDER BY T1.key, T1.val, T2.key, T2.val @@ -542,7 +549,8 @@ Columns:key,val Table:default@t1 Columns:key,val -PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * +PREHOOK: query: -- Map join +SELECT /*+ MAPJOIN(a) */ * FROM T1 a JOIN T2 b ON a.key = b.key ORDER BY a.key, a.val, b.key, b.val @@ -562,7 +570,8 @@ Columns:key,val 7 17 7 1 8 18 8 2 8 28 8 2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- More joins +EXPLAIN SELECT * FROM T1 JOIN T2 ON T1.key = T2.key AND T1.val = 3 and T2.val = 3 @@ -837,7 +846,8 @@ Columns:key,val Table:default@t1 Columns:key,val -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Join followed by join +EXPLAIN SELECT * FROM ( diff --git ql/src/test/results/clientpositive/combine2_hadoop20.q.out ql/src/test/results/clientpositive/combine2_hadoop20.q.out index e18142e..1ef67f4 100644 --- ql/src/test/results/clientpositive/combine2_hadoop20.q.out +++ ql/src/test/results/clientpositive/combine2_hadoop20.q.out @@ -2,12 +2,28 @@ PREHOOK: query: USE default PREHOOK: type: SWITCHDATABASE POSTHOOK: query: USE default POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: create table combine2(key string) partitioned by (value string) +PREHOOK: query: -- EXCLUDE_OS_WINDOWS +-- excluded on windows because of difference in file name encoding logic + + +create table combine2(key string) partitioned by (value string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table combine2(key string) partitioned by (value string) +POSTHOOK: query: -- EXCLUDE_OS_WINDOWS +-- excluded on windows because of difference in file name encoding logic + + +create table combine2(key string) partitioned by (value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@combine2 -PREHOOK: query: insert overwrite table combine2 partition(value) +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + +insert overwrite table combine2 partition(value) select * from ( select key, value from src where key < 10 union all @@ -17,7 +33,15 @@ select * from ( PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@combine2 -POSTHOOK: query: insert overwrite table combine2 partition(value) +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + +insert overwrite table combine2 partition(value) select * from ( select key, value from src where key < 10 union all diff --git ql/src/test/results/clientpositive/comments.q.out ql/src/test/results/clientpositive/comments.q.out deleted file mode 100644 index 8d54ebc..0000000 --- ql/src/test/results/clientpositive/comments.q.out +++ /dev/null @@ -1,219 +0,0 @@ -PREHOOK: query: select * from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select * from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -238 val_238 - -311 val_311 - val_27 - val_165 - val_409 -255 val_255 -278 val_278 -98 val_98 - val_484 - val_265 - val_193 -401 val_401 -150 val_150 -273 val_273 -224 -369 -66 val_66 -128 -213 val_213 -146 val_146 -406 val_406 - - - -PREHOOK: query: select "--" from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select "--" from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- -PREHOOK: query: select * from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select * from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -238 val_238 - -311 val_311 - val_27 - val_165 - val_409 -255 val_255 -278 val_278 -98 val_98 - val_484 - val_265 - val_193 -401 val_401 -150 val_150 -273 val_273 -224 -369 -66 val_66 -128 -213 val_213 -146 val_146 -406 val_406 - - - -PREHOOK: query: select "ke --- -y" from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select "ke --- -y" from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -ke --- -y -PREHOOK: query: select * from src1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: select * from src1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -238 val_238 - -311 val_311 - val_27 - val_165 - val_409 -255 val_255 -278 val_278 -98 val_98 - val_484 - val_265 - val_193 -401 val_401 -150 val_150 -273 val_273 -224 -369 -66 val_66 -128 -213 val_213 -146 val_146 -406 val_406 - - - diff --git ql/src/test/results/clientpositive/compute_stats_binary.q.out ql/src/test/results/clientpositive/compute_stats_binary.q.out index 06a96fc..3982505 100644 --- ql/src/test/results/clientpositive/compute_stats_binary.q.out +++ ql/src/test/results/clientpositive/compute_stats_binary.q.out @@ -3,10 +3,12 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tab_binary(a binary) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tab_binary -PREHOOK: query: LOAD DATA LOCAL INPATH "../data/files/binary.txt" INTO TABLE tab_binary +PREHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/binary.txt" INTO TABLE tab_binary PREHOOK: type: LOAD PREHOOK: Output: default@tab_binary -POSTHOOK: query: LOAD DATA LOCAL INPATH "../data/files/binary.txt" INTO TABLE tab_binary +POSTHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/binary.txt" INTO TABLE tab_binary POSTHOOK: type: LOAD POSTHOOK: Output: default@tab_binary PREHOOK: query: select count(*) from tab_binary @@ -18,11 +20,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_binary #### A masked pattern was here #### 10 -PREHOOK: query: select compute_stats(a, 16) from tab_binary +PREHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_binary PREHOOK: type: QUERY PREHOOK: Input: default@tab_binary #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a, 16) from tab_binary +POSTHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_binary POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_binary #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/compute_stats_boolean.q.out ql/src/test/results/clientpositive/compute_stats_boolean.q.out index e2405d5..88836ab 100644 --- ql/src/test/results/clientpositive/compute_stats_boolean.q.out +++ ql/src/test/results/clientpositive/compute_stats_boolean.q.out @@ -3,10 +3,12 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tab_bool(a boolean) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tab_bool -PREHOOK: query: LOAD DATA LOCAL INPATH "../data/files/bool.txt" INTO TABLE tab_bool +PREHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/bool.txt" INTO TABLE tab_bool PREHOOK: type: LOAD PREHOOK: Output: default@tab_bool -POSTHOOK: query: LOAD DATA LOCAL INPATH "../data/files/bool.txt" INTO TABLE tab_bool +POSTHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/bool.txt" INTO TABLE tab_bool POSTHOOK: type: LOAD POSTHOOK: Output: default@tab_bool PREHOOK: query: select count(*) from tab_bool @@ -18,11 +20,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_bool #### A masked pattern was here #### 33 -PREHOOK: query: select compute_stats(a, 16) from tab_bool +PREHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_bool PREHOOK: type: QUERY PREHOOK: Input: default@tab_bool #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a, 16) from tab_bool +POSTHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_bool POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_bool #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/compute_stats_double.q.out ql/src/test/results/clientpositive/compute_stats_double.q.out index bc78b82..16bea10 100644 --- ql/src/test/results/clientpositive/compute_stats_double.q.out +++ ql/src/test/results/clientpositive/compute_stats_double.q.out @@ -3,10 +3,12 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tab_double(a double) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tab_double -PREHOOK: query: LOAD DATA LOCAL INPATH "../data/files/double.txt" INTO TABLE tab_double +PREHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/double.txt" INTO TABLE tab_double PREHOOK: type: LOAD PREHOOK: Output: default@tab_double -POSTHOOK: query: LOAD DATA LOCAL INPATH "../data/files/double.txt" INTO TABLE tab_double +POSTHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/double.txt" INTO TABLE tab_double POSTHOOK: type: LOAD POSTHOOK: Output: default@tab_double PREHOOK: query: select count(*) from tab_double @@ -18,11 +20,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_double #### A masked pattern was here #### 16 -PREHOOK: query: select compute_stats(a, 16) from tab_double +PREHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_double PREHOOK: type: QUERY PREHOOK: Input: default@tab_double #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a, 16) from tab_double +POSTHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_double POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_double #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/compute_stats_empty_table.q.out ql/src/test/results/clientpositive/compute_stats_empty_table.q.out index 584b23b..8dffaf3 100644 --- ql/src/test/results/clientpositive/compute_stats_empty_table.q.out +++ ql/src/test/results/clientpositive/compute_stats_empty_table.q.out @@ -12,11 +12,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_empty #### A masked pattern was here #### 0 -PREHOOK: query: select compute_stats(a, 16) from tab_empty +PREHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_empty PREHOOK: type: QUERY PREHOOK: Input: default@tab_empty #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a, 16) from tab_empty +POSTHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_empty POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_empty #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/compute_stats_long.q.out ql/src/test/results/clientpositive/compute_stats_long.q.out index 94d601d..8faf605 100644 --- ql/src/test/results/clientpositive/compute_stats_long.q.out +++ ql/src/test/results/clientpositive/compute_stats_long.q.out @@ -3,10 +3,12 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tab_int(a int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tab_int -PREHOOK: query: LOAD DATA LOCAL INPATH "../data/files/int.txt" INTO TABLE tab_int +PREHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/int.txt" INTO TABLE tab_int PREHOOK: type: LOAD PREHOOK: Output: default@tab_int -POSTHOOK: query: LOAD DATA LOCAL INPATH "../data/files/int.txt" INTO TABLE tab_int +POSTHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/int.txt" INTO TABLE tab_int POSTHOOK: type: LOAD POSTHOOK: Output: default@tab_int PREHOOK: query: select count(*) from tab_int @@ -18,11 +20,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_int #### A masked pattern was here #### 12 -PREHOOK: query: select compute_stats(a, 16) from tab_int +PREHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_int PREHOOK: type: QUERY PREHOOK: Input: default@tab_int #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a, 16) from tab_int +POSTHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_int POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_int #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/compute_stats_string.q.out ql/src/test/results/clientpositive/compute_stats_string.q.out index 87e65d9..0e3b591 100644 --- ql/src/test/results/clientpositive/compute_stats_string.q.out +++ ql/src/test/results/clientpositive/compute_stats_string.q.out @@ -3,10 +3,12 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tab_string(a string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tab_string -PREHOOK: query: LOAD DATA LOCAL INPATH "../data/files/string.txt" INTO TABLE tab_string +PREHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/string.txt" INTO TABLE tab_string PREHOOK: type: LOAD PREHOOK: Output: default@tab_string -POSTHOOK: query: LOAD DATA LOCAL INPATH "../data/files/string.txt" INTO TABLE tab_string +POSTHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/string.txt" INTO TABLE tab_string POSTHOOK: type: LOAD POSTHOOK: Output: default@tab_string PREHOOK: query: select count(*) from tab_string @@ -18,11 +20,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_string #### A masked pattern was here #### 10 -PREHOOK: query: select compute_stats(a, 16) from tab_string +PREHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_string PREHOOK: type: QUERY PREHOOK: Input: default@tab_string #### A masked pattern was here #### -POSTHOOK: query: select compute_stats(a, 16) from tab_string +POSTHOOK: query: -- compute statistical summary of data +select compute_stats(a, 16) from tab_string POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_string #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/convert_enum_to_string.q.out ql/src/test/results/clientpositive/convert_enum_to_string.q.out index 8b429ad..af981a7 100644 --- ql/src/test/results/clientpositive/convert_enum_to_string.q.out +++ ql/src/test/results/clientpositive/convert_enum_to_string.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: create table convert_enum_to_string +PREHOOK: query: -- Ensure Enum fields are converted to strings (instead of struct) + +create table convert_enum_to_string partitioned by (b string) row format serde "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer" with serdeproperties ( "serialization.class"="org.apache.hadoop.hive.serde2.thrift.test.MegaStruct", "serialization.format"="org.apache.thrift.protocol.TBinaryProtocol") PREHOOK: type: CREATETABLE -POSTHOOK: query: create table convert_enum_to_string +POSTHOOK: query: -- Ensure Enum fields are converted to strings (instead of struct) + +create table convert_enum_to_string partitioned by (b string) row format serde "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer" with serdeproperties ( diff --git ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out index f92da6b..68003e4 100644 --- ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out +++ ql/src/test/results/clientpositive/create_alter_list_bucketing_table1.q.out @@ -1,7 +1,15 @@ -PREHOOK: query: CREATE TABLE if not exists stored_as_dirs_multiple (col1 STRING, col2 int, col3 STRING) +PREHOOK: query: -- Test stored as directories +-- it covers a few cases + +-- 1. create a table with stored as directories +CREATE TABLE if not exists stored_as_dirs_multiple (col1 STRING, col2 int, col3 STRING) SKEWED BY (col1, col2) ON (('s1',1), ('s3',3), ('s13',13), ('s78',78)) stored as DIRECTORIES PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE if not exists stored_as_dirs_multiple (col1 STRING, col2 int, col3 STRING) +POSTHOOK: query: -- Test stored as directories +-- it covers a few cases + +-- 1. create a table with stored as directories +CREATE TABLE if not exists stored_as_dirs_multiple (col1 STRING, col2 int, col3 STRING) SKEWED BY (col1, col2) ON (('s1',1), ('s3',3), ('s13',13), ('s78',78)) stored as DIRECTORIES POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@stored_as_dirs_multiple @@ -38,11 +46,13 @@ Skewed Columns: [col1, col2] Skewed Values: [[s1, 1], [s3, 3], [s13, 13], [s78, 78]] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table stored_as_dirs_multiple not stored as DIRECTORIES +PREHOOK: query: -- 2. turn off stored as directories but table is still a skewed table +alter table stored_as_dirs_multiple not stored as DIRECTORIES PREHOOK: type: ALTERTABLE_SKEWED PREHOOK: Input: default@stored_as_dirs_multiple PREHOOK: Output: default@stored_as_dirs_multiple -POSTHOOK: query: alter table stored_as_dirs_multiple not stored as DIRECTORIES +POSTHOOK: query: -- 2. turn off stored as directories but table is still a skewed table +alter table stored_as_dirs_multiple not stored as DIRECTORIES POSTHOOK: type: ALTERTABLE_SKEWED POSTHOOK: Input: default@stored_as_dirs_multiple POSTHOOK: Output: default@stored_as_dirs_multiple @@ -78,11 +88,13 @@ Skewed Columns: [col1, col2] Skewed Values: [[s1, 1], [s3, 3], [s13, 13], [s78, 78]] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table stored_as_dirs_multiple not skewed +PREHOOK: query: -- 3. turn off skewed +alter table stored_as_dirs_multiple not skewed PREHOOK: type: ALTERTABLE_SKEWED PREHOOK: Input: default@stored_as_dirs_multiple PREHOOK: Output: default@stored_as_dirs_multiple -POSTHOOK: query: alter table stored_as_dirs_multiple not skewed +POSTHOOK: query: -- 3. turn off skewed +alter table stored_as_dirs_multiple not skewed POSTHOOK: type: ALTERTABLE_SKEWED POSTHOOK: Input: default@stored_as_dirs_multiple POSTHOOK: Output: default@stored_as_dirs_multiple @@ -116,9 +128,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: CREATE TABLE stored_as_dirs_single (key STRING, value STRING) +PREHOOK: query: -- 4. alter a table to stored as directories +CREATE TABLE stored_as_dirs_single (key STRING, value STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE stored_as_dirs_single (key STRING, value STRING) +POSTHOOK: query: -- 4. alter a table to stored as directories +CREATE TABLE stored_as_dirs_single (key STRING, value STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@stored_as_dirs_single PREHOOK: query: alter table stored_as_dirs_single SKEWED BY (key) ON ('1','5','6') @@ -163,11 +177,13 @@ Skewed Columns: [key] Skewed Values: [[1], [5], [6]] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table stored_as_dirs_single not skewed +PREHOOK: query: -- 5. turn off skewed should turn off stored as directories too +alter table stored_as_dirs_single not skewed PREHOOK: type: ALTERTABLE_SKEWED PREHOOK: Input: default@stored_as_dirs_single PREHOOK: Output: default@stored_as_dirs_single -POSTHOOK: query: alter table stored_as_dirs_single not skewed +POSTHOOK: query: -- 5. turn off skewed should turn off stored as directories too +alter table stored_as_dirs_single not skewed POSTHOOK: type: ALTERTABLE_SKEWED POSTHOOK: Input: default@stored_as_dirs_single POSTHOOK: Output: default@stored_as_dirs_single @@ -200,12 +216,14 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: alter table stored_as_dirs_single SKEWED BY (key) ON ('1','5','6') +PREHOOK: query: -- 6. turn on stored as directories again +alter table stored_as_dirs_single SKEWED BY (key) ON ('1','5','6') stored as DIRECTORIES PREHOOK: type: ALTERTABLE_SKEWED PREHOOK: Input: default@stored_as_dirs_single PREHOOK: Output: default@stored_as_dirs_single -POSTHOOK: query: alter table stored_as_dirs_single SKEWED BY (key) ON ('1','5','6') +POSTHOOK: query: -- 6. turn on stored as directories again +alter table stored_as_dirs_single SKEWED BY (key) ON ('1','5','6') stored as DIRECTORIES POSTHOOK: type: ALTERTABLE_SKEWED POSTHOOK: Input: default@stored_as_dirs_single @@ -242,9 +260,11 @@ Skewed Columns: [key] Skewed Values: [[1], [5], [6]] Storage Desc Params: serialization.format 1 -PREHOOK: query: create table stored_as_dirs_single_like like stored_as_dirs_single +PREHOOK: query: -- 7. create table like +create table stored_as_dirs_single_like like stored_as_dirs_single PREHOOK: type: CREATETABLE -POSTHOOK: query: create table stored_as_dirs_single_like like stored_as_dirs_single +POSTHOOK: query: -- 7. create table like +create table stored_as_dirs_single_like like stored_as_dirs_single POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@stored_as_dirs_single_like PREHOOK: query: describe formatted stored_as_dirs_single_like @@ -279,11 +299,13 @@ Skewed Columns: [key] Skewed Values: [[1], [5], [6]] Storage Desc Params: serialization.format 1 -PREHOOK: query: drop table stored_as_dirs_single +PREHOOK: query: -- cleanup +drop table stored_as_dirs_single PREHOOK: type: DROPTABLE PREHOOK: Input: default@stored_as_dirs_single PREHOOK: Output: default@stored_as_dirs_single -POSTHOOK: query: drop table stored_as_dirs_single +POSTHOOK: query: -- cleanup +drop table stored_as_dirs_single POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@stored_as_dirs_single POSTHOOK: Output: default@stored_as_dirs_single diff --git ql/src/test/results/clientpositive/create_big_view.q.out ql/src/test/results/clientpositive/create_big_view.q.out index bd8d8bc..20a4dc1 100644 --- ql/src/test/results/clientpositive/create_big_view.q.out +++ ql/src/test/results/clientpositive/create_big_view.q.out @@ -2,7 +2,9 @@ PREHOOK: query: DROP VIEW big_view PREHOOK: type: DROPVIEW POSTHOOK: query: DROP VIEW big_view POSTHOOK: type: DROPVIEW -PREHOOK: query: CREATE VIEW big_view AS SELECT +PREHOOK: query: -- Define a view with long SQL text to test metastore and other limits. + +CREATE VIEW big_view AS SELECT 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' AS a, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', @@ -240,7 +242,9 @@ PREHOOK: query: CREATE VIEW big_view AS SELECT 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' FROM src PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW big_view AS SELECT +POSTHOOK: query: -- Define a view with long SQL text to test metastore and other limits. + +CREATE VIEW big_view AS SELECT 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' AS a, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', diff --git ql/src/test/results/clientpositive/create_like2.q.out ql/src/test/results/clientpositive/create_like2.q.out index 14fb3dc..e02e2ad 100644 --- ql/src/test/results/clientpositive/create_like2.q.out +++ ql/src/test/results/clientpositive/create_like2.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE TABLE table1(a INT, b STRING) +PREHOOK: query: -- Tests the copying over of Table Parameters according to a HiveConf setting +-- when doing a CREATE TABLE LIKE. + +CREATE TABLE table1(a INT, b STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE table1(a INT, b STRING) +POSTHOOK: query: -- Tests the copying over of Table Parameters according to a HiveConf setting +-- when doing a CREATE TABLE LIKE. + +CREATE TABLE table1(a INT, b STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@table1 PREHOOK: query: ALTER TABLE table1 SET TBLPROPERTIES ('a'='1', 'b'='2', 'c'='3', 'd' = '4') diff --git ql/src/test/results/clientpositive/create_like_tbl_props.q.out ql/src/test/results/clientpositive/create_like_tbl_props.q.out index ab64cbe..039fbe3 100644 --- ql/src/test/results/clientpositive/create_like_tbl_props.q.out +++ ql/src/test/results/clientpositive/create_like_tbl_props.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE test_table LIKE src TBLPROPERTIES('key'='value') +PREHOOK: query: -- Test that CREATE TABLE LIKE commands can take explicit table properties + +CREATE TABLE test_table LIKE src TBLPROPERTIES('key'='value') PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table LIKE src TBLPROPERTIES('key'='value') +POSTHOOK: query: -- Test that CREATE TABLE LIKE commands can take explicit table properties + +CREATE TABLE test_table LIKE src TBLPROPERTIES('key'='value') POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table PREHOOK: query: DESC FORMATTED test_table @@ -72,9 +76,13 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: CREATE TABLE test_table2 LIKE src TBLPROPERTIES('key2' = 'value2') +PREHOOK: query: -- Test that CREATE TABLE LIKE commands can take default and explicit table properties + +CREATE TABLE test_table2 LIKE src TBLPROPERTIES('key2' = 'value2') PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table2 LIKE src TBLPROPERTIES('key2' = 'value2') +POSTHOOK: query: -- Test that CREATE TABLE LIKE commands can take default and explicit table properties + +CREATE TABLE test_table2 LIKE src TBLPROPERTIES('key2' = 'value2') POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table2 PREHOOK: query: DESC FORMATTED test_table2 @@ -108,9 +116,13 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: CREATE TABLE test_table3 LIKE test_table2 TBLPROPERTIES('key2' = 'value3') +PREHOOK: query: -- Test that properties inherited are overwritten by explicitly set ones + +CREATE TABLE test_table3 LIKE test_table2 TBLPROPERTIES('key2' = 'value3') PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table3 LIKE test_table2 TBLPROPERTIES('key2' = 'value3') +POSTHOOK: query: -- Test that properties inherited are overwritten by explicitly set ones + +CREATE TABLE test_table3 LIKE test_table2 TBLPROPERTIES('key2' = 'value3') POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table3 PREHOOK: query: DESC FORMATTED test_table3 diff --git ql/src/test/results/clientpositive/create_like_view.q.out ql/src/test/results/clientpositive/create_like_view.q.out index 9072cdd..7e640f6 100644 --- ql/src/test/results/clientpositive/create_like_view.q.out +++ ql/src/test/results/clientpositive/create_like_view.q.out @@ -239,9 +239,11 @@ POSTHOOK: Lineage: table1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, POSTHOOK: Lineage: table1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: table2.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: table2.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create view view1 partitioned on (ds, hr) as select * from srcpart +PREHOOK: query: -- check partitions +create view view1 partitioned on (ds, hr) as select * from srcpart PREHOOK: type: CREATEVIEW -POSTHOOK: query: create view view1 partitioned on (ds, hr) as select * from srcpart +POSTHOOK: query: -- check partitions +create view view1 partitioned on (ds, hr) as select * from srcpart POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@view1 POSTHOOK: Lineage: table1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/create_or_replace_view.q.out ql/src/test/results/clientpositive/create_or_replace_view.q.out index 7d5ea3a..bb9b669 100644 --- ql/src/test/results/clientpositive/create_or_replace_view.q.out +++ ql/src/test/results/clientpositive/create_or_replace_view.q.out @@ -39,9 +39,11 @@ Sort Columns: [] # View Information View Original Text: select * from srcpart View Expanded Text: select `srcpart`.`key`, `srcpart`.`value`, `srcpart`.`ds`, `srcpart`.`hr` from `default`.`srcpart` -PREHOOK: query: create or replace view v partitioned on (ds, hr) as select * from srcpart +PREHOOK: query: -- modifying definition of unpartitioned view +create or replace view v partitioned on (ds, hr) as select * from srcpart PREHOOK: type: CREATEVIEW -POSTHOOK: query: create or replace view v partitioned on (ds, hr) as select * from srcpart +POSTHOOK: query: -- modifying definition of unpartitioned view +create or replace view v partitioned on (ds, hr) as select * from srcpart POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@v PREHOOK: query: alter view v add partition (ds='2008-04-08',hr='11') @@ -123,9 +125,11 @@ POSTHOOK: query: show partitions v POSTHOOK: type: SHOWPARTITIONS ds=2008-04-08/hr=11 ds=2008-04-08/hr=12 -PREHOOK: query: create or replace view v partitioned on (ds, hr) as select value, ds, hr from srcpart +PREHOOK: query: -- altering partitioned view 1 +create or replace view v partitioned on (ds, hr) as select value, ds, hr from srcpart PREHOOK: type: CREATEVIEW -POSTHOOK: query: create or replace view v partitioned on (ds, hr) as select value, ds, hr from srcpart +POSTHOOK: query: -- altering partitioned view 1 +create or replace view v partitioned on (ds, hr) as select value, ds, hr from srcpart POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@v PREHOOK: query: select * from v where value='val_409' and ds='2008-04-08' and hr='11' @@ -184,9 +188,11 @@ POSTHOOK: query: show partitions v POSTHOOK: type: SHOWPARTITIONS ds=2008-04-08/hr=11 ds=2008-04-08/hr=12 -PREHOOK: query: create or replace view v partitioned on (ds, hr) as select key, value, ds, hr from srcpart +PREHOOK: query: -- altering partitioned view 2 +create or replace view v partitioned on (ds, hr) as select key, value, ds, hr from srcpart PREHOOK: type: CREATEVIEW -POSTHOOK: query: create or replace view v partitioned on (ds, hr) as select key, value, ds, hr from srcpart +POSTHOOK: query: -- altering partitioned view 2 +create or replace view v partitioned on (ds, hr) as select key, value, ds, hr from srcpart POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@v PREHOOK: query: select * from v where value='val_409' and ds='2008-04-08' and hr='11' @@ -254,9 +260,11 @@ POSTHOOK: query: drop view v POSTHOOK: type: DROPVIEW POSTHOOK: Input: default@v POSTHOOK: Output: default@v -PREHOOK: query: create table srcpart_temp like srcpart +PREHOOK: query: -- updating to fix view with invalid definition +create table srcpart_temp like srcpart PREHOOK: type: CREATETABLE -POSTHOOK: query: create table srcpart_temp like srcpart +POSTHOOK: query: -- updating to fix view with invalid definition +create table srcpart_temp like srcpart POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@srcpart_temp PREHOOK: query: create view v partitioned on (ds, hr) as select * from srcpart_temp @@ -272,9 +280,11 @@ POSTHOOK: query: drop table srcpart_temp POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@srcpart_temp POSTHOOK: Output: default@srcpart_temp -PREHOOK: query: create or replace view v partitioned on (ds, hr) as select * from srcpart +PREHOOK: query: -- v is now invalid +create or replace view v partitioned on (ds, hr) as select * from srcpart PREHOOK: type: CREATEVIEW -POSTHOOK: query: create or replace view v partitioned on (ds, hr) as select * from srcpart +POSTHOOK: query: -- v is now invalid +create or replace view v partitioned on (ds, hr) as select * from srcpart POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@v PREHOOK: query: describe formatted v diff --git ql/src/test/results/clientpositive/create_view.q.out ql/src/test/results/clientpositive/create_view.q.out index ebf2b80..caa0254 100644 --- ql/src/test/results/clientpositive/create_view.q.out +++ ql/src/test/results/clientpositive/create_view.q.out @@ -136,10 +136,12 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@view3 #### A masked pattern was here #### VAL_86 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- test EXPLAIN output for CREATE VIEW +EXPLAIN CREATE VIEW view0(valoo) AS SELECT upper(value) FROM src WHERE key=86 PREHOOK: type: CREATEVIEW -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- test EXPLAIN output for CREATE VIEW +EXPLAIN CREATE VIEW view0(valoo) AS SELECT upper(value) FROM src WHERE key=86 POSTHOOK: type: CREATEVIEW ABSTRACT SYNTAX TREE: @@ -161,10 +163,12 @@ STAGE PLANS: original text: SELECT upper(value) FROM src WHERE key=86 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- make sure EXPLAIN works with a query which references a view +EXPLAIN SELECT * from view2 where key=18 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- make sure EXPLAIN works with a query which references a view +EXPLAIN SELECT * from view2 where key=18 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -403,9 +407,13 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE table1 (key int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@table1 -PREHOOK: query: DESCRIBE EXTENDED table1 +PREHOOK: query: -- use DESCRIBE EXTENDED on a base table and an external table as points +-- of comparison for view descriptions +DESCRIBE EXTENDED table1 PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE EXTENDED table1 +POSTHOOK: query: -- use DESCRIBE EXTENDED on a base table and an external table as points +-- of comparison for view descriptions +DESCRIBE EXTENDED table1 POSTHOOK: type: DESCTABLE # col_name data_type comment @@ -422,9 +430,13 @@ key string default value string default #### A masked pattern was here #### -PREHOOK: query: DESCRIBE EXTENDED table1 +PREHOOK: query: -- use DESCRIBE EXTENDED on a base table as a point of comparison for +-- view descriptions +DESCRIBE EXTENDED table1 PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE EXTENDED table1 +POSTHOOK: query: -- use DESCRIBE EXTENDED on a base table as a point of comparison for +-- view descriptions +DESCRIBE EXTENDED table1 POSTHOOK: type: DESCTABLE # col_name data_type comment @@ -557,10 +569,14 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st key1 int None key2 int None -PREHOOK: query: CREATE VIEW view6(valoo COMMENT 'I cannot spell') AS +PREHOOK: query: -- verify that column name and comment in DDL portion +-- overrides column alias in SELECT +CREATE VIEW view6(valoo COMMENT 'I cannot spell') AS SELECT upper(value) as blarg FROM src WHERE key=86 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW view6(valoo COMMENT 'I cannot spell') AS +POSTHOOK: query: -- verify that column name and comment in DDL portion +-- overrides column alias in SELECT +CREATE VIEW view6(valoo COMMENT 'I cannot spell') AS SELECT upper(value) as blarg FROM src WHERE key=86 POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@view6 @@ -573,13 +589,15 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st # col_name data_type comment valoo string I cannot spell -PREHOOK: query: CREATE VIEW view7 AS +PREHOOK: query: -- verify that ORDER BY and LIMIT are both supported in view def +CREATE VIEW view7 AS SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key, value LIMIT 10 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW view7 AS +POSTHOOK: query: -- verify that ORDER BY and LIMIT are both supported in view def +CREATE VIEW view7 AS SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key, value @@ -608,12 +626,18 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st 87 val_87 90 val_90 90 val_90 -PREHOOK: query: SELECT * FROM view7 ORDER BY key DESC, value +PREHOOK: query: -- top-level ORDER BY should override the one inside the view +-- (however, the inside ORDER BY should still influence the evaluation +-- of the limit) +SELECT * FROM view7 ORDER BY key DESC, value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@view7 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM view7 ORDER BY key DESC, value +POSTHOOK: query: -- top-level ORDER BY should override the one inside the view +-- (however, the inside ORDER BY should still influence the evaluation +-- of the limit) +SELECT * FROM view7 ORDER BY key DESC, value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@view7 @@ -629,12 +653,14 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st 83 val_83 83 val_83 82 val_82 -PREHOOK: query: SELECT * FROM view7 LIMIT 5 +PREHOOK: query: -- top-level LIMIT should override if lower +SELECT * FROM view7 LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@view7 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM view7 LIMIT 5 +POSTHOOK: query: -- top-level LIMIT should override if lower +SELECT * FROM view7 LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@view7 @@ -645,12 +671,14 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st 83 val_83 84 val_84 84 val_84 -PREHOOK: query: SELECT * FROM view7 LIMIT 20 +PREHOOK: query: -- but not if higher +SELECT * FROM view7 LIMIT 20 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@view7 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM view7 LIMIT 20 +POSTHOOK: query: -- but not if higher +SELECT * FROM view7 LIMIT 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@view7 @@ -666,10 +694,12 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st 87 val_87 90 val_90 90 val_90 -PREHOOK: query: CREATE TEMPORARY FUNCTION test_translate AS +PREHOOK: query: -- test usage of a function within a view +CREATE TEMPORARY FUNCTION test_translate AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestTranslate' PREHOOK: type: CREATEFUNCTION -POSTHOOK: query: CREATE TEMPORARY FUNCTION test_translate AS +POSTHOOK: query: -- test usage of a function within a view +CREATE TEMPORARY FUNCTION test_translate AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFTestTranslate' POSTHOOK: type: CREATEFUNCTION POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -739,18 +769,22 @@ POSTHOOK: Input: default@view8 #### A masked pattern was here #### POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] bbc -PREHOOK: query: CREATE TEMPORARY FUNCTION test_max AS +PREHOOK: query: -- test usage of a UDAF within a view +CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax' PREHOOK: type: CREATEFUNCTION -POSTHOOK: query: CREATE TEMPORARY FUNCTION test_max AS +POSTHOOK: query: -- test usage of a UDAF within a view +CREATE TEMPORARY FUNCTION test_max AS 'org.apache.hadoop.hive.ql.udf.UDAFTestMax' POSTHOOK: type: CREATEFUNCTION POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: CREATE VIEW view9(m) AS +PREHOOK: query: -- disable map-side aggregation +CREATE VIEW view9(m) AS SELECT test_max(length(value)) FROM src PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW view9(m) AS +POSTHOOK: query: -- disable map-side aggregation +CREATE VIEW view9(m) AS SELECT test_max(length(value)) FROM src POSTHOOK: type: CREATEVIEW @@ -821,11 +855,13 @@ POSTHOOK: type: DROPVIEW POSTHOOK: Input: default@view9 POSTHOOK: Output: default@view9 POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: CREATE VIEW view9(m) AS +PREHOOK: query: -- enable map-side aggregation +CREATE VIEW view9(m) AS SELECT test_max(length(value)) FROM src PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW view9(m) AS +POSTHOOK: query: -- enable map-side aggregation +CREATE VIEW view9(m) AS SELECT test_max(length(value)) FROM src POSTHOOK: type: CREATEVIEW @@ -887,10 +923,12 @@ POSTHOOK: Input: default@view9 #### A masked pattern was here #### POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 7 -PREHOOK: query: CREATE VIEW view10 AS +PREHOOK: query: -- test usage of a subselect within a view +CREATE VIEW view10 AS SELECT slurp.* FROM (SELECT * FROM src WHERE key=86) slurp PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW view10 AS +POSTHOOK: query: -- test usage of a subselect within a view +CREATE VIEW view10 AS SELECT slurp.* FROM (SELECT * FROM src WHERE key=86) slurp POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@view10 @@ -949,10 +987,12 @@ POSTHOOK: Input: default@view10 #### A masked pattern was here #### POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 86 val_86 -PREHOOK: query: CREATE TEMPORARY FUNCTION test_explode AS +PREHOOK: query: -- test usage of a UDTF within a view +CREATE TEMPORARY FUNCTION test_explode AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode' PREHOOK: type: CREATEFUNCTION -POSTHOOK: query: CREATE TEMPORARY FUNCTION test_explode AS +POSTHOOK: query: -- test usage of a UDTF within a view +CREATE TEMPORARY FUNCTION test_explode AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode' POSTHOOK: type: CREATEFUNCTION POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -1024,10 +1064,12 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st 1 2 3 -PREHOOK: query: CREATE VIEW view12 AS +PREHOOK: query: -- test usage of LATERAL within a view +CREATE VIEW view12 AS SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW view12 AS +POSTHOOK: query: -- test usage of LATERAL within a view +CREATE VIEW view12 AS SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@view12 @@ -1090,13 +1132,15 @@ POSTHOOK: Input: default@view12 #### A masked pattern was here #### POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 0 val_0 1 -PREHOOK: query: SELECT * FROM view2 LATERAL VIEW explode(array(1,2,3)) myTable AS myCol +PREHOOK: query: -- test usage of LATERAL with a view as the LHS +SELECT * FROM view2 LATERAL VIEW explode(array(1,2,3)) myTable AS myCol ORDER BY key ASC, myCol ASC LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@view2 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM view2 LATERAL VIEW explode(array(1,2,3)) myTable AS myCol +POSTHOOK: query: -- test usage of LATERAL with a view as the LHS +SELECT * FROM view2 LATERAL VIEW explode(array(1,2,3)) myTable AS myCol ORDER BY key ASC, myCol ASC LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -1104,11 +1148,13 @@ POSTHOOK: Input: default@view2 #### A masked pattern was here #### POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 0 val_0 1 -PREHOOK: query: CREATE VIEW view13 AS +PREHOOK: query: -- test usage of TABLESAMPLE within a view +CREATE VIEW view13 AS SELECT s.key FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 ON key) s PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW view13 AS +POSTHOOK: query: -- test usage of TABLESAMPLE within a view +CREATE VIEW view13 AS SELECT s.key FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 ON key) s POSTHOOK: type: CREATEVIEW @@ -1183,7 +1229,8 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st 10 10 15 -PREHOOK: query: CREATE VIEW view14 AS +PREHOOK: query: -- test usage of JOIN+UNION+AGG all within same view +CREATE VIEW view14 AS SELECT unionsrc1.key as k1, unionsrc1.value as v1, unionsrc2.key as k2, unionsrc2.value as v2 FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 @@ -1195,7 +1242,8 @@ JOIN select s4.key as key, s4.value as value from src s4 where s4.key < 10) unionsrc2 ON (unionsrc1.key = unionsrc2.key) PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW view14 AS +POSTHOOK: query: -- test usage of JOIN+UNION+AGG all within same view +CREATE VIEW view14 AS SELECT unionsrc1.key as k1, unionsrc1.value as v1, unionsrc2.key as k2, unionsrc2.value as v2 FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 @@ -1327,12 +1375,14 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st 8 val_8 8 val_8 9 val_9 9 val_9 tst1 500 tst1 500 -PREHOOK: query: CREATE VIEW view15 AS +PREHOOK: query: -- test usage of GROUP BY within view +CREATE VIEW view15 AS SELECT key,COUNT(value) AS value_count FROM src GROUP BY key PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW view15 AS +POSTHOOK: query: -- test usage of GROUP BY within view +CREATE VIEW view15 AS SELECT key,COUNT(value) AS value_count FROM src GROUP BY key @@ -1414,11 +1464,13 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st 406 4 468 4 489 4 -PREHOOK: query: CREATE VIEW view16 AS +PREHOOK: query: -- test usage of DISTINCT within view +CREATE VIEW view16 AS SELECT DISTINCT value FROM src PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW view16 AS +POSTHOOK: query: -- test usage of DISTINCT within view +CREATE VIEW view16 AS SELECT DISTINCT value FROM src POSTHOOK: type: CREATEVIEW @@ -1493,11 +1545,13 @@ val_11 val_111 val_113 val_114 -PREHOOK: query: DROP TABLE IF EXISTS view16 +PREHOOK: query: -- HIVE-2133: DROP TABLE IF EXISTS should ignore a matching view name +DROP TABLE IF EXISTS view16 PREHOOK: type: DROPTABLE PREHOOK: Input: default@view16 PREHOOK: Output: default@view16 -POSTHOOK: query: DROP TABLE IF EXISTS view16 +POSTHOOK: query: -- HIVE-2133: DROP TABLE IF EXISTS should ignore a matching view name +DROP TABLE IF EXISTS view16 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@view16 POSTHOOK: Output: default@view16 @@ -1510,11 +1564,13 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st # col_name data_type comment value string None -PREHOOK: query: DROP VIEW IF EXISTS table1 +PREHOOK: query: -- Likewise, DROP VIEW IF EXISTS should ignore a matching table name +DROP VIEW IF EXISTS table1 PREHOOK: type: DROPVIEW PREHOOK: Input: default@table1 PREHOOK: Output: default@table1 -POSTHOOK: query: DROP VIEW IF EXISTS table1 +POSTHOOK: query: -- Likewise, DROP VIEW IF EXISTS should ignore a matching table name +DROP VIEW IF EXISTS table1 POSTHOOK: type: DROPVIEW POSTHOOK: Input: default@table1 POSTHOOK: Output: default@table1 @@ -1528,11 +1584,19 @@ POSTHOOK: Lineage: table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:st key int None value string None -PREHOOK: query: DROP VIEW view1 +PREHOOK: query: -- this should work since currently we don't track view->table +-- dependencies for implementing RESTRICT + + +DROP VIEW view1 PREHOOK: type: DROPVIEW PREHOOK: Input: default@view1 PREHOOK: Output: default@view1 -POSTHOOK: query: DROP VIEW view1 +POSTHOOK: query: -- this should work since currently we don't track view->table +-- dependencies for implementing RESTRICT + + +DROP VIEW view1 POSTHOOK: type: DROPVIEW POSTHOOK: Input: default@view1 POSTHOOK: Output: default@view1 diff --git ql/src/test/results/clientpositive/create_view_partitioned.q.out ql/src/test/results/clientpositive/create_view_partitioned.q.out index f40e5ba..4dca237 100644 --- ql/src/test/results/clientpositive/create_view_partitioned.q.out +++ ql/src/test/results/clientpositive/create_view_partitioned.q.out @@ -10,14 +10,18 @@ PREHOOK: query: DROP VIEW vp3 PREHOOK: type: DROPVIEW POSTHOOK: query: DROP VIEW vp3 POSTHOOK: type: DROPVIEW -PREHOOK: query: CREATE VIEW vp1 +PREHOOK: query: -- test partitioned view definition +-- (underlying table is not actually partitioned) +CREATE VIEW vp1 PARTITIONED ON (value) AS SELECT key, value FROM src WHERE key=86 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW vp1 +POSTHOOK: query: -- test partitioned view definition +-- (underlying table is not actually partitioned) +CREATE VIEW vp1 PARTITIONED ON (value) AS SELECT key, value @@ -127,13 +131,15 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@vp1 POSTHOOK: Output: default@vp1@value=val_86 POSTHOOK: Output: default@vp1@value=val_xyz -PREHOOK: query: ALTER VIEW vp1 +PREHOOK: query: -- should work since we use IF NOT EXISTS +ALTER VIEW vp1 ADD IF NOT EXISTS PARTITION (value='val_xyz') PREHOOK: type: ALTERTABLE_ADDPARTS PREHOOK: Input: default@src PREHOOK: Input: default@vp1 PREHOOK: Output: default@vp1@value=val_xyz -POSTHOOK: query: ALTER VIEW vp1 +POSTHOOK: query: -- should work since we use IF NOT EXISTS +ALTER VIEW vp1 ADD IF NOT EXISTS PARTITION (value='val_xyz') POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Input: default@src @@ -186,11 +192,13 @@ DROP PARTITION (value='val_xyz') POSTHOOK: type: ALTERTABLE_DROPPARTS POSTHOOK: Input: default@vp1 POSTHOOK: Output: default@vp1@value=val_xyz -PREHOOK: query: ALTER VIEW vp1 +PREHOOK: query: -- should work since we use IF EXISTS +ALTER VIEW vp1 DROP IF EXISTS PARTITION (value='val_xyz') PREHOOK: type: ALTERTABLE_DROPPARTS PREHOOK: Input: default@vp1 -POSTHOOK: query: ALTER VIEW vp1 +POSTHOOK: query: -- should work since we use IF EXISTS +ALTER VIEW vp1 DROP IF EXISTS PARTITION (value='val_xyz') POSTHOOK: type: ALTERTABLE_DROPPARTS POSTHOOK: Input: default@vp1 @@ -199,22 +207,34 @@ PREHOOK: type: SHOWPARTITIONS POSTHOOK: query: SHOW PARTITIONS vp1 POSTHOOK: type: SHOWPARTITIONS value=val_86 -PREHOOK: query: SELECT * FROM vp1 +PREHOOK: query: -- Even though no partition predicate is specified in the next query, +-- the WHERE clause inside of the view should satisfy strict mode. +-- In other words, strict only applies to underlying tables +-- (regardless of whether or not the view is partitioned). +SELECT * FROM vp1 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@vp1 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM vp1 +POSTHOOK: query: -- Even though no partition predicate is specified in the next query, +-- the WHERE clause inside of the view should satisfy strict mode. +-- In other words, strict only applies to underlying tables +-- (regardless of whether or not the view is partitioned). +SELECT * FROM vp1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@vp1 #### A masked pattern was here #### 86 val_86 -PREHOOK: query: CREATE VIEW vp2 +PREHOOK: query: -- test a partitioned view on top of an underlying partitioned table, +-- but with only a suffix of the partitioning columns +CREATE VIEW vp2 PARTITIONED ON (hr) AS SELECT * FROM srcpart WHERE key < 10 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW vp2 +POSTHOOK: query: -- test a partitioned view on top of an underlying partitioned table, +-- but with only a suffix of the partitioning columns +CREATE VIEW vp2 PARTITIONED ON (hr) AS SELECT * FROM srcpart WHERE key < 10 POSTHOOK: type: CREATEVIEW @@ -307,14 +327,18 @@ POSTHOOK: Input: default@vp2 8 9 9 -PREHOOK: query: CREATE VIEW vp3(k,v) +PREHOOK: query: -- test a partitioned view where the PARTITIONED ON clause references +-- an imposed column name +CREATE VIEW vp3(k,v) PARTITIONED ON (v) AS SELECT key, value FROM src WHERE key=86 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW vp3(k,v) +POSTHOOK: query: -- test a partitioned view where the PARTITIONED ON clause references +-- an imposed column name +CREATE VIEW vp3(k,v) PARTITIONED ON (v) AS SELECT key, value diff --git ql/src/test/results/clientpositive/cross_join.q.out ql/src/test/results/clientpositive/cross_join.q.out index 89e098a..bc9b67e 100644 --- ql/src/test/results/clientpositive/cross_join.q.out +++ ql/src/test/results/clientpositive/cross_join.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: explain select src.key from src join src src2 +PREHOOK: query: -- current +explain select src.key from src join src src2 PREHOOK: type: QUERY -POSTHOOK: query: explain select src.key from src join src src2 +POSTHOOK: query: -- current +explain select src.key from src join src src2 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src) src2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))))) @@ -54,9 +56,11 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain select src.key from src cross join src src2 +PREHOOK: query: -- ansi cross join +explain select src.key from src cross join src src2 PREHOOK: type: QUERY -POSTHOOK: query: explain select src.key from src cross join src src2 +POSTHOOK: query: -- ansi cross join +explain select src.key from src cross join src src2 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_CROSSJOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src) src2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))))) @@ -110,9 +114,11 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain select src.key from src cross join src src2 on src.key=src2.key +PREHOOK: query: -- appending condition is allowed +explain select src.key from src cross join src src2 on src.key=src2.key PREHOOK: type: QUERY -POSTHOOK: query: explain select src.key from src cross join src src2 on src.key=src2.key +POSTHOOK: query: -- appending condition is allowed +explain select src.key from src cross join src src2 on src.key=src2.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_CROSSJOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src) src2) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))))) diff --git ql/src/test/results/clientpositive/ctas_hadoop20.q.out ql/src/test/results/clientpositive/ctas_hadoop20.q.out index 8f4ae38..ead9ea5 100644 --- ql/src/test/results/clientpositive/ctas_hadoop20.q.out +++ ql/src/test/results/clientpositive/ctas_hadoop20.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table nzhang_Tmp(a int, b string) +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +create table nzhang_Tmp(a int, b string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table nzhang_Tmp(a int, b string) +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +create table nzhang_Tmp(a int, b string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@nzhang_Tmp PREHOOK: query: select * from nzhang_Tmp diff --git ql/src/test/results/clientpositive/ctas_uses_database_location.q.out ql/src/test/results/clientpositive/ctas_uses_database_location.q.out index e23e651..fd7abc0 100644 --- ql/src/test/results/clientpositive/ctas_uses_database_location.q.out +++ ql/src/test/results/clientpositive/ctas_uses_database_location.q.out @@ -1,7 +1,15 @@ -PREHOOK: query: CREATE DATABASE db1 +PREHOOK: query: -- Tests that CTAS queries in non-default databases use the location of the database +-- not the hive.metastore.warehouse.dir for intermediate files (FileSinkOperator output). +-- If hive.metastore.warehouse.dir were used this would fail because the scheme is invalid. + +CREATE DATABASE db1 #### A masked pattern was here #### PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE db1 +POSTHOOK: query: -- Tests that CTAS queries in non-default databases use the location of the database +-- not the hive.metastore.warehouse.dir for intermediate files (FileSinkOperator output). +-- If hive.metastore.warehouse.dir were used this would fail because the scheme is invalid. + +CREATE DATABASE db1 #### A masked pattern was here #### POSTHOOK: type: CREATEDATABASE PREHOOK: query: USE db1 diff --git ql/src/test/results/clientpositive/database.q.out ql/src/test/results/clientpositive/database.q.out index 4716cc9..8b92629 100644 --- ql/src/test/results/clientpositive/database.q.out +++ ql/src/test/results/clientpositive/database.q.out @@ -3,9 +3,11 @@ PREHOOK: type: SHOWDATABASES POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES default -PREHOOK: query: CREATE DATABASE test_db COMMENT 'Hive test database' +PREHOOK: query: -- CREATE with comment +CREATE DATABASE test_db COMMENT 'Hive test database' PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE test_db COMMENT 'Hive test database' +POSTHOOK: query: -- CREATE with comment +CREATE DATABASE test_db COMMENT 'Hive test database' POSTHOOK: type: CREATEDATABASE PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES @@ -13,9 +15,11 @@ POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES default test_db -PREHOOK: query: CREATE DATABASE IF NOT EXISTS test_db +PREHOOK: query: -- CREATE INE already exists +CREATE DATABASE IF NOT EXISTS test_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE IF NOT EXISTS test_db +POSTHOOK: query: -- CREATE INE already exists +CREATE DATABASE IF NOT EXISTS test_db POSTHOOK: type: CREATEDATABASE PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES @@ -23,24 +27,30 @@ POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES default test_db -PREHOOK: query: SHOW SCHEMAS +PREHOOK: query: -- SHOW DATABASES synonym +SHOW SCHEMAS PREHOOK: type: SHOWDATABASES -POSTHOOK: query: SHOW SCHEMAS +POSTHOOK: query: -- SHOW DATABASES synonym +SHOW SCHEMAS POSTHOOK: type: SHOWDATABASES default test_db -PREHOOK: query: DROP DATABASE test_db +PREHOOK: query: -- DROP +DROP DATABASE test_db PREHOOK: type: DROPDATABASE -POSTHOOK: query: DROP DATABASE test_db +POSTHOOK: query: -- DROP +DROP DATABASE test_db POSTHOOK: type: DROPDATABASE PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES default -PREHOOK: query: CREATE DATABASE IF NOT EXISTS test_db COMMENT 'Hive test database' +PREHOOK: query: -- CREATE INE doesn't exist +CREATE DATABASE IF NOT EXISTS test_db COMMENT 'Hive test database' PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE IF NOT EXISTS test_db COMMENT 'Hive test database' +POSTHOOK: query: -- CREATE INE doesn't exist +CREATE DATABASE IF NOT EXISTS test_db COMMENT 'Hive test database' POSTHOOK: type: CREATEDATABASE PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES @@ -48,22 +58,28 @@ POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES default test_db -PREHOOK: query: DROP DATABASE IF EXISTS test_db +PREHOOK: query: -- DROP IE exists +DROP DATABASE IF EXISTS test_db PREHOOK: type: DROPDATABASE -POSTHOOK: query: DROP DATABASE IF EXISTS test_db +POSTHOOK: query: -- DROP IE exists +DROP DATABASE IF EXISTS test_db POSTHOOK: type: DROPDATABASE PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES default -PREHOOK: query: DROP DATABASE IF EXISTS test_db +PREHOOK: query: -- DROP IE doesn't exist +DROP DATABASE IF EXISTS test_db PREHOOK: type: DROPDATABASE -POSTHOOK: query: DROP DATABASE IF EXISTS test_db +POSTHOOK: query: -- DROP IE doesn't exist +DROP DATABASE IF EXISTS test_db POSTHOOK: type: DROPDATABASE -PREHOOK: query: CREATE DATABASE test_db +PREHOOK: query: -- SHOW +CREATE DATABASE test_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE test_db +POSTHOOK: query: -- SHOW +CREATE DATABASE test_db POSTHOOK: type: CREATEDATABASE PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES @@ -71,14 +87,18 @@ POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES default test_db -PREHOOK: query: SHOW DATABASES LIKE 'test*' +PREHOOK: query: -- SHOW pattern +SHOW DATABASES LIKE 'test*' PREHOOK: type: SHOWDATABASES -POSTHOOK: query: SHOW DATABASES LIKE 'test*' +POSTHOOK: query: -- SHOW pattern +SHOW DATABASES LIKE 'test*' POSTHOOK: type: SHOWDATABASES test_db -PREHOOK: query: SHOW DATABASES LIKE '*ef*' +PREHOOK: query: -- SHOW pattern +SHOW DATABASES LIKE '*ef*' PREHOOK: type: SHOWDATABASES -POSTHOOK: query: SHOW DATABASES LIKE '*ef*' +POSTHOOK: query: -- SHOW pattern +SHOW DATABASES LIKE '*ef*' POSTHOOK: type: SHOWDATABASES default PREHOOK: query: USE test_db @@ -91,9 +111,11 @@ POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES default test_db -PREHOOK: query: CREATE TABLE test_table (col1 STRING) STORED AS TEXTFILE +PREHOOK: query: -- CREATE table in non-default DB +CREATE TABLE test_table (col1 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table (col1 STRING) STORED AS TEXTFILE +POSTHOOK: query: -- CREATE table in non-default DB +CREATE TABLE test_table (col1 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: test_db@test_table PREHOOK: query: SHOW TABLES @@ -101,25 +123,31 @@ PREHOOK: type: SHOWTABLES POSTHOOK: query: SHOW TABLES POSTHOOK: type: SHOWTABLES test_table -PREHOOK: query: DESCRIBE test_table +PREHOOK: query: -- DESCRIBE table in non-default DB +DESCRIBE test_table PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE test_table +POSTHOOK: query: -- DESCRIBE table in non-default DB +DESCRIBE test_table POSTHOOK: type: DESCTABLE # col_name data_type comment col1 string None -PREHOOK: query: DESCRIBE EXTENDED test_table +PREHOOK: query: -- DESCRIBE EXTENDED in non-default DB +DESCRIBE EXTENDED test_table PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE EXTENDED test_table +POSTHOOK: query: -- DESCRIBE EXTENDED in non-default DB +DESCRIBE EXTENDED test_table POSTHOOK: type: DESCTABLE # col_name data_type comment col1 string None #### A masked pattern was here #### -PREHOOK: query: CREATE TABLE test_table_like LIKE test_table +PREHOOK: query: -- CREATE LIKE in non-default DB +CREATE TABLE test_table_like LIKE test_table PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table_like LIKE test_table +POSTHOOK: query: -- CREATE LIKE in non-default DB +CREATE TABLE test_table_like LIKE test_table POSTHOOK: type: CREATETABLE POSTHOOK: Output: test_db@test_table_like PREHOOK: query: SHOW TABLES @@ -137,11 +165,13 @@ POSTHOOK: type: DESCTABLE col1 string None #### A masked pattern was here #### -PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/test.dat' +PREHOOK: query: -- LOAD and SELECT +LOAD DATA LOCAL INPATH '../data/files/test.dat' OVERWRITE INTO TABLE test_table PREHOOK: type: LOAD PREHOOK: Output: test_db@test_table -POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/test.dat' +POSTHOOK: query: -- LOAD and SELECT +LOAD DATA LOCAL INPATH '../data/files/test.dat' OVERWRITE INTO TABLE test_table POSTHOOK: type: LOAD POSTHOOK: Output: test_db@test_table @@ -159,11 +189,13 @@ POSTHOOK: Input: test_db@test_table 4 5 6 -PREHOOK: query: DROP TABLE test_table +PREHOOK: query: -- DROP and CREATE w/o LOAD +DROP TABLE test_table PREHOOK: type: DROPTABLE PREHOOK: Input: test_db@test_table PREHOOK: Output: test_db@test_table -POSTHOOK: query: DROP TABLE test_table +POSTHOOK: query: -- DROP and CREATE w/o LOAD +DROP TABLE test_table POSTHOOK: type: DROPTABLE POSTHOOK: Input: test_db@test_table POSTHOOK: Output: test_db@test_table @@ -191,9 +223,11 @@ POSTHOOK: query: SELECT * FROM test_table POSTHOOK: type: QUERY POSTHOOK: Input: test_db@test_table #### A masked pattern was here #### -PREHOOK: query: USE test_db +PREHOOK: query: -- CREATE table that already exists in DEFAULT +USE test_db PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE test_db +POSTHOOK: query: -- CREATE table that already exists in DEFAULT +USE test_db POSTHOOK: type: SWITCHDATABASE PREHOOK: query: CREATE TABLE src (col1 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -237,9 +271,11 @@ POSTHOOK: Input: default@src 278 val_278 98 val_98 484 val_484 -PREHOOK: query: USE test_db +PREHOOK: query: -- DROP DATABASE +USE test_db PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE test_db +POSTHOOK: query: -- DROP DATABASE +USE test_db POSTHOOK: type: SWITCHDATABASE PREHOOK: query: DROP TABLE src PREHOOK: type: DROPTABLE @@ -282,9 +318,11 @@ PREHOOK: type: SHOWDATABASES POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES default -PREHOOK: query: CREATE DATABASE to_drop_db1 +PREHOOK: query: -- DROP EMPTY DATABASE CASCADE +CREATE DATABASE to_drop_db1 PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE to_drop_db1 +POSTHOOK: query: -- DROP EMPTY DATABASE CASCADE +CREATE DATABASE to_drop_db1 POSTHOOK: type: CREATEDATABASE PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES @@ -305,9 +343,11 @@ PREHOOK: type: SHOWDATABASES POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES default -PREHOOK: query: CREATE DATABASE to_drop_db2 +PREHOOK: query: -- DROP NON-EMPTY DATABASE CASCADE +CREATE DATABASE to_drop_db2 PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE to_drop_db2 +POSTHOOK: query: -- DROP NON-EMPTY DATABASE CASCADE +CREATE DATABASE to_drop_db2 POSTHOOK: type: CREATEDATABASE PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES @@ -354,9 +394,11 @@ POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] default -PREHOOK: query: CREATE DATABASE to_drop_db3 +PREHOOK: query: -- DROP NON-EMPTY DATABASE CASCADE IF EXISTS +CREATE DATABASE to_drop_db3 PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE to_drop_db3 +POSTHOOK: query: -- DROP NON-EMPTY DATABASE CASCADE IF EXISTS +CREATE DATABASE to_drop_db3 POSTHOOK: type: CREATEDATABASE POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] PREHOOK: query: SHOW DATABASES @@ -393,9 +435,11 @@ POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] default -PREHOOK: query: DROP DATABASE IF EXISTS non_exists_db3 CASCADE +PREHOOK: query: -- DROP NON-EXISTING DATABASE CASCADE IF EXISTS +DROP DATABASE IF EXISTS non_exists_db3 CASCADE PREHOOK: type: DROPDATABASE -POSTHOOK: query: DROP DATABASE IF EXISTS non_exists_db3 CASCADE +POSTHOOK: query: -- DROP NON-EXISTING DATABASE CASCADE IF EXISTS +DROP DATABASE IF EXISTS non_exists_db3 CASCADE POSTHOOK: type: DROPDATABASE POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] PREHOOK: query: SHOW DATABASES @@ -404,14 +448,18 @@ POSTHOOK: query: SHOW DATABASES POSTHOOK: type: SHOWDATABASES POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] default -PREHOOK: query: DROP DATABASE IF EXISTS non_exists_db3 RESTRICT +PREHOOK: query: -- DROP NON-EXISTING DATABASE RESTRICT IF EXISTS +DROP DATABASE IF EXISTS non_exists_db3 RESTRICT PREHOOK: type: DROPDATABASE -POSTHOOK: query: DROP DATABASE IF EXISTS non_exists_db3 RESTRICT +POSTHOOK: query: -- DROP NON-EXISTING DATABASE RESTRICT IF EXISTS +DROP DATABASE IF EXISTS non_exists_db3 RESTRICT POSTHOOK: type: DROPDATABASE POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] -PREHOOK: query: CREATE DATABASE to_drop_db4 +PREHOOK: query: -- DROP EMPTY DATABASE RESTRICT +CREATE DATABASE to_drop_db4 PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE to_drop_db4 +POSTHOOK: query: -- DROP EMPTY DATABASE RESTRICT +CREATE DATABASE to_drop_db4 POSTHOOK: type: CREATEDATABASE POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] PREHOOK: query: SHOW DATABASES @@ -433,13 +481,13 @@ POSTHOOK: type: SHOWDATABASES POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] default PREHOOK: query: -- - +-- Canonical Name Tests -- CREATE DATABASE db1 PREHOOK: type: CREATEDATABASE POSTHOOK: query: -- - +-- Canonical Name Tests -- CREATE DATABASE db1 @@ -450,28 +498,34 @@ PREHOOK: type: CREATEDATABASE POSTHOOK: query: CREATE DATABASE db2 POSTHOOK: type: CREATEDATABASE POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] -PREHOOK: query: CREATE TABLE db1.src(key STRING, value STRING) +PREHOOK: query: -- CREATE foreign table +CREATE TABLE db1.src(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE db1.src(key STRING, value STRING) +POSTHOOK: query: -- CREATE foreign table +CREATE TABLE db1.src(key STRING, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: db1@src POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] -PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv1.txt' +PREHOOK: query: -- LOAD into foreign table +LOAD DATA LOCAL INPATH '../data/files/kv1.txt' OVERWRITE INTO TABLE db1.src PREHOOK: type: LOAD PREHOOK: Output: db1@src -POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv1.txt' +POSTHOOK: query: -- LOAD into foreign table +LOAD DATA LOCAL INPATH '../data/files/kv1.txt' OVERWRITE INTO TABLE db1.src POSTHOOK: type: LOAD POSTHOOK: Output: db1@src POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] -PREHOOK: query: SELECT * FROM db1.src +PREHOOK: query: -- SELECT from foreign table +SELECT * FROM db1.src PREHOOK: type: QUERY PREHOOK: Input: db1@src #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM db1.src +POSTHOOK: query: -- SELECT from foreign table +SELECT * FROM db1.src POSTHOOK: type: QUERY POSTHOOK: Input: db1@src #### A masked pattern was here #### @@ -976,35 +1030,41 @@ POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] 400 val_400 200 val_200 97 val_97 -PREHOOK: query: CREATE TABLE db1.srcpart(key STRING, value STRING) +PREHOOK: query: -- CREATE Partitioned foreign table +CREATE TABLE db1.srcpart(key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE db1.srcpart(key STRING, value STRING) +POSTHOOK: query: -- CREATE Partitioned foreign table +CREATE TABLE db1.srcpart(key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: db1@srcpart POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] -PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv1.txt' +PREHOOK: query: -- LOAD data into Partitioned foreign table +LOAD DATA LOCAL INPATH '../data/files/kv1.txt' OVERWRITE INTO TABLE db1.srcpart PARTITION (ds='2008-04-08', hr='11') PREHOOK: type: LOAD PREHOOK: Output: db1@srcpart -POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv1.txt' +POSTHOOK: query: -- LOAD data into Partitioned foreign table +LOAD DATA LOCAL INPATH '../data/files/kv1.txt' OVERWRITE INTO TABLE db1.srcpart PARTITION (ds='2008-04-08', hr='11') POSTHOOK: type: LOAD POSTHOOK: Output: db1@srcpart POSTHOOK: Output: db1@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] -PREHOOK: query: SELECT key, value FROM db1.srcpart +PREHOOK: query: -- SELECT from Partitioned foreign table +SELECT key, value FROM db1.srcpart WHERE key < 100 AND ds='2008-04-08' AND hr='11' PREHOOK: type: QUERY PREHOOK: Input: db1@srcpart PREHOOK: Input: db1@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: SELECT key, value FROM db1.srcpart +POSTHOOK: query: -- SELECT from Partitioned foreign table +SELECT key, value FROM db1.srcpart WHERE key < 100 AND ds='2008-04-08' AND hr='11' POSTHOOK: type: QUERY POSTHOOK: Input: db1@srcpart @@ -1095,9 +1155,11 @@ POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] 37 val_37 90 val_90 97 val_97 -PREHOOK: query: USE db2 +PREHOOK: query: -- SELECT JOINed product of two foreign tables +USE db2 PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE db2 +POSTHOOK: query: -- SELECT JOINed product of two foreign tables +USE db2 POSTHOOK: type: SWITCHDATABASE POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] PREHOOK: query: SELECT a.* FROM db1.src a JOIN default.src1 b @@ -1150,27 +1212,32 @@ POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] 66 val_66 98 val_98 98 val_98 -PREHOOK: query: CREATE TABLE conflict_name AS +PREHOOK: query: -- CREATE TABLE AS SELECT from foreign table +CREATE TABLE conflict_name AS SELECT value FROM default.src WHERE key = 66 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src -POSTHOOK: query: CREATE TABLE conflict_name AS +POSTHOOK: query: -- CREATE TABLE AS SELECT from foreign table +CREATE TABLE conflict_name AS SELECT value FROM default.src WHERE key = 66 POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: db2@conflict_name POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] -PREHOOK: query: CREATE TABLE db1.conflict_name AS +PREHOOK: query: -- CREATE foreign table +CREATE TABLE db1.conflict_name AS SELECT value FROM db1.src WHERE key = 8 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: db1@src -POSTHOOK: query: CREATE TABLE db1.conflict_name AS +POSTHOOK: query: -- CREATE foreign table +CREATE TABLE db1.conflict_name AS SELECT value FROM db1.src WHERE key = 8 POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: db1@src POSTHOOK: Output: db1@conflict_name POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] -PREHOOK: query: SELECT * FROM ( +PREHOOK: query: -- query tables with the same names in different DBs +SELECT * FROM ( SELECT value FROM db1.conflict_name UNION ALL SELECT value FROM conflict_name @@ -1179,7 +1246,8 @@ PREHOOK: type: QUERY PREHOOK: Input: db1@conflict_name PREHOOK: Input: db2@conflict_name #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM ( +POSTHOOK: query: -- query tables with the same names in different DBs +SELECT * FROM ( SELECT value FROM db1.conflict_name UNION ALL SELECT value FROM conflict_name @@ -1217,10 +1285,12 @@ POSTHOOK: Input: db2@conflict_name POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] val_66 val_8 -PREHOOK: query: CREATE TABLE bucketized_src (key INT, value STRING) +PREHOOK: query: -- TABLESAMPLES +CREATE TABLE bucketized_src (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE bucketized_src (key INT, value STRING) +POSTHOOK: query: -- TABLESAMPLES +CREATE TABLE bucketized_src (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bucketized_src @@ -1250,9 +1320,11 @@ POSTHOOK: Lineage: bucketized_src.key EXPRESSION [(src)src.FieldSchema(name:key, POSTHOOK: Lineage: bucketized_src.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: temp_tbl2.c EXPRESSION [(temp_tbl)temp_tbl.null, ] 66 -PREHOOK: query: CREATE TABLE db2.src1 LIKE default.src +PREHOOK: query: -- CREATE TABLE LIKE +CREATE TABLE db2.src1 LIKE default.src PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE db2.src1 LIKE default.src +POSTHOOK: query: -- CREATE TABLE LIKE +CREATE TABLE db2.src1 LIKE default.src POSTHOOK: type: CREATETABLE POSTHOOK: Output: db2@src1 POSTHOOK: Lineage: bucketized_src.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -1278,11 +1350,13 @@ key string default value string default #### A masked pattern was here #### -PREHOOK: query: SELECT key FROM `default`.src ORDER BY key LIMIT 1 +PREHOOK: query: -- character escaping +SELECT key FROM `default`.src ORDER BY key LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM `default`.src ORDER BY key LIMIT 1 +POSTHOOK: query: -- character escaping +SELECT key FROM `default`.src ORDER BY key LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/database_drop.q.out ql/src/test/results/clientpositive/database_drop.q.out index 2688be4..6c4440f 100644 --- ql/src/test/results/clientpositive/database_drop.q.out +++ ql/src/test/results/clientpositive/database_drop.q.out @@ -1,6 +1,18 @@ -PREHOOK: query: CREATE DATABASE db5 +PREHOOK: query: -- create database with multiple tables, indexes and views. +-- Use both partitioned and non-partitioned tables, as well as +-- tables and indexes with specific storage locations +-- verify the drop the database with cascade works and that the directories +-- outside the database's default storage are removed as part of the drop + +CREATE DATABASE db5 PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE db5 +POSTHOOK: query: -- create database with multiple tables, indexes and views. +-- Use both partitioned and non-partitioned tables, as well as +-- tables and indexes with specific storage locations +-- verify the drop the database with cascade works and that the directories +-- outside the database's default storage are removed as part of the drop + +CREATE DATABASE db5 POSTHOOK: type: CREATEDATABASE PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES @@ -13,9 +25,11 @@ PREHOOK: type: SWITCHDATABASE POSTHOOK: query: USE db5 POSTHOOK: type: SWITCHDATABASE #### A masked pattern was here #### -PREHOOK: query: CREATE TABLE temp_tbl (id INT, name STRING) +PREHOOK: query: -- add a table, index and view +CREATE TABLE temp_tbl (id INT, name STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE temp_tbl (id INT, name STRING) +POSTHOOK: query: -- add a table, index and view +CREATE TABLE temp_tbl (id INT, name STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: db5@temp_tbl PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv1.txt' INTO TABLE temp_tbl @@ -45,8 +59,10 @@ POSTHOOK: Output: db5@db5__temp_tbl_idx1__ POSTHOOK: Lineage: db5__temp_tbl_idx1__._bucketname SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__._offsets EXPRESSION [(temp_tbl)temp_tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__.id SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:id, type:int, comment:null), ] +PREHOOK: query: -- add a table, index and view with a different storage location #### A masked pattern was here #### PREHOOK: type: CREATETABLE +POSTHOOK: query: -- add a table, index and view with a different storage location #### A masked pattern was here #### POSTHOOK: type: CREATETABLE POSTHOOK: Output: db5@temp_tbl2 @@ -92,9 +108,11 @@ POSTHOOK: Lineage: db5__temp_tbl2_idx2__.id SIMPLE [(temp_tbl2)temp_tbl2.FieldSc POSTHOOK: Lineage: db5__temp_tbl_idx1__._bucketname SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__._offsets EXPRESSION [(temp_tbl)temp_tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__.id SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:id, type:int, comment:null), ] -PREHOOK: query: CREATE TABLE part_tab (id INT, name STRING) PARTITIONED BY (ds string) +PREHOOK: query: -- add a partitioned table, index and view +CREATE TABLE part_tab (id INT, name STRING) PARTITIONED BY (ds string) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part_tab (id INT, name STRING) PARTITIONED BY (ds string) +POSTHOOK: query: -- add a partitioned table, index and view +CREATE TABLE part_tab (id INT, name STRING) PARTITIONED BY (ds string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: db5@part_tab POSTHOOK: Lineage: db5__temp_tbl2_idx2__._bucketname SIMPLE [(temp_tbl2)temp_tbl2.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] @@ -181,10 +199,12 @@ POSTHOOK: Lineage: db5__temp_tbl2_idx2__.id SIMPLE [(temp_tbl2)temp_tbl2.FieldSc POSTHOOK: Lineage: db5__temp_tbl_idx1__._bucketname SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__._offsets EXPRESSION [(temp_tbl)temp_tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__.id SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:id, type:int, comment:null), ] -PREHOOK: query: CREATE TABLE part_tab2 (id INT, name STRING) PARTITIONED BY (ds string) +PREHOOK: query: -- add a partitioned table, index and view with a different storage location +CREATE TABLE part_tab2 (id INT, name STRING) PARTITIONED BY (ds string) #### A masked pattern was here #### PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part_tab2 (id INT, name STRING) PARTITIONED BY (ds string) +POSTHOOK: query: -- add a partitioned table, index and view with a different storage location +CREATE TABLE part_tab2 (id INT, name STRING) PARTITIONED BY (ds string) #### A masked pattern was here #### POSTHOOK: type: CREATETABLE POSTHOOK: Output: db5@part_tab2 @@ -310,10 +330,12 @@ POSTHOOK: Lineage: db5__temp_tbl2_idx2__.id SIMPLE [(temp_tbl2)temp_tbl2.FieldSc POSTHOOK: Lineage: db5__temp_tbl_idx1__._bucketname SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__._offsets EXPRESSION [(temp_tbl)temp_tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__.id SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:id, type:int, comment:null), ] -PREHOOK: query: CREATE TABLE part_tab3 (id INT, name STRING) PARTITIONED BY (ds string) +PREHOOK: query: -- add a partitioned table, index and view with a different storage location +CREATE TABLE part_tab3 (id INT, name STRING) PARTITIONED BY (ds string) #### A masked pattern was here #### PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part_tab3 (id INT, name STRING) PARTITIONED BY (ds string) +POSTHOOK: query: -- add a partitioned table, index and view with a different storage location +CREATE TABLE part_tab3 (id INT, name STRING) PARTITIONED BY (ds string) #### A masked pattern was here #### POSTHOOK: type: CREATETABLE POSTHOOK: Output: db5@part_tab3 @@ -500,13 +522,15 @@ POSTHOOK: Lineage: db5__temp_tbl2_idx2__.id SIMPLE [(temp_tbl2)temp_tbl2.FieldSc POSTHOOK: Lineage: db5__temp_tbl_idx1__._bucketname SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__._offsets EXPRESSION [(temp_tbl)temp_tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__.id SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:id, type:int, comment:null), ] -PREHOOK: query: CREATE EXTERNAL TABLE extab1(id INT, name STRING) ROW FORMAT +PREHOOK: query: -- add an external table +CREATE EXTERNAL TABLE extab1(id INT, name STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '' LINES TERMINATED BY '\n' STORED AS TEXTFILE #### A masked pattern was here #### PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE EXTERNAL TABLE extab1(id INT, name STRING) ROW FORMAT +POSTHOOK: query: -- add an external table +CREATE EXTERNAL TABLE extab1(id INT, name STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '' LINES TERMINATED BY '\n' STORED AS TEXTFILE @@ -537,9 +561,11 @@ POSTHOOK: Lineage: db5__temp_tbl2_idx2__.id SIMPLE [(temp_tbl2)temp_tbl2.FieldSc POSTHOOK: Lineage: db5__temp_tbl_idx1__._bucketname SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__._offsets EXPRESSION [(temp_tbl)temp_tbl.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: db5__temp_tbl_idx1__.id SIMPLE [(temp_tbl)temp_tbl.FieldSchema(name:id, type:int, comment:null), ] -PREHOOK: query: DROP DATABASE db5 CASCADE +PREHOOK: query: -- drop the database with cascade +DROP DATABASE db5 CASCADE PREHOOK: type: DROPDATABASE -POSTHOOK: query: DROP DATABASE db5 CASCADE +POSTHOOK: query: -- drop the database with cascade +DROP DATABASE db5 CASCADE POSTHOOK: type: DROPDATABASE POSTHOOK: Lineage: db5__part_tab2_idx4__ PARTITION(ds=2008-04-09)._bucketname SIMPLE [(part_tab2)part_tab2.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: db5__part_tab2_idx4__ PARTITION(ds=2008-04-09)._offsets EXPRESSION [(part_tab2)part_tab2.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] diff --git ql/src/test/results/clientpositive/decimal_udf.q.out ql/src/test/results/clientpositive/decimal_udf.q.out index 6a9ed55..943fc34 100644 --- ql/src/test/results/clientpositive/decimal_udf.q.out +++ ql/src/test/results/clientpositive/decimal_udf.q.out @@ -19,9 +19,11 @@ PREHOOK: Output: default@decimal_udf POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv7.txt' INTO TABLE DECIMAL_UDF POSTHOOK: type: LOAD POSTHOOK: Output: default@decimal_udf -PREHOOK: query: EXPLAIN SELECT key + key FROM DECIMAL_UDF +PREHOOK: query: -- addition +EXPLAIN SELECT key + key FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key + key FROM DECIMAL_UDF +POSTHOOK: query: -- addition +EXPLAIN SELECT key + key FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))) @@ -343,9 +345,11 @@ POSTHOOK: Input: default@decimal_udf 1.9999999999999999999999999 -1234567889.123456789 1234567891.12345678 -PREHOOK: query: EXPLAIN SELECT key - key FROM DECIMAL_UDF +PREHOOK: query: -- substraction +EXPLAIN SELECT key - key FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key - key FROM DECIMAL_UDF +POSTHOOK: query: -- substraction +EXPLAIN SELECT key - key FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (- (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))) @@ -667,9 +671,11 @@ POSTHOOK: Input: default@decimal_udf -1E-25 -1234567891.123456789 1234567889.12345678 -PREHOOK: query: EXPLAIN SELECT key * key FROM DECIMAL_UDF +PREHOOK: query: -- multiplication +EXPLAIN SELECT key * key FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key * key FROM DECIMAL_UDF +POSTHOOK: query: -- multiplication +EXPLAIN SELECT key * key FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (* (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))) @@ -991,9 +997,11 @@ POSTHOOK: Input: default@decimal_udf 1.9999999999999999999999998 -2469135780.246913578 2469135780.24691356 -PREHOOK: query: EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1 +PREHOOK: query: -- division +EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1 +POSTHOOK: query: -- division +EXPLAIN SELECT key / 0 FROM DECIMAL_UDF limit 1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (/ (TOK_TABLE_OR_COL key) 0))) (TOK_LIMIT 1))) @@ -1387,9 +1395,11 @@ POSTHOOK: Input: default@decimal_udf 0.49999999999999999999999995 -617283945.0617283945 617283945.06172839 -PREHOOK: query: EXPLAIN SELECT abs(key) FROM DECIMAL_UDF +PREHOOK: query: -- abs +EXPLAIN SELECT abs(key) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT abs(key) FROM DECIMAL_UDF +POSTHOOK: query: -- abs +EXPLAIN SELECT abs(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION abs (TOK_TABLE_OR_COL key)))))) @@ -1468,9 +1478,11 @@ POSTHOOK: Input: default@decimal_udf 0.9999999999999999999999999 1234567890.123456789 1234567890.12345678 -PREHOOK: query: EXPLAIN SELECT value, sum(key) / count(key), avg(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value +PREHOOK: query: -- avg +EXPLAIN SELECT value, sum(key) / count(key), avg(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT value, sum(key) / count(key), avg(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value +POSTHOOK: query: -- avg +EXPLAIN SELECT value, sum(key) / count(key), avg(key) FROM DECIMAL_UDF GROUP BY value ORDER BY value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (/ (TOK_FUNCTION sum (TOK_TABLE_OR_COL key)) (TOK_FUNCTION count (TOK_TABLE_OR_COL key)))) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) @@ -1605,9 +1617,11 @@ POSTHOOK: Input: default@decimal_udf 200 2E+2 2E+2 4400 -4.4E+3 -4.4E+3 1234567890 1234567890.12345678 1234567890.12345678 -PREHOOK: query: EXPLAIN SELECT -key FROM DECIMAL_UDF +PREHOOK: query: -- negative +EXPLAIN SELECT -key FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT -key FROM DECIMAL_UDF +POSTHOOK: query: -- negative +EXPLAIN SELECT -key FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (- (TOK_TABLE_OR_COL key)))))) @@ -1686,9 +1700,11 @@ POSTHOOK: Input: default@decimal_udf -0.9999999999999999999999999 1234567890.123456789 -1234567890.12345678 -PREHOOK: query: EXPLAIN SELECT +key FROM DECIMAL_UDF +PREHOOK: query: -- positive +EXPLAIN SELECT +key FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +key FROM DECIMAL_UDF +POSTHOOK: query: -- positive +EXPLAIN SELECT +key FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key)))))) @@ -1767,9 +1783,11 @@ POSTHOOK: Input: default@decimal_udf 0.9999999999999999999999999 -1234567890.123456789 1234567890.12345678 -PREHOOK: query: EXPlAIN SELECT CEIL(key) FROM DECIMAL_UDF +PREHOOK: query: -- ceiling +EXPlAIN SELECT CEIL(key) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPlAIN SELECT CEIL(key) FROM DECIMAL_UDF +POSTHOOK: query: -- ceiling +EXPlAIN SELECT CEIL(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION CEIL (TOK_TABLE_OR_COL key)))))) @@ -1848,9 +1866,11 @@ POSTHOOK: Input: default@decimal_udf 1 -1.23456789E+9 1234567891 -PREHOOK: query: EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF +PREHOOK: query: -- floor +EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF +POSTHOOK: query: -- floor +EXPLAIN SELECT FLOOR(key) FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION FLOOR (TOK_TABLE_OR_COL key)))))) @@ -1929,9 +1949,11 @@ POSTHOOK: Input: default@decimal_udf 0 -1234567891 1.23456789E+9 -PREHOOK: query: EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF +PREHOOK: query: -- round +EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF +POSTHOOK: query: -- round +EXPLAIN SELECT ROUND(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION ROUND (TOK_TABLE_OR_COL key) 2))))) @@ -2010,9 +2032,11 @@ POSTHOOK: Input: default@decimal_udf 1 -1234567890.12 1234567890.12 -PREHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +PREHOOK: query: -- power +EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF +POSTHOOK: query: -- power +EXPLAIN SELECT POWER(key, 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION POWER (TOK_TABLE_OR_COL key) 2))))) @@ -2091,9 +2115,11 @@ POSTHOOK: Input: default@decimal_udf 0.99999999999999999999999980000000000000000000000001 1524157875323883675.019051998750190521 1524157875323883652.7968299765279684 -PREHOOK: query: EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF +PREHOOK: query: -- modulo +EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF +POSTHOOK: query: -- modulo +EXPLAIN SELECT (key + 1) % (key / 2) FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (% (+ (TOK_TABLE_OR_COL key) 1) (/ (TOK_TABLE_OR_COL key) 2)))))) @@ -2172,9 +2198,11 @@ NULL 1E-25 -617283944.0617283945 1 -PREHOOK: query: EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value +PREHOOK: query: -- stddev, var +EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value +POSTHOOK: query: -- stddev, var +EXPLAIN SELECT value, stddev(key), variance(key) FROM DECIMAL_UDF GROUP BY value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION stddev (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION variance (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) @@ -2278,9 +2306,11 @@ POSTHOOK: Input: default@decimal_udf 200 0.0 0.0 4400 0.0 0.0 1234567890 0.0 0.0 -PREHOOK: query: EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value +PREHOOK: query: -- stddev_samp, var_samp +EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value +POSTHOOK: query: -- stddev_samp, var_samp +EXPLAIN SELECT value, stddev_samp(key), var_samp(key) FROM DECIMAL_UDF GROUP BY value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION stddev_samp (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION var_samp (TOK_TABLE_OR_COL key)))) (TOK_GROUPBY (TOK_TABLE_OR_COL value)))) @@ -2384,9 +2414,11 @@ POSTHOOK: Input: default@decimal_udf 200 0.0 0.0 4400 0.0 0.0 1234567890 0.0 0.0 -PREHOOK: query: EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF +PREHOOK: query: -- histogram +EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF +POSTHOOK: query: -- histogram +EXPLAIN SELECT histogram_numeric(key, 3) FROM DECIMAL_UDF POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME DECIMAL_UDF))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION histogram_numeric (TOK_TABLE_OR_COL key) 3))))) diff --git ql/src/test/results/clientpositive/describe_comment_indent.q.out ql/src/test/results/clientpositive/describe_comment_indent.q.out index 713eb63..9379f85 100644 --- ql/src/test/results/clientpositive/describe_comment_indent.q.out +++ ql/src/test/results/clientpositive/describe_comment_indent.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: CREATE TABLE test_table( +PREHOOK: query: -- test comment indent processing for multi-line comments + +CREATE TABLE test_table( col1 INT COMMENT 'col1 one line comment', col2 STRING COMMENT 'col2 two lines comment', @@ -8,7 +10,9 @@ comment') COMMENT 'table comment two lines' PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table( +POSTHOOK: query: -- test comment indent processing for multi-line comments + +CREATE TABLE test_table( col1 INT COMMENT 'col1 one line comment', col2 STRING COMMENT 'col2 two lines comment', diff --git ql/src/test/results/clientpositive/describe_pretty.q.out ql/src/test/results/clientpositive/describe_pretty.q.out index 49c4fbc..d94dfe5 100644 --- ql/src/test/results/clientpositive/describe_pretty.q.out +++ ql/src/test/results/clientpositive/describe_pretty.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: CREATE TABLE test_table( +PREHOOK: query: -- test comment indent processing for multi-line comments + +CREATE TABLE test_table( col1 INT COMMENT 'col1 one line comment', col2 STRING COMMENT 'col2 two lines comment', @@ -14,7 +16,9 @@ adipiscing tortor. Integer venenatis', COMMENT 'table comment two lines' PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table( +POSTHOOK: query: -- test comment indent processing for multi-line comments + +CREATE TABLE test_table( col1 INT COMMENT 'col1 one line comment', col2 STRING COMMENT 'col2 two lines comment', @@ -31,9 +35,37 @@ COMMENT 'table comment two lines' POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table -PREHOOK: query: DESCRIBE PRETTY test_table +PREHOOK: query: -- There will be an extra tab at the end of each comment line in the output. +-- This is because DESCRIBE command separates the column, type and +-- comment field using a \t. DESCRIBE PRETTY uses spaces instead +-- of \t to separate columns. Hive gets confused when it parses the string +-- table description constructed in MetaDataPrettyFormatUtils, and adds a tab +-- at the end of each line. +-- There are three ways to address this: +-- 1. Pad each row to the full terminal width with extra spaces. +-- 2. Assume a maximum tab width of 8, and subtract 2 * 8 spaces from the +-- available line width. This approach wastes upto 2 * 8 - 2 columns. +-- 3. Since the pretty output is meant only for human consumption, do nothing. +-- Just add a comment to the unit test file explaining what is happening. +-- This is the approach chosen. + +DESCRIBE PRETTY test_table PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE PRETTY test_table +POSTHOOK: query: -- There will be an extra tab at the end of each comment line in the output. +-- This is because DESCRIBE command separates the column, type and +-- comment field using a \t. DESCRIBE PRETTY uses spaces instead +-- of \t to separate columns. Hive gets confused when it parses the string +-- table description constructed in MetaDataPrettyFormatUtils, and adds a tab +-- at the end of each line. +-- There are three ways to address this: +-- 1. Pad each row to the full terminal width with extra spaces. +-- 2. Assume a maximum tab width of 8, and subtract 2 * 8 spaces from the +-- available line width. This approach wastes upto 2 * 8 - 2 columns. +-- 3. Since the pretty output is meant only for human consumption, do nothing. +-- Just add a comment to the unit test file explaining what is happening. +-- This is the approach chosen. + +DESCRIBE PRETTY test_table POSTHOOK: type: DESCTABLE col_name data_type comment diff --git ql/src/test/results/clientpositive/describe_syntax.q.out ql/src/test/results/clientpositive/describe_syntax.q.out index 09859b1..fefd983 100644 --- ql/src/test/results/clientpositive/describe_syntax.q.out +++ ql/src/test/results/clientpositive/describe_syntax.q.out @@ -32,9 +32,11 @@ POSTHOOK: query: ALTER TABLE t1 ADD PARTITION (ds='4', part='5') POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Input: db1@t1 POSTHOOK: Output: db1@t1@ds=4/part=5 -PREHOOK: query: DESCRIBE t1 +PREHOOK: query: -- describe table +DESCRIBE t1 PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE t1 +POSTHOOK: query: -- describe table +DESCRIBE t1 POSTHOOK: type: DESCTABLE # col_name data_type comment @@ -101,9 +103,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: DESCRIBE db1.t1 +PREHOOK: query: -- describe database.table +DESCRIBE db1.t1 PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE db1.t1 +POSTHOOK: query: -- describe database.table +DESCRIBE db1.t1 POSTHOOK: type: DESCTABLE # col_name data_type comment @@ -170,9 +174,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: DESCRIBE t1 key1 +PREHOOK: query: -- describe table column +DESCRIBE t1 key1 PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE t1 key1 +POSTHOOK: query: -- describe table column +DESCRIBE t1 key1 POSTHOOK: type: DESCTABLE # col_name data_type comment @@ -191,9 +197,11 @@ POSTHOOK: type: DESCTABLE # col_name data_type comment key1 int from deserializer -PREHOOK: query: DESCRIBE db1.t1 key1 +PREHOOK: query: -- describe database.tabe column +DESCRIBE db1.t1 key1 PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE db1.t1 key1 +POSTHOOK: query: -- describe database.tabe column +DESCRIBE db1.t1 key1 POSTHOOK: type: DESCTABLE # col_name data_type comment @@ -212,9 +220,15 @@ POSTHOOK: type: DESCTABLE # col_name data_type comment key1 int from deserializer -PREHOOK: query: DESCRIBE t1.key1 +PREHOOK: query: -- describe table.column +-- after first checking t1.key1 for database.table not valid +-- fall back to the old syntax table.column +DESCRIBE t1.key1 PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE t1.key1 +POSTHOOK: query: -- describe table.column +-- after first checking t1.key1 for database.table not valid +-- fall back to the old syntax table.column +DESCRIBE t1.key1 POSTHOOK: type: DESCTABLE # col_name data_type comment @@ -233,9 +247,11 @@ POSTHOOK: type: DESCTABLE # col_name data_type comment key1 int from deserializer -PREHOOK: query: DESCRIBE t1 PARTITION(ds='4', part='5') +PREHOOK: query: -- describe table partition +DESCRIBE t1 PARTITION(ds='4', part='5') PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE t1 PARTITION(ds='4', part='5') +POSTHOOK: query: -- describe table partition +DESCRIBE t1 PARTITION(ds='4', part='5') POSTHOOK: type: DESCTABLE # col_name data_type comment @@ -302,9 +318,11 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: DESCRIBE db1.t1 PARTITION(ds='4', part='5') +PREHOOK: query: -- describe database.table partition +DESCRIBE db1.t1 PARTITION(ds='4', part='5') PREHOOK: type: DESCTABLE -POSTHOOK: query: DESCRIBE db1.t1 PARTITION(ds='4', part='5') +POSTHOOK: query: -- describe database.table partition +DESCRIBE db1.t1 PARTITION(ds='4', part='5') POSTHOOK: type: DESCTABLE # col_name data_type comment diff --git ql/src/test/results/clientpositive/describe_xpath.q.out ql/src/test/results/clientpositive/describe_xpath.q.out index b085df9..282d325 100644 --- ql/src/test/results/clientpositive/describe_xpath.q.out +++ ql/src/test/results/clientpositive/describe_xpath.q.out @@ -1,43 +1,55 @@ -PREHOOK: query: describe src_thrift.lint +PREHOOK: query: -- Describe a list structure in a thrift table +describe src_thrift.lint PREHOOK: type: DESCTABLE -POSTHOOK: query: describe src_thrift.lint +POSTHOOK: query: -- Describe a list structure in a thrift table +describe src_thrift.lint POSTHOOK: type: DESCTABLE # col_name data_type comment lint array from deserializer -PREHOOK: query: describe src_thrift.lint.$elem$ +PREHOOK: query: -- Describe the element of a list +describe src_thrift.lint.$elem$ PREHOOK: type: DESCTABLE -POSTHOOK: query: describe src_thrift.lint.$elem$ +POSTHOOK: query: -- Describe the element of a list +describe src_thrift.lint.$elem$ POSTHOOK: type: DESCTABLE # col_name data_type comment $elem$ int from deserializer -PREHOOK: query: describe src_thrift.mStringString.$key$ +PREHOOK: query: -- Describe the key of a map +describe src_thrift.mStringString.$key$ PREHOOK: type: DESCTABLE -POSTHOOK: query: describe src_thrift.mStringString.$key$ +POSTHOOK: query: -- Describe the key of a map +describe src_thrift.mStringString.$key$ POSTHOOK: type: DESCTABLE # col_name data_type comment $key$ string from deserializer -PREHOOK: query: describe src_thrift.mStringString.$value$ +PREHOOK: query: -- Describe the value of a map +describe src_thrift.mStringString.$value$ PREHOOK: type: DESCTABLE -POSTHOOK: query: describe src_thrift.mStringString.$value$ +POSTHOOK: query: -- Describe the value of a map +describe src_thrift.mStringString.$value$ POSTHOOK: type: DESCTABLE # col_name data_type comment $value$ string from deserializer -PREHOOK: query: describe src_thrift.lintString.$elem$ +PREHOOK: query: -- Describe a complex element of a list +describe src_thrift.lintString.$elem$ PREHOOK: type: DESCTABLE -POSTHOOK: query: describe src_thrift.lintString.$elem$ +POSTHOOK: query: -- Describe a complex element of a list +describe src_thrift.lintString.$elem$ POSTHOOK: type: DESCTABLE # col_name data_type comment myint int from deserializer mystring string from deserializer underscore_int int from deserializer -PREHOOK: query: describe src_thrift.lintString.$elem$.myint +PREHOOK: query: -- Describe a member of an element of a list +describe src_thrift.lintString.$elem$.myint PREHOOK: type: DESCTABLE -POSTHOOK: query: describe src_thrift.lintString.$elem$.myint +POSTHOOK: query: -- Describe a member of an element of a list +describe src_thrift.lintString.$elem$.myint POSTHOOK: type: DESCTABLE # col_name data_type comment diff --git ql/src/test/results/clientpositive/diff_part_input_formats.q.out ql/src/test/results/clientpositive/diff_part_input_formats.q.out index 3ddcca1..474023b 100644 --- ql/src/test/results/clientpositive/diff_part_input_formats.q.out +++ ql/src/test/results/clientpositive/diff_part_input_formats.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE TABLE part_test (key STRING, value STRING) PARTITIONED BY (ds STRING) STORED AS SEQUENCEFILE +PREHOOK: query: -- Tests the case where a table is changed from sequence file to a RC file, +-- resulting in partitions in both file formats. If no valid partitions are +-- selected, then it should still use RC file for reading the dummy partition. +CREATE TABLE part_test (key STRING, value STRING) PARTITIONED BY (ds STRING) STORED AS SEQUENCEFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part_test (key STRING, value STRING) PARTITIONED BY (ds STRING) STORED AS SEQUENCEFILE +POSTHOOK: query: -- Tests the case where a table is changed from sequence file to a RC file, +-- resulting in partitions in both file formats. If no valid partitions are +-- selected, then it should still use RC file for reading the dummy partition. +CREATE TABLE part_test (key STRING, value STRING) PARTITIONED BY (ds STRING) STORED AS SEQUENCEFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@part_test PREHOOK: query: ALTER TABLE part_test ADD PARTITION(ds='1') diff --git ql/src/test/results/clientpositive/driverhook.q.out ql/src/test/results/clientpositive/driverhook.q.out index 66793c3..cf22a22 100644 --- ql/src/test/results/clientpositive/driverhook.q.out +++ ql/src/test/results/clientpositive/driverhook.q.out @@ -1,14 +1,20 @@ - - - +-- This query should appear in the Hive CLI output. +-- We test DriverTestHook, which does exactly that. +-- This should not break. +SELECT * FROM src LIMIT 1 +PREHOOK: query: -- This query should appear in the Hive CLI output. +-- We test DriverTestHook, which does exactly that. +-- This should not break. SELECT * FROM src LIMIT 1 -PREHOOK: query: SELECT * FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM src LIMIT 1 +POSTHOOK: query: -- This query should appear in the Hive CLI output. +-- We test DriverTestHook, which does exactly that. +-- This should not break. +SELECT * FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/drop_database_removes_partition_dirs.q.out ql/src/test/results/clientpositive/drop_database_removes_partition_dirs.q.out index ef79291..e2f32b5 100644 --- ql/src/test/results/clientpositive/drop_database_removes_partition_dirs.q.out +++ ql/src/test/results/clientpositive/drop_database_removes_partition_dirs.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE DATABASE test_database +PREHOOK: query: -- This test verifies that if a partition exists outside a table's current location when the +-- database is dropped the partition's location is dropped as well. + +CREATE DATABASE test_database PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE test_database +POSTHOOK: query: -- This test verifies that if a partition exists outside a table's current location when the +-- database is dropped the partition's location is dropped as well. + +CREATE DATABASE test_database POSTHOOK: type: CREATEDATABASE PREHOOK: query: USE test_database PREHOOK: type: SWITCHDATABASE diff --git ql/src/test/results/clientpositive/drop_index_removes_partition_dirs.q.out ql/src/test/results/clientpositive/drop_index_removes_partition_dirs.q.out index 550ffc2..2bb0e22 100644 --- ql/src/test/results/clientpositive/drop_index_removes_partition_dirs.q.out +++ ql/src/test/results/clientpositive/drop_index_removes_partition_dirs.q.out @@ -1,9 +1,15 @@ -PREHOOK: query: CREATE TABLE test_table (key STRING, value STRING) +PREHOOK: query: -- This test verifies that if a partition exists outside an index table's current location when the +-- index is dropped the partition's location is dropped as well. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE #### A masked pattern was here #### PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table (key STRING, value STRING) +POSTHOOK: query: -- This test verifies that if a partition exists outside an index table's current location when the +-- index is dropped the partition's location is dropped as well. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/drop_partitions_ignore_protection.q.out ql/src/test/results/clientpositive/drop_partitions_ignore_protection.q.out index aeb8e6e..e783982 100644 --- ql/src/test/results/clientpositive/drop_partitions_ignore_protection.q.out +++ ql/src/test/results/clientpositive/drop_partitions_ignore_protection.q.out @@ -35,11 +35,15 @@ p string None p string None #### A masked pattern was here #### -PREHOOK: query: alter table tbl_protectmode_no_drop drop partition (p='p1') ignore protection +PREHOOK: query: -- The partition will be dropped, even though we have enabled no_drop +-- as 'ignore protection' has been specified in the command predicate +alter table tbl_protectmode_no_drop drop partition (p='p1') ignore protection PREHOOK: type: ALTERTABLE_DROPPARTS PREHOOK: Input: default@tbl_protectmode_no_drop PREHOOK: Output: default@tbl_protectmode_no_drop@p=p1 -POSTHOOK: query: alter table tbl_protectmode_no_drop drop partition (p='p1') ignore protection +POSTHOOK: query: -- The partition will be dropped, even though we have enabled no_drop +-- as 'ignore protection' has been specified in the command predicate +alter table tbl_protectmode_no_drop drop partition (p='p1') ignore protection POSTHOOK: type: ALTERTABLE_DROPPARTS POSTHOOK: Input: default@tbl_protectmode_no_drop POSTHOOK: Output: default@tbl_protectmode_no_drop@p=p1 diff --git ql/src/test/results/clientpositive/drop_table_removes_partition_dirs.q.out ql/src/test/results/clientpositive/drop_table_removes_partition_dirs.q.out index 022debc..6cc545b 100644 --- ql/src/test/results/clientpositive/drop_table_removes_partition_dirs.q.out +++ ql/src/test/results/clientpositive/drop_table_removes_partition_dirs.q.out @@ -1,9 +1,15 @@ -PREHOOK: query: CREATE TABLE test_table (key STRING, value STRING) +PREHOOK: query: -- This test verifies that if a partition exists outside the table's current location when the +-- table is dropped the partition's location is dropped as well. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE #### A masked pattern was here #### PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table (key STRING, value STRING) +POSTHOOK: query: -- This test verifies that if a partition exists outside the table's current location when the +-- table is dropped the partition's location is dropped as well. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/escape_clusterby1.q.out ql/src/test/results/clientpositive/escape_clusterby1.q.out index 615d09b..ec65513 100644 --- ql/src/test/results/clientpositive/escape_clusterby1.q.out +++ ql/src/test/results/clientpositive/escape_clusterby1.q.out @@ -1,7 +1,9 @@ -PREHOOK: query: explain +PREHOOK: query: -- escaped column names in cluster by are not working jira 3267 +explain select key, value from src cluster by key, value PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- escaped column names in cluster by are not working jira 3267 +explain select key, value from src cluster by key, value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/escape_distributeby1.q.out ql/src/test/results/clientpositive/escape_distributeby1.q.out index 3fa1568..64605d8 100644 --- ql/src/test/results/clientpositive/escape_distributeby1.q.out +++ ql/src/test/results/clientpositive/escape_distributeby1.q.out @@ -1,7 +1,9 @@ -PREHOOK: query: explain +PREHOOK: query: -- escaped column names in distribute by by are not working jira 3267 +explain select key, value from src distribute by key, value PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- escaped column names in distribute by by are not working jira 3267 +explain select key, value from src distribute by key, value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/escape_orderby1.q.out ql/src/test/results/clientpositive/escape_orderby1.q.out index c6a6383..8d105e3 100644 --- ql/src/test/results/clientpositive/escape_orderby1.q.out +++ ql/src/test/results/clientpositive/escape_orderby1.q.out @@ -1,7 +1,9 @@ -PREHOOK: query: explain +PREHOOK: query: -- escaped column names in order by are not working jira 3267 +explain select key, value from src order by key, value PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- escaped column names in order by are not working jira 3267 +explain select key, value from src order by key, value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/escape_sortby1.q.out ql/src/test/results/clientpositive/escape_sortby1.q.out index 6854d71..a6ddbfd 100644 --- ql/src/test/results/clientpositive/escape_sortby1.q.out +++ ql/src/test/results/clientpositive/escape_sortby1.q.out @@ -1,7 +1,9 @@ -PREHOOK: query: explain +PREHOOK: query: -- escaped column names in sort by are not working jira 3267 +explain select key, value from src sort by key, value PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- escaped column names in sort by are not working jira 3267 +explain select key, value from src sort by key, value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/explain_dependency.q.out ql/src/test/results/clientpositive/explain_dependency.q.out index 006cc23..2f272bb 100644 --- ql/src/test/results/clientpositive/explain_dependency.q.out +++ ql/src/test/results/clientpositive/explain_dependency.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE VIEW V1 AS SELECT key, value from src +PREHOOK: query: -- This test is used for testing EXPLAIN DEPENDENCY command + +-- Create some views +CREATE VIEW V1 AS SELECT key, value from src PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW V1 AS SELECT key, value from src +POSTHOOK: query: -- This test is used for testing EXPLAIN DEPENDENCY command + +-- Create some views +CREATE VIEW V1 AS SELECT key, value from src POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@V1 PREHOOK: query: CREATE VIEW V2 AS SELECT ds, key, value FROM srcpart WHERE ds IS NOT NULL @@ -32,10 +38,12 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@v1 POSTHOOK: Input: default@v2 POSTHOOK: Output: default@V4 -PREHOOK: query: EXPLAIN DEPENDENCY +PREHOOK: query: -- Simple select queries, union queries and join queries +EXPLAIN DEPENDENCY SELECT key, count(1) FROM srcpart WHERE ds IS NOT NULL GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN DEPENDENCY +POSTHOOK: query: -- Simple select queries, union queries and join queries +EXPLAIN DEPENDENCY SELECT key, count(1) FROM srcpart WHERE ds IS NOT NULL GROUP BY key POSTHOOK: type: QUERY {"input_partitions":[{"partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"}]} @@ -64,9 +72,11 @@ POSTHOOK: query: EXPLAIN DEPENDENCY SELECT S1.key, S2.value FROM src S1 JOIN srcpart S2 ON S1.key = S2.key WHERE ds IS NOT NULL POSTHOOK: type: QUERY {"input_partitions":[{"partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"},{"tablename":"default@src","tabletype":"MANAGED_TABLE"}]} -PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V1 +PREHOOK: query: -- With views +EXPLAIN DEPENDENCY SELECT * FROM V1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V1 +POSTHOOK: query: -- With views +EXPLAIN DEPENDENCY SELECT * FROM V1 POSTHOOK: type: QUERY {"input_partitions":[],"input_tables":[{"tablename":"default@v1","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@src","tabletype":"MANAGED_TABLE","tableParents":"[default@v1]"}]} PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V2 @@ -84,9 +94,13 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V4 POSTHOOK: type: QUERY {"input_partitions":[{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@v4","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@v2","tabletype":"VIRTUAL_VIEW","tableParents":"[default@v4]"},{"tablename":"default@v1","tabletype":"VIRTUAL_VIEW","tableParents":"[default@v4]"},{"tablename":"default@src","tabletype":"MANAGED_TABLE","tableParents":"[default@v4, default@v1]"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE","tableParents":"[default@v2]"}]} -PREHOOK: query: CREATE VIEW V5 as SELECT * FROM srcpart where ds = '10' +PREHOOK: query: -- The table should show up in the explain dependency even if none +-- of the partitions are selected. +CREATE VIEW V5 as SELECT * FROM srcpart where ds = '10' PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW V5 as SELECT * FROM srcpart where ds = '10' +POSTHOOK: query: -- The table should show up in the explain dependency even if none +-- of the partitions are selected. +CREATE VIEW V5 as SELECT * FROM srcpart where ds = '10' POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@V5 PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V5 diff --git ql/src/test/results/clientpositive/explain_dependency2.q.out ql/src/test/results/clientpositive/explain_dependency2.q.out index 9056c58..d99a71a 100644 --- ql/src/test/results/clientpositive/explain_dependency2.q.out +++ ql/src/test/results/clientpositive/explain_dependency2.q.out @@ -1,31 +1,55 @@ -PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM src +PREHOOK: query: -- This test is used for testing EXPLAIN DEPENDENCY command + +-- select from a table which does not involve a map-reduce job +EXPLAIN DEPENDENCY SELECT * FROM src PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM src +POSTHOOK: query: -- This test is used for testing EXPLAIN DEPENDENCY command + +-- select from a table which does not involve a map-reduce job +EXPLAIN DEPENDENCY SELECT * FROM src POSTHOOK: type: QUERY {"input_partitions":[],"input_tables":[{"tablename":"default@src","tabletype":"MANAGED_TABLE"}]} -PREHOOK: query: EXPLAIN DEPENDENCY SELECT count(*) FROM src +PREHOOK: query: -- select from a table which involves a map-reduce job +EXPLAIN DEPENDENCY SELECT count(*) FROM src PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN DEPENDENCY SELECT count(*) FROM src +POSTHOOK: query: -- select from a table which involves a map-reduce job +EXPLAIN DEPENDENCY SELECT count(*) FROM src POSTHOOK: type: QUERY {"input_partitions":[],"input_tables":[{"tablename":"default@src","tabletype":"MANAGED_TABLE"}]} -PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM srcpart where ds is not null +PREHOOK: query: -- select from a partitioned table which does not involve a map-reduce job +-- and some partitions are being selected +EXPLAIN DEPENDENCY SELECT * FROM srcpart where ds is not null PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM srcpart where ds is not null +POSTHOOK: query: -- select from a partitioned table which does not involve a map-reduce job +-- and some partitions are being selected +EXPLAIN DEPENDENCY SELECT * FROM srcpart where ds is not null POSTHOOK: type: QUERY {"input_partitions":[{"partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"}]} -PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM srcpart where ds = '1' +PREHOOK: query: -- select from a partitioned table which does not involve a map-reduce job +-- and none of the partitions are being selected +EXPLAIN DEPENDENCY SELECT * FROM srcpart where ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM srcpart where ds = '1' +POSTHOOK: query: -- select from a partitioned table which does not involve a map-reduce job +-- and none of the partitions are being selected +EXPLAIN DEPENDENCY SELECT * FROM srcpart where ds = '1' POSTHOOK: type: QUERY {"input_partitions":[],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"}]} -PREHOOK: query: EXPLAIN DEPENDENCY SELECT count(*) FROM srcpart where ds is not null +PREHOOK: query: -- select from a partitioned table which involves a map-reduce job +-- and some partitions are being selected +EXPLAIN DEPENDENCY SELECT count(*) FROM srcpart where ds is not null PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN DEPENDENCY SELECT count(*) FROM srcpart where ds is not null +POSTHOOK: query: -- select from a partitioned table which involves a map-reduce job +-- and some partitions are being selected +EXPLAIN DEPENDENCY SELECT count(*) FROM srcpart where ds is not null POSTHOOK: type: QUERY {"input_partitions":[{"partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"}]} -PREHOOK: query: EXPLAIN DEPENDENCY SELECT count(*) FROM srcpart where ds = '1' +PREHOOK: query: -- select from a partitioned table which involves a map-reduce job +-- and none of the partitions are being selected +EXPLAIN DEPENDENCY SELECT count(*) FROM srcpart where ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN DEPENDENCY SELECT count(*) FROM srcpart where ds = '1' +POSTHOOK: query: -- select from a partitioned table which involves a map-reduce job +-- and none of the partitions are being selected +EXPLAIN DEPENDENCY SELECT count(*) FROM srcpart where ds = '1' POSTHOOK: type: QUERY {"input_partitions":[],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"}]} PREHOOK: query: create table tstsrcpart like srcpart @@ -33,13 +57,17 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tstsrcpart like srcpart POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tstsrcpart -PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM tstsrcpart where ds is not null +PREHOOK: query: -- select from a partitioned table with no partitions which does not involve a map-reduce job +EXPLAIN DEPENDENCY SELECT * FROM tstsrcpart where ds is not null PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM tstsrcpart where ds is not null +POSTHOOK: query: -- select from a partitioned table with no partitions which does not involve a map-reduce job +EXPLAIN DEPENDENCY SELECT * FROM tstsrcpart where ds is not null POSTHOOK: type: QUERY {"input_partitions":[],"input_tables":[{"tablename":"default@tstsrcpart","tabletype":"MANAGED_TABLE"}]} -PREHOOK: query: EXPLAIN DEPENDENCY SELECT count(*) FROM tstsrcpart where ds is not null +PREHOOK: query: -- select from a partitioned table with no partitions which involves a map-reduce job +EXPLAIN DEPENDENCY SELECT count(*) FROM tstsrcpart where ds is not null PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN DEPENDENCY SELECT count(*) FROM tstsrcpart where ds is not null +POSTHOOK: query: -- select from a partitioned table with no partitions which involves a map-reduce job +EXPLAIN DEPENDENCY SELECT count(*) FROM tstsrcpart where ds is not null POSTHOOK: type: QUERY {"input_partitions":[],"input_tables":[{"tablename":"default@tstsrcpart","tabletype":"MANAGED_TABLE"}]} diff --git ql/src/test/results/clientpositive/global_limit.q.out ql/src/test/results/clientpositive/global_limit.q.out index 790e289..395ef93 100644 --- ql/src/test/results/clientpositive/global_limit.q.out +++ ql/src/test/results/clientpositive/global_limit.q.out @@ -37,10 +37,12 @@ PREHOOK: Output: default@gl_src1 POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE gl_src1 POSTHOOK: type: LOAD POSTHOOK: Output: default@gl_src1 -PREHOOK: query: create table gl_tgt as select key from gl_src1 limit 1 +PREHOOK: query: -- need one file +create table gl_tgt as select key from gl_src1 limit 1 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@gl_src1 -POSTHOOK: query: create table gl_tgt as select key from gl_src1 limit 1 +POSTHOOK: query: -- need one file +create table gl_tgt as select key from gl_src1 limit 1 POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@gl_src1 POSTHOOK: Output: default@gl_tgt @@ -53,11 +55,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@gl_tgt #### A masked pattern was here #### 165 -PREHOOK: query: select 'x' as key_new , split(value,',') as value_new from gl_src1 ORDER BY key_new ASC, value_new[0] ASC limit 20 +PREHOOK: query: -- need two files +select 'x' as key_new , split(value,',') as value_new from gl_src1 ORDER BY key_new ASC, value_new[0] ASC limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@gl_src1 #### A masked pattern was here #### -POSTHOOK: query: select 'x' as key_new , split(value,',') as value_new from gl_src1 ORDER BY key_new ASC, value_new[0] ASC limit 20 +POSTHOOK: query: -- need two files +select 'x' as key_new , split(value,',') as value_new from gl_src1 ORDER BY key_new ASC, value_new[0] ASC limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@gl_src1 #### A masked pattern was here #### @@ -81,11 +85,13 @@ x ["val_11"] x ["val_11"] x ["val_114"] x ["val_114"] -PREHOOK: query: select key, value, split(value,',') as value_new from gl_src1 ORDER BY key ASC, value ASC, value_new[0] ASC limit 30 +PREHOOK: query: -- no sufficient files +select key, value, split(value,',') as value_new from gl_src1 ORDER BY key ASC, value ASC, value_new[0] ASC limit 30 PREHOOK: type: QUERY PREHOOK: Input: default@gl_src1 #### A masked pattern was here #### -POSTHOOK: query: select key, value, split(value,',') as value_new from gl_src1 ORDER BY key ASC, value ASC, value_new[0] ASC limit 30 +POSTHOOK: query: -- no sufficient files +select key, value, split(value,',') as value_new from gl_src1 ORDER BY key ASC, value ASC, value_new[0] ASC limit 30 POSTHOOK: type: QUERY POSTHOOK: Input: default@gl_src1 #### A masked pattern was here #### @@ -119,11 +125,13 @@ POSTHOOK: Input: default@gl_src1 26 val_26 ["val_26"] 26 val_26 ["val_26"] 26 val_26 ["val_26"] -PREHOOK: query: select key from gl_src1 ORDER BY key ASC limit 100 +PREHOOK: query: -- need all files +select key from gl_src1 ORDER BY key ASC limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@gl_src1 #### A masked pattern was here #### -POSTHOOK: query: select key from gl_src1 ORDER BY key ASC limit 100 +POSTHOOK: query: -- need all files +select key from gl_src1 ORDER BY key ASC limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@gl_src1 #### A masked pattern was here #### @@ -265,11 +273,13 @@ POSTHOOK: Input: default@gl_src1 26 26 26 -PREHOOK: query: select key, count(1) from gl_src1 group by key ORDER BY key ASC limit 5 +PREHOOK: query: -- not qualified cases +select key, count(1) from gl_src1 group by key ORDER BY key ASC limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@gl_src1 #### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from gl_src1 group by key ORDER BY key ASC limit 5 +POSTHOOK: query: -- not qualified cases +select key, count(1) from gl_src1 group by key ORDER BY key ASC limit 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@gl_src1 #### A masked pattern was here #### @@ -1033,11 +1043,13 @@ POSTHOOK: Input: default@gl_src1 374 484 495 -PREHOOK: query: select key from (select * from (select key,value from gl_src1 limit 10)t1 )t2 ORDER BY key ASC +PREHOOK: query: -- complicated queries +select key from (select * from (select key,value from gl_src1 limit 10)t1 )t2 ORDER BY key ASC PREHOOK: type: QUERY PREHOOK: Input: default@gl_src1 #### A masked pattern was here #### -POSTHOOK: query: select key from (select * from (select key,value from gl_src1 limit 10)t1 )t2 ORDER BY key ASC +POSTHOOK: query: -- complicated queries +select key from (select * from (select key,value from gl_src1 limit 10)t1 )t2 ORDER BY key ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@gl_src1 #### A masked pattern was here #### @@ -1097,9 +1109,11 @@ POSTHOOK: Lineage: gl_tgt.key EXPRESSION [(gl_src1)gl_src1.FieldSchema(name:key, 375 485 496 -PREHOOK: query: create table gl_src2 (key int, value string) stored as textfile +PREHOOK: query: -- empty table +create table gl_src2 (key int, value string) stored as textfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table gl_src2 (key int, value string) stored as textfile +POSTHOOK: query: -- empty table +create table gl_src2 (key int, value string) stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@gl_src2 POSTHOOK: Lineage: gl_tgt.key EXPRESSION [(gl_src1)gl_src1.FieldSchema(name:key, type:int, comment:null), ] @@ -1112,9 +1126,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@gl_src2 #### A masked pattern was here #### POSTHOOK: Lineage: gl_tgt.key EXPRESSION [(gl_src1)gl_src1.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: create table gl_src_part1 (key int, value string) partitioned by (p string) stored as textfile +PREHOOK: query: -- partition +create table gl_src_part1 (key int, value string) partitioned by (p string) stored as textfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table gl_src_part1 (key int, value string) partitioned by (p string) stored as textfile +POSTHOOK: query: -- partition +create table gl_src_part1 (key int, value string) partitioned by (p string) stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@gl_src_part1 POSTHOOK: Lineage: gl_tgt.key EXPRESSION [(gl_src1)gl_src1.FieldSchema(name:key, type:int, comment:null), ] diff --git ql/src/test/results/clientpositive/groupby10.q.out ql/src/test/results/clientpositive/groupby10.q.out index 1ecf1ca..0d867e6 100644 --- ql/src/test/results/clientpositive/groupby10.q.out +++ ql/src/test/results/clientpositive/groupby10.q.out @@ -613,12 +613,14 @@ POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, t 401 401 401 409 409 409 484 484 484 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- HIVE-3852 Multi-groupby optimization fails when same distinct column is used twice or more +EXPLAIN FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- HIVE-3852 Multi-groupby optimization fails when same distinct column is used twice or more +EXPLAIN FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key diff --git ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out index eb2cb55..aa4696c 100644 --- ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out +++ ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table t1 (int1 int, int2 int, str1 string, str2 string) +PREHOOK: query: -- This test covers HIVE-2332 + +create table t1 (int1 int, int2 int, str1 string, str2 string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table t1 (int1 int, int2 int, str1 string, str2 string) +POSTHOOK: query: -- This test covers HIVE-2332 + +create table t1 (int1 int, int2 int, str1 string, str2 string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@t1 PREHOOK: query: --disabled RS-dedup for keeping intention of test diff --git ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out index 2b15fab..28c8272 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out @@ -9,10 +9,12 @@ PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Since 4 grouping sets would be generated for the query below, an additional MR job should be created +EXPLAIN SELECT a, b, count(*) from T1 group by a, b with cube PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Since 4 grouping sets would be generated for the query below, an additional MR job should be created +EXPLAIN SELECT a, b, count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out index ebf1720..77e40cd 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out @@ -1,6 +1,14 @@ -PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: query: -- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b, +-- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, +-- this tests that the aggregate function stores the partial aggregate state correctly even if an +-- additional MR job is created for processing the grouping sets. +CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: query: -- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b, +-- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, +-- this tests that the aggregate function stores the partial aggregate state correctly even if an +-- additional MR job is created for processing the grouping sets. +CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@T1 PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets1.txt' INTO TABLE T1 @@ -15,10 +23,16 @@ PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets2.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The query below will execute in a single MR job, since 4 rows are generated per input row +-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null) and +-- hive.new.job.grouping.set.cardinality is more than 4. +EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The query below will execute in a single MR job, since 4 rows are generated per input row +-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null) and +-- hive.new.job.grouping.set.cardinality is more than 4. +EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -142,10 +156,14 @@ NULL 3 5.0 2 5 1 2.0 1 8 NULL 1.0 2 8 1 1.0 2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2. +-- The partial aggregation state should be maintained correctly across MR jobs. +EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2. +-- The partial aggregation state should be maintained correctly across MR jobs. +EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out index 54be3af..d448752 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: query: -- Set merging to false above to make the explain more readable + +CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: query: -- Set merging to false above to make the explain more readable + +CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@T1 PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1 @@ -9,14 +13,16 @@ PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- This tests that cubes and rollups work fine inside sub-queries. +EXPLAIN SELECT * FROM (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 join (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 on subq1.a = subq2.a PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- This tests that cubes and rollups work fine inside sub-queries. +EXPLAIN SELECT * FROM (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 join @@ -295,14 +301,18 @@ POSTHOOK: Input: default@t1 2 3 1 2 NULL 2 2 3 1 2 2 1 2 3 1 2 3 1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Since 4 grouping sets would be generated for each sub-query, an additional MR job should be created +-- for each of them +EXPLAIN SELECT * FROM (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 join (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 on subq1.a = subq2.a PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Since 4 grouping sets would be generated for each sub-query, an additional MR job should be created +-- for each of them +EXPLAIN SELECT * FROM (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 join diff --git ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out index c6c12bf..b35de8b 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: query: -- Set merging to false above to make the explain more readable + +CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: query: -- Set merging to false above to make the explain more readable + +CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@T1 PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1 @@ -9,11 +13,13 @@ PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- This tests that cubes and rollups work fine where the source is a sub-query +EXPLAIN SELECT a, b, count(*) FROM (SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- This tests that cubes and rollups work fine where the source is a sub-query +EXPLAIN SELECT a, b, count(*) FROM (SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube POSTHOOK: type: QUERY @@ -189,11 +195,13 @@ NULL 3 1 5 2 1 8 NULL 1 8 1 1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Since 4 grouping sets would be generated for the cube, an additional MR job should be created +EXPLAIN SELECT a, b, count(*) FROM (SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Since 4 grouping sets would be generated for the cube, an additional MR job should be created +EXPLAIN SELECT a, b, count(*) FROM (SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out index c545c04..fdf577d 100644 --- ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out +++ ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out @@ -249,12 +249,14 @@ POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:str 10 1 16 1 18 1 -PREHOOK: query: explain +PREHOOK: query: -- no need to spray by distinct key first +explain from src insert overwrite table dest1 select key, count(distinct value) group by key insert overwrite table dest2 select key+key, count(distinct value) group by key+key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- no need to spray by distinct key first +explain from src insert overwrite table dest1 select key, count(distinct value) group by key insert overwrite table dest2 select key+key, count(distinct value) group by key+key diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out index 093ccf7..f5b1b19 100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: create table e1 (key string, count int) +PREHOOK: query: -- HIVE-3849 Aliased column in where clause for multi-groupby single reducer cannot be resolved +create table e1 (key string, count int) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table e1 (key string, count int) +POSTHOOK: query: -- HIVE-3849 Aliased column in where clause for multi-groupby single reducer cannot be resolved +create table e1 (key string, count int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@e1 PREHOOK: query: create table e2 (key string, count int) diff --git ql/src/test/results/clientpositive/groupby_position.q.out ql/src/test/results/clientpositive/groupby_position.q.out index 2b9d1bb..535c5d8 100644 --- ql/src/test/results/clientpositive/groupby_position.q.out +++ ql/src/test/results/clientpositive/groupby_position.q.out @@ -8,12 +8,16 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@testTable2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Position Alias in GROUP BY and ORDER BY + +EXPLAIN FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Position Alias in GROUP BY and ORDER BY + +EXPLAIN FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 @@ -568,12 +572,16 @@ POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, 17 val_17 1 18 val_18 1 19 val_19 1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Position Alias in subquery + +EXPLAIN SELECT t.key, t.value FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t ORDER BY 2 DESC, 1 ASC PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Position Alias in subquery + +EXPLAIN SELECT t.key, t.value FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t ORDER BY 2 DESC, 1 ASC diff --git ql/src/test/results/clientpositive/groupby_ppd.q.out ql/src/test/results/clientpositive/groupby_ppd.q.out index fa78d4f..2512b99 100644 --- ql/src/test/results/clientpositive/groupby_ppd.q.out +++ ql/src/test/results/clientpositive/groupby_ppd.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: create table invites (id int, foo int, bar int) +PREHOOK: query: -- see HIVE-2382 +create table invites (id int, foo int, bar int) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table invites (id int, foo int, bar int) +POSTHOOK: query: -- see HIVE-2382 +create table invites (id int, foo int, bar int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@invites PREHOOK: query: explain select * from (select foo, bar from (select bar, foo from invites c union all select bar, foo from invites d) b) a group by bar, foo having bar=1 diff --git ql/src/test/results/clientpositive/groupby_sort_1.q.out ql/src/test/results/clientpositive/groupby_sort_1.q.out index 4d98c97..e6f3a7a 100644 --- ql/src/test/results/clientpositive/groupby_sort_1.q.out +++ ql/src/test/results/clientpositive/groupby_sort_1.q.out @@ -11,11 +11,13 @@ PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 -PREHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t1 @@ -28,11 +30,17 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl1 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key +-- matches the sorted key +-- addind a order by at the end to make the test results deterministic +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key +-- matches the sorted key +-- addind a order by at the end to make the test results deterministic +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key POSTHOOK: type: QUERY @@ -371,11 +379,13 @@ POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val POSTHOOK: type: QUERY @@ -596,11 +606,13 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 17 1 8 18 1 8 28 1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- It should work for sub-queries +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- It should work for sub-queries +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key POSTHOOK: type: QUERY @@ -985,11 +997,13 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 3 1 7 1 8 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- It should work for sub-queries with column aliases +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- It should work for sub-queries with column aliases +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k POSTHOOK: type: QUERY @@ -1396,11 +1410,15 @@ POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed +-- by a match to the sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed +-- by a match to the sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key POSTHOOK: type: QUERY @@ -1782,11 +1800,13 @@ POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val POSTHOOK: type: QUERY @@ -2049,11 +2069,13 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 1 17 1 8 1 18 1 8 1 28 1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- no map-side group by if the group by key contains a function +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- no map-side group by if the group by key contains a function +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 POSTHOOK: type: QUERY @@ -2323,13 +2345,21 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 3 4 1 7 8 1 8 9 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- it should not matter what follows the group by +-- test various cases + +-- group by followed by another group by +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- it should not matter what follows the group by +-- test various cases + +-- group by followed by another group by +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -2618,7 +2648,8 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 6 1 14 1 16 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- group by followed by a union +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -2626,7 +2657,8 @@ SELECT key, count(1) FROM T1 GROUP BY key SELECT key, count(1) FROM T1 GROUP BY key ) subq1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- group by followed by a union +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -3146,7 +3178,8 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 1 8 2 8 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -3154,7 +3187,8 @@ SELECT key, count(1) FROM T1 GROUP BY key SELECT key + key as key, count(1) FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -3794,7 +3828,8 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 8 2 14 1 16 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- group by followed by a join +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -3802,7 +3837,8 @@ JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- group by followed by a join +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -4144,14 +4180,16 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 3 2 7 2 8 4 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper +EXPLAIN EXTENDED SELECT * FROM (SELECT key, count(1) FROM T1 GROUP BY key) subq1 JOIN (SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 ON subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper +EXPLAIN EXTENDED SELECT * FROM (SELECT key, count(1) FROM T1 GROUP BY key) subq1 JOIN @@ -4535,11 +4573,13 @@ POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: INSERT OVERWRITE TABLE T2 select key, val from T1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T2 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t2 -POSTHOOK: query: INSERT OVERWRITE TABLE T2 select key, val from T1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T2 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t2 @@ -4574,11 +4614,13 @@ POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comm POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- no mapside sort group by if the group by is a prefix of the sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- no mapside sort group by if the group by is a prefix of the sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key POSTHOOK: type: QUERY @@ -4875,11 +4917,15 @@ POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 3 1 7 1 8 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the +-- sorted keys +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the +-- sorted keys +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val POSTHOOK: type: QUERY @@ -5399,11 +5445,15 @@ POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comm POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the +-- sorted keys followed by anything +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the +-- sorted keys followed by anything +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 POSTHOOK: type: QUERY @@ -5861,13 +5911,15 @@ POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 1 17 2 1 8 1 18 2 1 8 1 28 2 1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- contants from sub-queries should work fine +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- contants from sub-queries should work fine +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq @@ -6380,7 +6432,8 @@ POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 1 17 1 8 1 18 1 8 1 28 1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- multiple levels of contants from sub-queries should work fine +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( @@ -6389,7 +6442,8 @@ SELECT key, constant as constant2, val, 2 as constant3 from )subq2 group by key, constant3, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- multiple levels of contants from sub-queries should work fine +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( @@ -7430,12 +7484,14 @@ POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 17 1 8 18 1 8 28 1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- multi-table insert with a sub-query +EXPLAIN FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- multi-table insert with a sub-query +EXPLAIN FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val diff --git ql/src/test/results/clientpositive/groupby_sort_10.q.out ql/src/test/results/clientpositive/groupby_sort_10.q.out index 829d4a2..ea06850 100644 --- ql/src/test/results/clientpositive/groupby_sort_10.q.out +++ ql/src/test/results/clientpositive/groupby_sort_10.q.out @@ -5,21 +5,25 @@ POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds stri CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@T1 -PREHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') SELECT * from src where key = 0 or key = 11 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@t1@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') SELECT * from src where key = 0 or key = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@t1@ds=1 POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN select distinct key from T1 +PREHOOK: query: -- The plan is converted to a map-side plan +EXPLAIN select distinct key from T1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select distinct key from T1 +POSTHOOK: query: -- The plan is converted to a map-side plan +EXPLAIN select distinct key from T1 POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -80,12 +84,14 @@ POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 0 11 -PREHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='2') +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='2') SELECT * from src where key = 0 or key = 11 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@t1@ds=2 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='2') +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='2') SELECT * from src where key = 0 or key = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -94,9 +100,13 @@ POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=2).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN select distinct key from T1 +PREHOOK: query: -- The plan is not converted to a map-side, since although the sorting columns and grouping +-- columns match, the user is querying multiple input partitions +EXPLAIN select distinct key from T1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select distinct key from T1 +POSTHOOK: query: -- The plan is not converted to a map-side, since although the sorting columns and grouping +-- columns match, the user is querying multiple input partitions +EXPLAIN select distinct key from T1 POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/groupby_sort_2.q.out ql/src/test/results/clientpositive/groupby_sort_2.q.out index 404afd0..c20b757 100644 --- ql/src/test/results/clientpositive/groupby_sort_2.q.out +++ ql/src/test/results/clientpositive/groupby_sort_2.q.out @@ -11,11 +11,13 @@ PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 -PREHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t1 @@ -28,11 +30,15 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl1 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan should not be converted to a map-side group by even though the group by key +-- matches the sorted key. Adding a order by at the end to make the test results deterministic +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT val, count(1) FROM T1 GROUP BY val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan should not be converted to a map-side group by even though the group by key +-- matches the sorted key. Adding a order by at the end to make the test results deterministic +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT val, count(1) FROM T1 GROUP BY val POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby_sort_3.q.out ql/src/test/results/clientpositive/groupby_sort_3.q.out index 5cb445d..7a6a809 100644 --- ql/src/test/results/clientpositive/groupby_sort_3.q.out +++ ql/src/test/results/clientpositive/groupby_sort_3.q.out @@ -11,11 +11,13 @@ PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 -PREHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t1 @@ -28,11 +30,13 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl1 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan should be converted to a map-side group by +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan should be converted to a map-side group by +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, val, count(1) FROM T1 GROUP BY key, val POSTHOOK: type: QUERY @@ -193,11 +197,13 @@ POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:stri POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan should be converted to a map-side group by +EXPLAIN INSERT OVERWRITE TABLE outputTbl2 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan should be converted to a map-side group by +EXPLAIN INSERT OVERWRITE TABLE outputTbl2 SELECT key, count(1) FROM T1 GROUP BY key POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby_sort_4.q.out ql/src/test/results/clientpositive/groupby_sort_4.q.out index caf7322..95ff90a 100644 --- ql/src/test/results/clientpositive/groupby_sort_4.q.out +++ ql/src/test/results/clientpositive/groupby_sort_4.q.out @@ -11,11 +11,13 @@ PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 -PREHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t1 @@ -28,11 +30,15 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl1 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan should not be converted to a map-side group by. +-- However, there should no hash-based aggregation on the map-side +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan should not be converted to a map-side group by. +-- However, there should no hash-based aggregation on the map-side +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key POSTHOOK: type: QUERY @@ -159,11 +165,15 @@ POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan should not be converted to a map-side group by. +-- Hash-based aggregations should be performed on the map-side +EXPLAIN INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan should not be converted to a map-side group by. +-- Hash-based aggregations should be performed on the map-side +EXPLAIN INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby_sort_5.q.out ql/src/test/results/clientpositive/groupby_sort_5.q.out index 637f790..23cfec3 100644 --- ql/src/test/results/clientpositive/groupby_sort_5.q.out +++ ql/src/test/results/clientpositive/groupby_sort_5.q.out @@ -11,11 +11,13 @@ PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 -PREHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t1 @@ -28,11 +30,17 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl1 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan should be converted to a map-side group by, since the +-- sorting columns and grouping columns match, and all the bucketing columns +-- are part of sorting columns +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan should be converted to a map-side group by, since the +-- sorting columns and grouping columns match, and all the bucketing columns +-- are part of sorting columns +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, val, count(1) FROM T1 GROUP BY key, val POSTHOOK: type: QUERY @@ -219,11 +227,13 @@ POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:stri POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t1 @@ -234,11 +244,17 @@ POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comm POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan should be converted to a map-side group by, since the +-- sorting columns and grouping columns match, and all the bucketing columns +-- are part of sorting columns +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan should be converted to a map-side group by, since the +-- sorting columns and grouping columns match, and all the bucketing columns +-- are part of sorting columns +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, val, count(1) FROM T1 GROUP BY key, val POSTHOOK: type: QUERY @@ -455,11 +471,13 @@ POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comm POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t1 @@ -492,11 +510,19 @@ POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comm POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan should not be converted to a map-side group by, since although the +-- sorting columns and grouping columns match, all the bucketing columns +-- are not part of sorting columns. However, no hash map aggregation is required +-- on the mapside. +EXPLAIN INSERT OVERWRITE TABLE outputTbl2 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan should not be converted to a map-side group by, since although the +-- sorting columns and grouping columns match, all the bucketing columns +-- are not part of sorting columns. However, no hash map aggregation is required +-- on the mapside. +EXPLAIN INSERT OVERWRITE TABLE outputTbl2 SELECT key, count(1) FROM T1 GROUP BY key POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby_sort_6.q.out ql/src/test/results/clientpositive/groupby_sort_6.q.out index 73ce673..49ea4ac 100644 --- ql/src/test/results/clientpositive/groupby_sort_6.q.out +++ ql/src/test/results/clientpositive/groupby_sort_6.q.out @@ -8,11 +8,13 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE outputTbl1(key int, cnt int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The plan should not be converted to a map-side group since no partition is being accessed +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The plan should not be converted to a map-side group since no partition is being accessed +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key POSTHOOK: type: QUERY @@ -164,11 +166,13 @@ POSTHOOK: Output: default@t1 POSTHOOK: Output: default@t1@ds=2 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The plan should not be converted to a map-side group since no partition is being accessed +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The plan should not be converted to a map-side group since no partition is being accessed +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key POSTHOOK: type: QUERY @@ -327,11 +331,15 @@ POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The plan should not be converted to a map-side group since the partition being accessed +-- is neither bucketed not sorted +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The plan should not be converted to a map-side group since the partition being accessed +-- is neither bucketed not sorted +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby_sort_7.q.out ql/src/test/results/clientpositive/groupby_sort_7.q.out index 54bb37a..1c061ae 100644 --- ql/src/test/results/clientpositive/groupby_sort_7.q.out +++ ql/src/test/results/clientpositive/groupby_sort_7.q.out @@ -12,12 +12,14 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PAR POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 POSTHOOK: Output: default@t1@ds=1 -PREHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t1@ds=1 PREHOOK: Output: default@t1@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@ds=1 @@ -31,11 +33,17 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl1 POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan should be converted to a map-side group by, since the +-- sorting columns and grouping columns match, and all the bucketing columns +-- are part of sorting columns +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, val, count(1) FROM T1 where ds = '1' GROUP BY key, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan should be converted to a map-side group by, since the +-- sorting columns and grouping columns match, and all the bucketing columns +-- are part of sorting columns +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, val, count(1) FROM T1 where ds = '1' GROUP BY key, val POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby_sort_8.q.out ql/src/test/results/clientpositive/groupby_sort_8.q.out index 959be5b..57f0a2e 100644 --- ql/src/test/results/clientpositive/groupby_sort_8.q.out +++ ql/src/test/results/clientpositive/groupby_sort_8.q.out @@ -12,22 +12,28 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PAR POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 POSTHOOK: Output: default@t1@ds=1 -PREHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t1@ds=1 PREHOOK: Output: default@t1@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@ds=1 POSTHOOK: Output: default@t1@ds=1 POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan is not converted to a map-side, since although the sorting columns and grouping +-- columns match, the user is issueing a distinct +EXPLAIN select count(distinct key) from T1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan is not converted to a map-side, since although the sorting columns and grouping +-- columns match, the user is issueing a distinct +EXPLAIN select count(distinct key) from T1 POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/groupby_sort_9.q.out ql/src/test/results/clientpositive/groupby_sort_9.q.out index b235802..feec2f3 100644 --- ql/src/test/results/clientpositive/groupby_sort_9.q.out +++ ql/src/test/results/clientpositive/groupby_sort_9.q.out @@ -12,12 +12,14 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PAR POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 POSTHOOK: Output: default@t1@ds=1 -PREHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t1@ds=1 PREHOOK: Output: default@t1@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@ds=1 @@ -38,10 +40,14 @@ POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, t POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1 PARTITION(ds=2).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan is not converted to a map-side, since although the sorting columns and grouping +-- columns match, the user is querying multiple input partitions +EXPLAIN select key, count(1) from T1 group by key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan is not converted to a map-side, since although the sorting columns and grouping +-- columns match, the user is querying multiple input partitions +EXPLAIN select key, count(1) from T1 group by key POSTHOOK: type: QUERY POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out index 13e28c0..b7ca0ee 100644 --- ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out +++ ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out @@ -11,11 +11,13 @@ PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 -PREHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t1 @@ -28,11 +30,17 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl1 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key +-- matches the sorted key +-- addind a order by at the end to make the test results deterministic +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key +-- matches the sorted key +-- addind a order by at the end to make the test results deterministic +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key POSTHOOK: type: QUERY @@ -371,11 +379,13 @@ POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val POSTHOOK: type: QUERY @@ -665,11 +675,13 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 17 1 8 18 1 8 28 1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- It should work for sub-queries +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- It should work for sub-queries +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key POSTHOOK: type: QUERY @@ -1054,11 +1066,13 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 3 1 7 1 8 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- It should work for sub-queries with column aliases +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- It should work for sub-queries with column aliases +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k POSTHOOK: type: QUERY @@ -1465,11 +1479,15 @@ POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed +-- by a match to the sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed +-- by a match to the sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key POSTHOOK: type: QUERY @@ -1851,11 +1869,13 @@ POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val POSTHOOK: type: QUERY @@ -2191,11 +2211,13 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 1 17 1 8 1 18 1 8 1 28 1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- no map-side group by if the group by key contains a function +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- no map-side group by if the group by key contains a function +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 POSTHOOK: type: QUERY @@ -2534,13 +2556,21 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 3 4 1 7 8 1 8 9 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- it should not matter what follows the group by +-- test various cases + +-- group by followed by another group by +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- it should not matter what follows the group by +-- test various cases + +-- group by followed by another group by +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -2894,7 +2924,8 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 6 1 14 1 16 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- group by followed by a union +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -2902,7 +2933,8 @@ SELECT key, count(1) FROM T1 GROUP BY key SELECT key, count(1) FROM T1 GROUP BY key ) subq1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- group by followed by a union +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -3422,7 +3454,8 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 1 8 2 8 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -3430,7 +3463,8 @@ SELECT key, count(1) FROM T1 GROUP BY key SELECT key + key as key, count(1) FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -4135,7 +4169,8 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 8 2 14 1 16 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- group by followed by a join +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -4143,7 +4178,8 @@ JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- group by followed by a join +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -4485,14 +4521,16 @@ POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 3 2 7 2 8 4 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper +EXPLAIN EXTENDED SELECT * FROM (SELECT key, count(1) FROM T1 GROUP BY key) subq1 JOIN (SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 ON subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper +EXPLAIN EXTENDED SELECT * FROM (SELECT key, count(1) FROM T1 GROUP BY key) subq1 JOIN @@ -4945,11 +4983,13 @@ POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: INSERT OVERWRITE TABLE T2 select key, val from T1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T2 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t2 -POSTHOOK: query: INSERT OVERWRITE TABLE T2 select key, val from T1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T2 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t2 @@ -4984,11 +5024,13 @@ POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comm POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- no mapside sort group by if the group by is a prefix of the sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- no mapside sort group by if the group by is a prefix of the sorted key +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key POSTHOOK: type: QUERY @@ -5350,11 +5392,15 @@ POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 3 1 7 1 8 2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the +-- sorted keys +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the +-- sorted keys +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val POSTHOOK: type: QUERY @@ -5874,11 +5920,15 @@ POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comm POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the +-- sorted keys followed by anything +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the +-- sorted keys followed by anything +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 POSTHOOK: type: QUERY @@ -6336,13 +6386,15 @@ POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 1 17 2 1 8 1 18 2 1 8 1 28 2 1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- contants from sub-queries should work fine +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- contants from sub-queries should work fine +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq @@ -6855,7 +6907,8 @@ POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 1 17 1 8 1 18 1 8 1 28 1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- multiple levels of contants from sub-queries should work fine +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( @@ -6864,7 +6917,8 @@ SELECT key, constant as constant2, val, 2 as constant3 from )subq2 group by key, constant3, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- multiple levels of contants from sub-queries should work fine +EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( @@ -7939,12 +7993,14 @@ POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comm 7 17 1 8 18 1 8 28 1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- multi-table insert with a sub-query +EXPLAIN FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- multi-table insert with a sub-query +EXPLAIN FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val diff --git ql/src/test/results/clientpositive/groupby_sort_test_1.q.out ql/src/test/results/clientpositive/groupby_sort_test_1.q.out index f46bcf4..9a36371 100644 --- ql/src/test/results/clientpositive/groupby_sort_test_1.q.out +++ ql/src/test/results/clientpositive/groupby_sort_test_1.q.out @@ -11,11 +11,13 @@ PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 -PREHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t1 -POSTHOOK: query: INSERT OVERWRITE TABLE T1 select key, val from T1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t1 @@ -28,11 +30,15 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl1 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key +-- matches the sorted key. However, in test mode, the group by wont be converted. +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key +-- matches the sorted key. However, in test mode, the group by wont be converted. +EXPLAIN INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/hiveprofiler_script0.q.out ql/src/test/results/clientpositive/hiveprofiler_script0.q.out index c5ee87c..cc03f5d 100644 --- ql/src/test/results/clientpositive/hiveprofiler_script0.q.out +++ ql/src/test/results/clientpositive/hiveprofiler_script0.q.out @@ -1,4 +1,7 @@ -PREHOOK: query: SELECT TRANSFORM(src.key, src.value) +PREHOOK: query: -- checking that script operator does not cause NPE +-- Derby strangeness is causing the output collector for the Hive Profiler to not get output during DB read + +SELECT TRANSFORM(src.key, src.value) USING 'testgrep' AS (tkey, tvalue) FROM src PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/hiveprofiler_union0.q.out ql/src/test/results/clientpositive/hiveprofiler_union0.q.out index 0c102be..ac5c9dc 100644 --- ql/src/test/results/clientpositive/hiveprofiler_union0.q.out +++ ql/src/test/results/clientpositive/hiveprofiler_union0.q.out @@ -7,6 +7,16 @@ SELECT unioninput.* PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### +SEL_5 ==> UNION_6: 414 +FIL_10 ==> SEL_5: 414 +TS_0 ==> FIL_9: 500 +TS_3 ==> FIL_10: 500 +UNION_6 ==> SEL_7: 498 +SEL_2 ==> UNION_6: 84 +FIL_9 ==> SEL_2: 84 +main() ==> TS_3: 500 +main() ==> TS_0: 500 +SEL_7 ==> FS_8: 498 238 val_238 86 val_86 311 val_311 diff --git ql/src/test/results/clientpositive/index_auto.q.out ql/src/test/results/clientpositive/index_auto.q.out index 11806ec..4ad4d99 100644 --- ql/src/test/results/clientpositive/index_auto.q.out +++ ql/src/test/results/clientpositive/index_auto.q.out @@ -1,8 +1,14 @@ -PREHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -41,10 +47,13 @@ POSTHOOK: Output: default@default__src_src_index__ POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: -- manual indexing #### A masked pattern was here #### PREHOOK: type: QUERY PREHOOK: Input: default@default__src_src_index__ #### A masked pattern was here #### +POSTHOOK: query: -- manual indexing +#### A masked pattern was here #### POSTHOOK: type: QUERY POSTHOOK: Input: default@default__src_src_index__ #### A masked pattern was here #### @@ -138,9 +147,11 @@ POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(nam 97 val_97 98 val_98 98 val_98 -PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: query: -- automatic indexing +EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: query: -- automatic indexing +EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] diff --git ql/src/test/results/clientpositive/index_auto_empty.q.out ql/src/test/results/clientpositive/index_auto_empty.q.out index 68b6f36..d219457 100644 --- ql/src/test/results/clientpositive/index_auto_empty.q.out +++ ql/src/test/results/clientpositive/index_auto_empty.q.out @@ -1,11 +1,19 @@ -PREHOOK: query: CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: query: -- Test to ensure that an empty index result is propagated correctly + +-- Create temp, and populate it with some values in src. +CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: query: -- Test to ensure that an empty index result is propagated correctly + +-- Create temp, and populate it with some values in src. +CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@temp -PREHOOK: query: CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: query: -- Build an index on temp. +CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD PREHOOK: type: CREATEINDEX -POSTHOOK: query: CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: query: -- Build an index on temp. +CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX POSTHOOK: Output: default@default__temp_temp_index__ PREHOOK: query: ALTER INDEX temp_index ON temp REBUILD @@ -19,11 +27,13 @@ POSTHOOK: Output: default@default__temp_temp_index__ POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM default__temp_temp_index__ WHERE key = 86 +PREHOOK: query: -- query should not return any values +SELECT * FROM default__temp_temp_index__ WHERE key = 86 PREHOOK: type: QUERY PREHOOK: Input: default@default__temp_temp_index__ #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM default__temp_temp_index__ WHERE key = 86 +POSTHOOK: query: -- query should not return any values +SELECT * FROM default__temp_temp_index__ WHERE key = 86 POSTHOOK: type: QUERY POSTHOOK: Input: default@default__temp_temp_index__ #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/index_auto_file_format.q.out ql/src/test/results/clientpositive/index_auto_file_format.q.out index dadbacb..757a9d9 100644 --- ql/src/test/results/clientpositive/index_auto_file_format.q.out +++ ql/src/test/results/clientpositive/index_auto_file_format.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: query: -- test automatic use of index on different file formats +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD PREHOOK: type: CREATEINDEX -POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: query: -- test automatic use of index on different file formats +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX POSTHOOK: Output: default@default__src_src_index__ PREHOOK: query: ALTER INDEX src_index ON src REBUILD diff --git ql/src/test/results/clientpositive/index_auto_mult_tables.q.out ql/src/test/results/clientpositive/index_auto_mult_tables.q.out index 40ee2e4..fde147f 100644 --- ql/src/test/results/clientpositive/index_auto_mult_tables.q.out +++ ql/src/test/results/clientpositive/index_auto_mult_tables.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) diff --git ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out index 94b430f..963f847 100644 --- ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out +++ ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) @@ -220,9 +226,11 @@ POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(nam POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] diff --git ql/src/test/results/clientpositive/index_auto_multiple.q.out ql/src/test/results/clientpositive/index_auto_multiple.q.out index cdd4281..d4ee96c 100644 --- ql/src/test/results/clientpositive/index_auto_multiple.q.out +++ ql/src/test/results/clientpositive/index_auto_multiple.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE INDEX src_key_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: query: -- With multiple indexes, make sure we choose which to use in a consistent order + +CREATE INDEX src_key_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD PREHOOK: type: CREATEINDEX -POSTHOOK: query: CREATE INDEX src_key_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: query: -- With multiple indexes, make sure we choose which to use in a consistent order + +CREATE INDEX src_key_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX POSTHOOK: Output: default@default__src_src_key_index__ PREHOOK: query: CREATE INDEX src_val_index ON TABLE src(value) as 'COMPACT' WITH DEFERRED REBUILD diff --git ql/src/test/results/clientpositive/index_auto_partitioned.q.out ql/src/test/results/clientpositive/index_auto_partitioned.q.out index 170b015..78fbb8b 100644 --- ql/src/test/results/clientpositive/index_auto_partitioned.q.out +++ ql/src/test/results/clientpositive/index_auto_partitioned.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: query: -- test automatic use of index on table with partitions +CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD PREHOOK: type: CREATEINDEX -POSTHOOK: query: CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: query: -- test automatic use of index on table with partitions +CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX POSTHOOK: Output: default@default__srcpart_src_part_index__ PREHOOK: query: ALTER INDEX src_part_index ON srcpart REBUILD diff --git ql/src/test/results/clientpositive/index_auto_self_join.q.out ql/src/test/results/clientpositive/index_auto_self_join.q.out index c324e45..492008c 100644 --- ql/src/test/results/clientpositive/index_auto_self_join.q.out +++ ql/src/test/results/clientpositive/index_auto_self_join.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) diff --git ql/src/test/results/clientpositive/index_auto_unused.q.out ql/src/test/results/clientpositive/index_auto_unused.q.out index 7aaa2bd..8ea6145 100644 --- ql/src/test/results/clientpositive/index_auto_unused.q.out +++ ql/src/test/results/clientpositive/index_auto_unused.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: query: -- test cases where the index should not be used automatically + +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD PREHOOK: type: CREATEINDEX -POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: query: -- test cases where the index should not be used automatically + +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX POSTHOOK: Output: default@default__src_src_index__ PREHOOK: query: ALTER INDEX src_index ON src REBUILD @@ -14,9 +18,11 @@ POSTHOOK: Output: default@default__src_src_index__ POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: query: -- min size too large (src is less than 5G) +EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: query: -- min size too large (src is less than 5G) +EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] @@ -104,9 +110,11 @@ POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(nam 97 val_97 98 val_98 98 val_98 -PREHOOK: query: EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: query: -- max size too small +EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: query: -- max size too small +EXPLAIN SELECT * FROM src WHERE key > 80 AND key < 100 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] @@ -194,9 +202,11 @@ POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(nam 97 val_97 98 val_98 98 val_98 -PREHOOK: query: EXPLAIN SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key +PREHOOK: query: -- OR predicate not supported by compact indexes +EXPLAIN SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key +POSTHOOK: query: -- OR predicate not supported by compact indexes +EXPLAIN SELECT * FROM src WHERE key < 10 OR key > 480 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] @@ -297,9 +307,11 @@ POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(nam 5 val_5 8 val_8 9 val_9 -PREHOOK: query: DROP INDEX src_index on src +PREHOOK: query: -- columns are not covered by indexes +DROP INDEX src_index on src PREHOOK: type: DROPINDEX -POSTHOOK: query: DROP INDEX src_index on src +POSTHOOK: query: -- columns are not covered by indexes +DROP INDEX src_index on src POSTHOOK: type: DROPINDEX POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] @@ -432,9 +444,11 @@ POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(nam POSTHOOK: Lineage: default__src_src_val_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_val_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_val_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: query: -- required partitions have not been built yet +CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD PREHOOK: type: CREATEINDEX -POSTHOOK: query: CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: query: -- required partitions have not been built yet +CREATE INDEX src_part_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX POSTHOOK: Output: default@default__srcpart_src_part_index__ POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] diff --git ql/src/test/results/clientpositive/index_auto_update.q.out ql/src/test/results/clientpositive/index_auto_update.q.out index f4664e4..217b77d 100644 --- ql/src/test/results/clientpositive/index_auto_update.q.out +++ ql/src/test/results/clientpositive/index_auto_update.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: query: -- Test if index is actually being used. + +-- Create temp, and populate it with some values in src. +CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: query: -- Test if index is actually being used. + +-- Create temp, and populate it with some values in src. +CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@temp PREHOOK: query: INSERT OVERWRITE TABLE temp SELECT * FROM src WHERE key < 50 @@ -13,9 +19,11 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@temp POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: query: -- Build an index on temp. +CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD PREHOOK: type: CREATEINDEX -POSTHOOK: query: CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: query: -- Build an index on temp. +CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX POSTHOOK: Output: default@default__temp_temp_index__ POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -33,9 +41,11 @@ POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.Fi POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE temp SELECT * FROM src +PREHOOK: query: -- overwrite temp table so index is out of date +EXPLAIN INSERT OVERWRITE TABLE temp SELECT * FROM src PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE temp SELECT * FROM src +POSTHOOK: query: -- overwrite temp table so index is out of date +EXPLAIN INSERT OVERWRITE TABLE temp SELECT * FROM src POSTHOOK: type: QUERY POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] @@ -245,9 +255,11 @@ POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN SELECT * FROM temp WHERE key = 86 +PREHOOK: query: -- query should return indexed values +EXPLAIN SELECT * FROM temp WHERE key = 86 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT * FROM temp WHERE key = 86 +POSTHOOK: query: -- query should return indexed values +EXPLAIN SELECT * FROM temp WHERE key = 86 POSTHOOK: type: QUERY POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] diff --git ql/src/test/results/clientpositive/index_bitmap_auto.q.out ql/src/test/results/clientpositive/index_bitmap_auto.q.out index 31b30cc..5526cb8 100644 --- ql/src/test/results/clientpositive/index_bitmap_auto.q.out +++ ql/src/test/results/clientpositive/index_bitmap_auto.q.out @@ -1,18 +1,24 @@ -PREHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing +-- without indexing +SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing +-- without indexing +SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 0 val_0 0 val_0 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- create indices +EXPLAIN CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD PREHOOK: type: CREATEINDEX -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- create indices +EXPLAIN CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX ABSTRACT SYNTAX TREE: @@ -113,7 +119,8 @@ POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE [(src)src.FieldS POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] #### A masked pattern was here #### -PREHOOK: query: EXPLAIN +PREHOOK: query: -- manual indexing +EXPLAIN SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ WHERE key = 0) a @@ -124,7 +131,8 @@ FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bit a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- manual indexing +EXPLAIN SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ WHERE key = 0) a diff --git ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out index e64ac39..a014e39 100644 --- ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out +++ ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE INDEX src_part_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: query: -- test automatic use of index on table with partitions +CREATE INDEX src_part_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD PREHOOK: type: CREATEINDEX -POSTHOOK: query: CREATE INDEX src_part_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: query: -- test automatic use of index on table with partitions +CREATE INDEX src_part_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX POSTHOOK: Output: default@default__srcpart_src_part_index__ PREHOOK: query: ALTER INDEX src_part_index ON srcpart REBUILD diff --git ql/src/test/results/clientpositive/index_bitmap_compression.q.out ql/src/test/results/clientpositive/index_bitmap_compression.q.out index 5562f91..abb0b09 100644 --- ql/src/test/results/clientpositive/index_bitmap_compression.q.out +++ ql/src/test/results/clientpositive/index_bitmap_compression.q.out @@ -15,9 +15,11 @@ POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldS POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: query: -- automatic indexing +EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: query: -- automatic indexing +EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] diff --git ql/src/test/results/clientpositive/index_compression.q.out ql/src/test/results/clientpositive/index_compression.q.out index 2ff3ac2..979a45a 100644 --- ql/src/test/results/clientpositive/index_compression.q.out +++ ql/src/test/results/clientpositive/index_compression.q.out @@ -14,9 +14,11 @@ POSTHOOK: Output: default@default__src_src_index__ POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +PREHOOK: query: -- automatic indexing +EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key +POSTHOOK: query: -- automatic indexing +EXPLAIN SELECT key, value FROM src WHERE key > 80 AND key < 100 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] diff --git ql/src/test/results/clientpositive/index_stale.q.out ql/src/test/results/clientpositive/index_stale.q.out index 1efc9d7..81aa310 100644 --- ql/src/test/results/clientpositive/index_stale.q.out +++ ql/src/test/results/clientpositive/index_stale.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: query: -- test that stale indexes are not used + +CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: query: -- test that stale indexes are not used + +CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@temp PREHOOK: query: INSERT OVERWRITE TABLE temp SELECT * FROM src WHERE key < 50 @@ -13,9 +17,11 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@temp POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: query: -- Build an index on temp. +CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD PREHOOK: type: CREATEINDEX -POSTHOOK: query: CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: query: -- Build an index on temp. +CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX POSTHOOK: Output: default@default__temp_temp_index__ POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -33,11 +39,13 @@ POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.Fi POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT OVERWRITE TABLE temp SELECT * FROM src +PREHOOK: query: -- overwrite temp table so index is out of date +INSERT OVERWRITE TABLE temp SELECT * FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@temp -POSTHOOK: query: INSERT OVERWRITE TABLE temp SELECT * FROM src +POSTHOOK: query: -- overwrite temp table so index is out of date +INSERT OVERWRITE TABLE temp SELECT * FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@temp @@ -48,9 +56,11 @@ POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN SELECT * FROM temp WHERE key = 86 +PREHOOK: query: -- should return correct results bypassing index +EXPLAIN SELECT * FROM temp WHERE key = 86 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT * FROM temp WHERE key = 86 +POSTHOOK: query: -- should return correct results bypassing index +EXPLAIN SELECT * FROM temp WHERE key = 86 POSTHOOK: type: QUERY POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] diff --git ql/src/test/results/clientpositive/index_stale_partitioned.q.out ql/src/test/results/clientpositive/index_stale_partitioned.q.out index 281baac..f76401b 100644 --- ql/src/test/results/clientpositive/index_stale_partitioned.q.out +++ ql/src/test/results/clientpositive/index_stale_partitioned.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE TABLE temp(key STRING, val STRING) PARTITIONED BY (foo string) STORED AS TEXTFILE +PREHOOK: query: -- Test if index is actually being used. + +-- Create temp, and populate it with some values in src. +CREATE TABLE temp(key STRING, val STRING) PARTITIONED BY (foo string) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE temp(key STRING, val STRING) PARTITIONED BY (foo string) STORED AS TEXTFILE +POSTHOOK: query: -- Test if index is actually being used. + +-- Create temp, and populate it with some values in src. +CREATE TABLE temp(key STRING, val STRING) PARTITIONED BY (foo string) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@temp PREHOOK: query: ALTER TABLE temp ADD PARTITION (foo = 'bar') @@ -20,9 +26,11 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@temp@foo=bar POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: query: -- Build an index on temp. +CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD PREHOOK: type: CREATEINDEX -POSTHOOK: query: CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: query: -- Build an index on temp. +CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD POSTHOOK: type: CREATEINDEX POSTHOOK: Output: default@default__temp_temp_index__ POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -42,11 +50,13 @@ POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._offsets EXPRES POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar).key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT OVERWRITE TABLE temp PARTITION (foo = 'bar') SELECT * FROM src +PREHOOK: query: -- overwrite temp table so index is out of date +INSERT OVERWRITE TABLE temp PARTITION (foo = 'bar') SELECT * FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@temp@foo=bar -POSTHOOK: query: INSERT OVERWRITE TABLE temp PARTITION (foo = 'bar') SELECT * FROM src +POSTHOOK: query: -- overwrite temp table so index is out of date +INSERT OVERWRITE TABLE temp PARTITION (foo = 'bar') SELECT * FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@temp@foo=bar @@ -57,12 +67,14 @@ POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT * FROM default__temp_temp_index__ WHERE key = 86 AND foo='bar' +PREHOOK: query: -- query should not return any values +SELECT * FROM default__temp_temp_index__ WHERE key = 86 AND foo='bar' PREHOOK: type: QUERY PREHOOK: Input: default@default__temp_temp_index__ PREHOOK: Input: default@default__temp_temp_index__@foo=bar #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM default__temp_temp_index__ WHERE key = 86 AND foo='bar' +POSTHOOK: query: -- query should not return any values +SELECT * FROM default__temp_temp_index__ WHERE key = 86 AND foo='bar' POSTHOOK: type: QUERY POSTHOOK: Input: default@default__temp_temp_index__ POSTHOOK: Input: default@default__temp_temp_index__@foo=bar diff --git ql/src/test/results/clientpositive/infer_bucket_sort.q.out ql/src/test/results/clientpositive/infer_bucket_sort.q.out index 7200ec5..0a27b8a 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort.q.out @@ -1,14 +1,22 @@ -PREHOOK: query: CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by, should be bucketed and sorted by group by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by, should be bucketed and sorted by group by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -55,12 +63,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key, value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -111,12 +121,14 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test join, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test join, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -171,12 +183,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -235,12 +249,14 @@ Bucket Columns: [key, value] Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -303,12 +319,14 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -375,12 +393,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -451,12 +471,14 @@ Bucket Columns: [value] Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test distribute by, should only be bucketed by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src DISTRIBUTE BY key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test distribute by, should only be bucketed by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src DISTRIBUTE BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -531,12 +553,14 @@ Bucket Columns: [key] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test sort by, should be sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src SORT BY key ASC PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test sort by, should be sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src SORT BY key ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -615,12 +639,14 @@ Bucket Columns: [] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test sort by desc, should be sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src SORT BY key DESC PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test sort by desc, should be sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src SORT BY key DESC POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -703,12 +729,14 @@ Bucket Columns: [] Sort Columns: [Order(col:key, order:0)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test cluster by, should be bucketed and sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src CLUSTER BY key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test cluster by, should be bucketed and sorted by key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src CLUSTER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -795,12 +823,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test distribute by and sort by different keys, should be bucketed by one key sorted by the other +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src DISTRIBUTE BY key SORT BY value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test distribute by and sort by different keys, should be bucketed by one key sorted by the other +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src DISTRIBUTE BY key SORT BY value POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -891,12 +921,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test join in simple subquery, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value from (SELECT a.key, b.value FROM src a JOIN src b ON (a.key = b.key)) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test join in simple subquery, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value from (SELECT a.key, b.value FROM src a JOIN src b ON (a.key = b.key)) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -991,12 +1023,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test join in simple subquery renaming key column, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT k, value FROM (SELECT a.key as k, b.value FROM src a JOIN src b ON (a.key = b.key)) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test join in simple subquery renaming key column, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT k, value FROM (SELECT a.key as k, b.value FROM src a JOIN src b ON (a.key = b.key)) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -1095,12 +1129,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by in simple subquery, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, cnt from (SELECT key, count(*) as cnt FROM src GROUP BY key) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by in simple subquery, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, cnt from (SELECT key, count(*) as cnt FROM src GROUP BY key) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -1203,12 +1239,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by in simple subquery renaming key column, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT k, cnt FROM (SELECT key as k, count(*) as cnt FROM src GROUP BY key) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by in simple subquery renaming key column, should be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT k, cnt FROM (SELECT key as k, count(*) as cnt FROM src GROUP BY key) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -1315,12 +1353,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -1431,12 +1471,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by in subquery with expression on value, should still be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value + 1 FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by in subquery with expression on value, should still be bucketed and sorted on key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value + 1 FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -1551,12 +1593,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -1675,12 +1719,16 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the +-- key of the outer group by +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the +-- key of the outer group by +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -1803,12 +1851,16 @@ Bucket Columns: [value] Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and +-- sorted by the column the group by key ends up in +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and +-- sorted by the column the group by key ends up in +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -1935,12 +1987,14 @@ Bucket Columns: [value] Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -2071,12 +2125,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -2211,12 +2267,14 @@ Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by in subquery followed by transform script, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT TRANSFORM (a.key, a.value) USING 'cat' AS (key, value) FROM (SELECT key, count(1) AS value FROM src GROUP BY KEY) a PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by in subquery followed by transform script, should not be bucketed or sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT TRANSFORM (a.key, a.value) USING 'cat' AS (key, value) FROM (SELECT key, count(1) AS value FROM src GROUP BY KEY) a POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -2355,12 +2413,14 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Test group by on function, should be bucketed and sorted by key and value because the function is applied in the mapper +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT concat(key, "a") AS key, value, count(*) FROM src GROUP BY concat(key, "a"), value) a PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Test group by on function, should be bucketed and sorted by key and value because the function is applied in the mapper +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT concat(key, "a") AS key, value, count(*) FROM src GROUP BY concat(key, "a"), value) a POSTHOOK: type: QUERY POSTHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out index 52a0e20..af5b840 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out @@ -1,14 +1,24 @@ -PREHOOK: query: CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where joins may be auto converted to map joins. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where joins may be auto converted to map joins. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -55,7 +65,10 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +PREHOOK: query: -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin. +-- Tests a join which is not converted to a map join, the output should be bucketed and sorted. + +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -72,7 +85,10 @@ Logs: #### A masked pattern was here #### FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.MapredLocalTask ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.MapRedTask -POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +POSTHOOK: query: -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin. +-- Tests a join which is not converted to a map join, the output should be bucketed and sorted. + +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out index 43d4120..f016223 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out @@ -1,6 +1,14 @@ -PREHOOK: query: CREATE TABLE test_table_out (key STRING, value STRING, agg STRING) PARTITIONED BY (part STRING) +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, in particular, this tests +-- the grouping operators rollup/cube/grouping sets + +CREATE TABLE test_table_out (key STRING, value STRING, agg STRING) PARTITIONED BY (part STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table_out (key STRING, value STRING, agg STRING) PARTITIONED BY (part STRING) +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata, in particular, this tests +-- the grouping operators rollup/cube/grouping sets + +CREATE TABLE test_table_out (key STRING, value STRING, agg STRING) PARTITIONED BY (part STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table_out PREHOOK: query: CREATE TABLE test_table_out_2 (key STRING, value STRING, grouping_key STRING, agg STRING) PARTITIONED BY (part STRING) @@ -8,10 +16,12 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE test_table_out_2 (key STRING, value STRING, grouping_key STRING, agg STRING) PARTITIONED BY (part STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table_out_2 -PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +PREHOOK: query: -- Test rollup, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +POSTHOOK: query: -- Test rollup, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -172,12 +182,16 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +PREHOOK: query: -- Test rollup, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH ROLLUP PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table_out_2@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +POSTHOOK: query: -- Test rollup, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH ROLLUP POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -236,10 +250,12 @@ Bucket Columns: [key, value, grouping_key] Sort Columns: [Order(col:key, order:1), Order(col:value, order:1), Order(col:grouping_key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +PREHOOK: query: -- Test cube, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +POSTHOOK: query: -- Test cube, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] @@ -421,12 +437,16 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +PREHOOK: query: -- Test cube, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH CUBE PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table_out_2@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +POSTHOOK: query: -- Test cube, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH CUBE POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -499,10 +519,12 @@ Bucket Columns: [key, value, grouping_key] Sort Columns: [Order(col:key, order:1), Order(col:value, order:1), Order(col:grouping_key, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +PREHOOK: query: -- Test grouping sets, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') +POSTHOOK: query: -- Test grouping sets, should not be bucketed or sorted because its missing the grouping ID +EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table_out PARTITION(part=1).agg EXPRESSION [(src)src.null, ] @@ -705,12 +727,16 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +PREHOOK: query: -- Test grouping sets, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table_out_2@part=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') +POSTHOOK: query: -- Test grouping sets, should be bucketed and sorted on key, value, grouping_key + +INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1') SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out index 845011b..149afb9 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out @@ -1,16 +1,28 @@ -PREHOOK: query: CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where multi insert is used. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where multi insert is used. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table -PREHOOK: query: FROM src +PREHOOK: query: -- Simple case, neither partition should be bucketed or sorted + +FROM src INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT value, key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 PREHOOK: Output: default@test_table@part=2 -POSTHOOK: query: FROM src +POSTHOOK: query: -- Simple case, neither partition should be bucketed or sorted + +FROM src INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT value, key POSTHOOK: type: QUERY @@ -105,14 +117,18 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: FROM src +PREHOOK: query: -- The partitions should be bucketed and sorted by different keys + +FROM src INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT COUNT(*), value GROUP BY value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 PREHOOK: Output: default@test_table@part=2 -POSTHOOK: query: FROM src +POSTHOOK: query: -- The partitions should be bucketed and sorted by different keys + +FROM src INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT COUNT(*), value GROUP BY value POSTHOOK: type: QUERY @@ -219,14 +235,18 @@ Bucket Columns: [value] Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 -PREHOOK: query: FROM src +PREHOOK: query: -- The first partition should be bucketed and sorted, the second should not + +FROM src INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 PREHOOK: Output: default@test_table@part=2 -POSTHOOK: query: FROM src +POSTHOOK: query: -- The first partition should be bucketed and sorted, the second should not + +FROM src INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, value POSTHOOK: type: QUERY @@ -345,14 +365,18 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: FROM src +PREHOOK: query: -- Test the multi group by single reducer optimization +-- Both partitions should be bucketed by key +FROM src INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, SUM(SUBSTR(value, 5)) GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 PREHOOK: Output: default@test_table@part=2 -POSTHOOK: query: FROM src +POSTHOOK: query: -- Test the multi group by single reducer optimization +-- Both partitions should be bucketed by key +FROM src INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, SUM(SUBSTR(value, 5)) GROUP BY key POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/infer_const_type.q.out ql/src/test/results/clientpositive/infer_const_type.q.out index dfb4695..641f908 100644 --- ql/src/test/results/clientpositive/infer_const_type.q.out +++ ql/src/test/results/clientpositive/infer_const_type.q.out @@ -112,7 +112,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@infertypes #### A masked pattern was here #### 127 32767 12345 -12345 906.0 -307.0 1234 -PREHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE +PREHOOK: query: -- all should return false as all numbers exceeed the largest number +-- which could be represented by the corresponding type +-- and string_col = long_const should return false +EXPLAIN SELECT * FROM infertypes WHERE ti = '128' OR si = 32768 OR i = '2147483648' OR @@ -120,7 +123,10 @@ PREHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE fl = 'float' OR db = 'double' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE +POSTHOOK: query: -- all should return false as all numbers exceeed the largest number +-- which could be represented by the corresponding type +-- and string_col = long_const should return false +EXPLAIN SELECT * FROM infertypes WHERE ti = '128' OR si = 32768 OR i = '2147483648' OR @@ -195,12 +201,14 @@ POSTHOOK: query: SELECT * FROM infertypes WHERE POSTHOOK: type: QUERY POSTHOOK: Input: default@infertypes #### A masked pattern was here #### -PREHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE +PREHOOK: query: -- for the query like: int_col = double, should return false +EXPLAIN SELECT * FROM infertypes WHERE ti = '127.0' OR si = 327.0 OR i = '-100.0' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT * FROM infertypes WHERE +POSTHOOK: query: -- for the query like: int_col = double, should return false +EXPLAIN SELECT * FROM infertypes WHERE ti = '127.0' OR si = 327.0 OR i = '-100.0' diff --git ql/src/test/results/clientpositive/init_file.q.out ql/src/test/results/clientpositive/init_file.q.out index e7e4508..6b47252 100644 --- ql/src/test/results/clientpositive/init_file.q.out +++ ql/src/test/results/clientpositive/init_file.q.out @@ -3,11 +3,17 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tbl_created_by_init(i int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tbl_created_by_init -PREHOOK: query: select * from tbl_created_by_init +PREHOOK: query: -- tbl_created_by_init is supposed to have been created for us +-- automatically by test_init_file.sql + +select * from tbl_created_by_init PREHOOK: type: QUERY PREHOOK: Input: default@tbl_created_by_init #### A masked pattern was here #### -POSTHOOK: query: select * from tbl_created_by_init +POSTHOOK: query: -- tbl_created_by_init is supposed to have been created for us +-- automatically by test_init_file.sql + +select * from tbl_created_by_init POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_created_by_init #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/innerjoin.q.out ql/src/test/results/clientpositive/innerjoin.q.out index 2365b92..cf8d952 100644 --- ql/src/test/results/clientpositive/innerjoin.q.out +++ ql/src/test/results/clientpositive/innerjoin.q.out @@ -1176,9 +1176,11 @@ POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type: 98 val_98 98 val_98 98 val_98 -PREHOOK: query: create table inner(i int) +PREHOOK: query: -- verify that INNER is a non-reserved word for backwards compatibility +create table inner(i int) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table inner(i int) +POSTHOOK: query: -- verify that INNER is a non-reserved word for backwards compatibility +create table inner(i int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@inner POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/input12_hadoop20.q.out ql/src/test/results/clientpositive/input12_hadoop20.q.out index a615698..5607017 100644 --- ql/src/test/results/clientpositive/input12_hadoop20.q.out +++ ql/src/test/results/clientpositive/input12_hadoop20.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 PREHOOK: query: CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE diff --git ql/src/test/results/clientpositive/input16.q.out ql/src/test/results/clientpositive/input16.q.out index c89fb7e..98524b0 100644 --- ql/src/test/results/clientpositive/input16.q.out +++ ql/src/test/results/clientpositive/input16.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: DROP TABLE INPUT16 +PREHOOK: query: -- TestSerDe is a user defined serde where the default delimiter is Ctrl-B +DROP TABLE INPUT16 PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE INPUT16 +POSTHOOK: query: -- TestSerDe is a user defined serde where the default delimiter is Ctrl-B +DROP TABLE INPUT16 POSTHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE INPUT16(KEY STRING, VALUE STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.TestSerDe' STORED AS TEXTFILE PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/input39_hadoop20.q.out ql/src/test/results/clientpositive/input39_hadoop20.q.out index 7490641..d79aeac 100644 --- ql/src/test/results/clientpositive/input39_hadoop20.q.out +++ ql/src/test/results/clientpositive/input39_hadoop20.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: create table t1(key string, value string) partitioned by (ds string) +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + + +create table t1(key string, value string) partitioned by (ds string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table t1(key string, value string) partitioned by (ds string) +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + + +create table t1(key string, value string) partitioned by (ds string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@t1 PREHOOK: query: create table t2(key string, value string) partitioned by (ds string) diff --git ql/src/test/results/clientpositive/input_part10.q.out ql/src/test/results/clientpositive/input_part10.q.out index 0362d03..f972ca5 100644 --- ql/src/test/results/clientpositive/input_part10.q.out +++ ql/src/test/results/clientpositive/input_part10.q.out @@ -1,4 +1,7 @@ -PREHOOK: query: CREATE TABLE part_special ( +PREHOOK: query: -- EXCLUDE_OS_WINDOWS +-- excluded on windows because of difference in file name encoding logic + +CREATE TABLE part_special ( a STRING, b STRING ) PARTITIONED BY ( @@ -6,7 +9,10 @@ PREHOOK: query: CREATE TABLE part_special ( ts STRING ) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part_special ( +POSTHOOK: query: -- EXCLUDE_OS_WINDOWS +-- excluded on windows because of difference in file name encoding logic + +CREATE TABLE part_special ( a STRING, b STRING ) PARTITIONED BY ( diff --git ql/src/test/results/clientpositive/inputddl4.q.out ql/src/test/results/clientpositive/inputddl4.q.out index 9b88a3d..3785444 100644 --- ql/src/test/results/clientpositive/inputddl4.q.out +++ ql/src/test/results/clientpositive/inputddl4.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: CREATE TABLE INPUTDDL4(viewTime STRING, userid INT, +PREHOOK: query: -- a simple test to test sorted/clustered syntax + +CREATE TABLE INPUTDDL4(viewTime STRING, userid INT, page_url STRING, referrer_url STRING, friends ARRAY, properties MAP, ip STRING COMMENT 'IP Address of the User') @@ -6,7 +8,9 @@ PREHOOK: query: CREATE TABLE INPUTDDL4(viewTime STRING, userid INT, PARTITIONED BY(ds STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE INPUTDDL4(viewTime STRING, userid INT, +POSTHOOK: query: -- a simple test to test sorted/clustered syntax + +CREATE TABLE INPUTDDL4(viewTime STRING, userid INT, page_url STRING, referrer_url STRING, friends ARRAY, properties MAP, ip STRING COMMENT 'IP Address of the User') diff --git ql/src/test/results/clientpositive/inputddl5.q.out ql/src/test/results/clientpositive/inputddl5.q.out index 95661fc..99a7bfe 100644 --- ql/src/test/results/clientpositive/inputddl5.q.out +++ ql/src/test/results/clientpositive/inputddl5.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE INPUTDDL5(name STRING) STORED AS TEXTFILE +PREHOOK: query: -- test for internationalization +-- kv4.txt contains the utf-8 character 0xE982B5E993AE which we are verifying later on +CREATE TABLE INPUTDDL5(name STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE INPUTDDL5(name STRING) STORED AS TEXTFILE +POSTHOOK: query: -- test for internationalization +-- kv4.txt contains the utf-8 character 0xE982B5E993AE which we are verifying later on +CREATE TABLE INPUTDDL5(name STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@INPUTDDL5 PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv4.txt' INTO TABLE INPUTDDL5 diff --git ql/src/test/results/clientpositive/inputddl6.q.out ql/src/test/results/clientpositive/inputddl6.q.out index 83074db..52b8f0a 100644 --- ql/src/test/results/clientpositive/inputddl6.q.out +++ ql/src/test/results/clientpositive/inputddl6.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE TABLE INPUTDDL6(KEY STRING, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE +PREHOOK: query: -- test for describe extended table +-- test for describe extended table partition +-- test for alter table drop partition +CREATE TABLE INPUTDDL6(KEY STRING, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE INPUTDDL6(KEY STRING, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE +POSTHOOK: query: -- test for describe extended table +-- test for describe extended table partition +-- test for alter table drop partition +CREATE TABLE INPUTDDL6(KEY STRING, VALUE STRING) PARTITIONED BY(ds STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@INPUTDDL6 PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv1.txt' INTO TABLE INPUTDDL6 PARTITION (ds='2008-04-09') diff --git ql/src/test/results/clientpositive/inputddl7.q.out ql/src/test/results/clientpositive/inputddl7.q.out index cb081d0..d8e3215 100644 --- ql/src/test/results/clientpositive/inputddl7.q.out +++ ql/src/test/results/clientpositive/inputddl7.q.out @@ -1,6 +1,14 @@ -PREHOOK: query: CREATE TABLE T1(name STRING) STORED AS TEXTFILE +PREHOOK: query: -- test for loading into tables with the correct file format +-- test for loading into partitions with the correct file format + + +CREATE TABLE T1(name STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE T1(name STRING) STORED AS TEXTFILE +POSTHOOK: query: -- test for loading into tables with the correct file format +-- test for loading into partitions with the correct file format + + +CREATE TABLE T1(name STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@T1 PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv1.txt' INTO TABLE T1 diff --git ql/src/test/results/clientpositive/insert1.q.out ql/src/test/results/clientpositive/insert1.q.out index 577d941..0c57916 100644 --- ql/src/test/results/clientpositive/insert1.q.out +++ ql/src/test/results/clientpositive/insert1.q.out @@ -220,9 +220,11 @@ STAGE PLANS: #### A masked pattern was here #### -PREHOOK: query: create database x +PREHOOK: query: -- HIVE-3465 +create database x PREHOOK: type: CREATEDATABASE -POSTHOOK: query: create database x +POSTHOOK: query: -- HIVE-3465 +create database x POSTHOOK: type: CREATEDATABASE POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] @@ -622,9 +624,11 @@ STAGE PLANS: #### A masked pattern was here #### -PREHOOK: query: CREATE DATABASE db2 +PREHOOK: query: -- HIVE-3676 +CREATE DATABASE db2 PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE db2 +POSTHOOK: query: -- HIVE-3676 +CREATE DATABASE db2 POSTHOOK: type: CREATEDATABASE POSTHOOK: Lineage: insert1.key SIMPLE [(insert2)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: insert1.value SIMPLE [(insert2)a.FieldSchema(name:value, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/join14_hadoop20.q.out ql/src/test/results/clientpositive/join14_hadoop20.q.out index c54d033..41856dd 100644 --- ql/src/test/results/clientpositive/join14_hadoop20.q.out +++ ql/src/test/results/clientpositive/join14_hadoop20.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 PREHOOK: query: EXPLAIN diff --git ql/src/test/results/clientpositive/join28.q.out ql/src/test/results/clientpositive/join28.q.out index d2d33ab..4f1a892 100644 --- ql/src/test/results/clientpositive/join28.q.out +++ ql/src/test/results/clientpositive/join28.q.out @@ -3,7 +3,9 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value FROM @@ -11,7 +13,9 @@ FROM FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value FROM diff --git ql/src/test/results/clientpositive/join29.q.out ql/src/test/results/clientpositive/join29.q.out index dbd578d..6f08274 100644 --- ql/src/test/results/clientpositive/join29.q.out +++ ql/src/test/results/clientpositive/join29.q.out @@ -3,13 +3,17 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, subq1.cnt, subq2.cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, subq1.cnt, subq2.cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN diff --git ql/src/test/results/clientpositive/join31.q.out ql/src/test/results/clientpositive/join31.q.out index 8d91930..ac3abca 100644 --- ql/src/test/results/clientpositive/join31.q.out +++ ql/src/test/results/clientpositive/join31.q.out @@ -3,14 +3,18 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) group by subq1.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN diff --git ql/src/test/results/clientpositive/join32.q.out ql/src/test/results/clientpositive/join32.q.out index ec73ecc..92d81b9 100644 --- ql/src/test/results/clientpositive/join32.q.out +++ ql/src/test/results/clientpositive/join32.q.out @@ -3,13 +3,17 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git ql/src/test/results/clientpositive/join33.q.out ql/src/test/results/clientpositive/join33.q.out index ec73ecc..92d81b9 100644 --- ql/src/test/results/clientpositive/join33.q.out +++ ql/src/test/results/clientpositive/join33.q.out @@ -3,13 +3,17 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git ql/src/test/results/clientpositive/join34.q.out ql/src/test/results/clientpositive/join34.q.out index b2e6410..c108931 100644 --- ql/src/test/results/clientpositive/join34.q.out +++ ql/src/test/results/clientpositive/join34.q.out @@ -3,7 +3,9 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.value FROM @@ -13,7 +15,9 @@ FROM ) subq1 JOIN src1 x ON (x.key = subq1.key) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.value FROM diff --git ql/src/test/results/clientpositive/join35.q.out ql/src/test/results/clientpositive/join35.q.out index d1adf68..7074628 100644 --- ql/src/test/results/clientpositive/join35.q.out +++ ql/src/test/results/clientpositive/join35.q.out @@ -3,7 +3,9 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.cnt FROM @@ -13,7 +15,9 @@ FROM ) subq1 JOIN src1 x ON (x.key = subq1.key) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.cnt FROM diff --git ql/src/test/results/clientpositive/join41.q.out ql/src/test/results/clientpositive/join41.q.out index 3fb6670..a10b7a9 100644 --- ql/src/test/results/clientpositive/join41.q.out +++ ql/src/test/results/clientpositive/join41.q.out @@ -103,10 +103,12 @@ POSTHOOK: Input: default@s1 0 val_0 NULL NULL 0 val_0 NULL NULL 0 val_0 NULL NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Make sure the big table is chosen correctly as part of HIVE-4146 +EXPLAIN SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Make sure the big table is chosen correctly as part of HIVE-4146 +EXPLAIN SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/join_filters_overlap.q.out ql/src/test/results/clientpositive/join_filters_overlap.q.out index 62917ce..4f79d38 100644 --- ql/src/test/results/clientpositive/join_filters_overlap.q.out +++ ql/src/test/results/clientpositive/join_filters_overlap.q.out @@ -1,13 +1,19 @@ -PREHOOK: query: create table a as SELECT 100 as key, a.value as value FROM src LATERAL VIEW explode(array(40, 50, 60)) a as value limit 3 +PREHOOK: query: -- HIVE-3411 Filter predicates on outer join overlapped on single alias is not handled properly + +create table a as SELECT 100 as key, a.value as value FROM src LATERAL VIEW explode(array(40, 50, 60)) a as value limit 3 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src -POSTHOOK: query: create table a as SELECT 100 as key, a.value as value FROM src LATERAL VIEW explode(array(40, 50, 60)) a as value limit 3 +POSTHOOK: query: -- HIVE-3411 Filter predicates on outer join overlapped on single alias is not handled properly + +create table a as SELECT 100 as key, a.value as value FROM src LATERAL VIEW explode(array(40, 50, 60)) a as value limit 3 POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: default@a -PREHOOK: query: explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +PREHOOK: query: -- overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) PREHOOK: type: QUERY -POSTHOOK: query: explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +POSTHOOK: query: -- overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) 50)) (= (. (TOK_TABLE_OR_COL b) value) 50))) (TOK_TABREF (TOK_TABNAME a) c) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)) (= (. (TOK_TABLE_OR_COL a) value) 60)) (= (. (TOK_TABLE_OR_COL c) value) 60)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) @@ -209,9 +215,11 @@ POSTHOOK: Input: default@a 100 40 NULL NULL NULL NULL 100 50 100 50 NULL NULL 100 60 NULL NULL 100 60 -PREHOOK: query: explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) +PREHOOK: query: -- overlap on b +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) PREHOOK: type: QUERY -POSTHOOK: query: explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) +POSTHOOK: query: -- overlap on b +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) 50)) (= (. (TOK_TABLE_OR_COL b) value) 50))) (TOK_TABREF (TOK_TABNAME a) c) (AND (AND (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)) (= (. (TOK_TABLE_OR_COL b) value) 60)) (= (. (TOK_TABLE_OR_COL c) value) 60)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) @@ -413,9 +421,11 @@ POSTHOOK: Input: default@a NULL NULL 100 40 NULL NULL 100 50 100 50 NULL NULL NULL NULL 100 60 100 60 -PREHOOK: query: explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60) +PREHOOK: query: -- overlap on b with two filters for each +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60) PREHOOK: type: QUERY -POSTHOOK: query: explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60) +POSTHOOK: query: -- overlap on b with two filters for each +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (AND (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) 50)) (= (. (TOK_TABLE_OR_COL b) value) 50)) (> (. (TOK_TABLE_OR_COL b) value) 10))) (TOK_TABREF (TOK_TABNAME a) c) (AND (AND (AND (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)) (= (. (TOK_TABLE_OR_COL b) value) 60)) (> (. (TOK_TABLE_OR_COL b) value) 20)) (= (. (TOK_TABLE_OR_COL c) value) 60)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) @@ -617,9 +627,11 @@ POSTHOOK: Input: default@a NULL NULL 100 40 NULL NULL 100 50 100 50 NULL NULL NULL NULL 100 60 100 60 -PREHOOK: query: explain extended select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +PREHOOK: query: -- overlap on a, b +explain extended select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) PREHOOK: type: QUERY -POSTHOOK: query: explain extended select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +POSTHOOK: query: -- overlap on a, b +explain extended select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) 50)) (= (. (TOK_TABLE_OR_COL b) value) 50))) (TOK_TABREF (TOK_TABNAME a) c) (AND (AND (= (. (TOK_TABLE_OR_COL b) key) (. (TOK_TABLE_OR_COL c) key)) (= (. (TOK_TABLE_OR_COL b) value) 60)) (= (. (TOK_TABLE_OR_COL c) value) 60))) (TOK_TABREF (TOK_TABNAME a) d) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL a) value) 40)) (= (. (TOK_TABLE_OR_COL d) value) 40)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) @@ -838,9 +850,11 @@ NULL NULL 100 40 NULL NULL NULL NULL NULL NULL 100 60 100 60 NULL NULL 100 50 100 50 NULL NULL NULL NULL 100 60 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +PREHOOK: query: -- triple overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) PREHOOK: type: QUERY -POSTHOOK: query: explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +POSTHOOK: query: -- triple overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME a)) (TOK_TABREF (TOK_TABNAME a) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) 50)) (= (. (TOK_TABLE_OR_COL b) value) 50))) (TOK_TABREF (TOK_TABNAME a) c) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)) (= (. (TOK_TABLE_OR_COL a) value) 60)) (= (. (TOK_TABLE_OR_COL c) value) 60))) (TOK_TABREF (TOK_TABNAME a) d) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL a) value) 40)) (= (. (TOK_TABLE_OR_COL d) value) 40)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) diff --git ql/src/test/results/clientpositive/join_nullsafe.q.out ql/src/test/results/clientpositive/join_nullsafe.q.out index 413607e..2844571 100644 --- ql/src/test/results/clientpositive/join_nullsafe.q.out +++ ql/src/test/results/clientpositive/join_nullsafe.q.out @@ -9,9 +9,11 @@ PREHOOK: Output: default@myinput1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/in8.txt' INTO TABLE myinput1 POSTHOOK: type: LOAD POSTHOOK: Output: default@myinput1 -PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value +PREHOOK: query: -- merging +explain select * from myinput1 a join myinput1 b on a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value PREHOOK: type: QUERY -POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value +POSTHOOK: query: -- merging +explain select * from myinput1 a join myinput1 b on a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME myinput1) a) (TOK_TABREF (TOK_TABNAME myinput1) b) (<=> (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) value)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL b) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL b) value))))) @@ -829,11 +831,13 @@ NULL NULL NULL NULL NULL NULL NULL 10 10 NULL NULL 10 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value +PREHOOK: query: -- outer joins +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value +POSTHOOK: query: -- outer joins +SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### @@ -890,11 +894,13 @@ NULL 35 48 NULL 10 NULL NULL 10 48 NULL NULL NULL 100 100 100 100 -PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value +PREHOOK: query: -- map joins +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 #### A masked pattern was here #### -POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value +POSTHOOK: query: -- map joins +SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value ORDER BY a.key, a.value, b.key, b.value POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### @@ -928,9 +934,11 @@ NULL 35 10 NULL NULL 35 48 NULL 10 NULL NULL 10 100 100 100 100 -PREHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: query: -- smbs +CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: query: -- smbs +CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@smb_input1 PREHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS diff --git ql/src/test/results/clientpositive/join_vc.q.out ql/src/test/results/clientpositive/join_vc.q.out index e70d418..f0f5905 100644 --- ql/src/test/results/clientpositive/join_vc.q.out +++ ql/src/test/results/clientpositive/join_vc.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: explain select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.value limit 3 +PREHOOK: query: -- see HIVE-4033 earlier a flag named hasVC was not initialized correctly in MapOperator.java, resulting in NPE for following query. order by and limit in the query is not relevant, problem would be evident even without those. They are there to keep .q.out file small and sorted. + +explain select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.value limit 3 PREHOOK: type: QUERY -POSTHOOK: query: explain select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.value limit 3 +POSTHOOK: query: -- see HIVE-4033 earlier a flag named hasVC was not initialized correctly in MapOperator.java, resulting in NPE for following query. order by and limit in the query is not relevant, problem would be evident even without those. They are there to keep .q.out file small and sorted. + +explain select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.value limit 3 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) t1) (TOK_TABREF (TOK_TABNAME src) t2) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key))) (TOK_TABREF (TOK_TABNAME src) t3) (= (. (TOK_TABLE_OR_COL t2) value) (. (TOK_TABLE_OR_COL t3) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t3) BLOCK__OFFSET__INSIDE__FILE)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL t3) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL t3) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL t3) value))) (TOK_LIMIT 3))) diff --git ql/src/test/results/clientpositive/join_view.q.out ql/src/test/results/clientpositive/join_view.q.out index 97ab059..6913793 100644 --- ql/src/test/results/clientpositive/join_view.q.out +++ ql/src/test/results/clientpositive/join_view.q.out @@ -16,9 +16,13 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table invites2 (foo int, bar string) partitioned by (ds string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@invites2 -PREHOOK: query: create view v as select invites.bar, invites2.foo, invites2.ds from invites join invites2 on invites.ds=invites2.ds +PREHOOK: query: -- test join views: see HIVE-1989 + +create view v as select invites.bar, invites2.foo, invites2.ds from invites join invites2 on invites.ds=invites2.ds PREHOOK: type: CREATEVIEW -POSTHOOK: query: create view v as select invites.bar, invites2.foo, invites2.ds from invites join invites2 on invites.ds=invites2.ds +POSTHOOK: query: -- test join views: see HIVE-1989 + +create view v as select invites.bar, invites2.foo, invites2.ds from invites join invites2 on invites.ds=invites2.ds POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@v PREHOOK: query: explain select * from v where ds='2011-09-01' diff --git ql/src/test/results/clientpositive/keyword_1.q.out ql/src/test/results/clientpositive/keyword_1.q.out index 2e55c82..8ee3d86 100644 --- ql/src/test/results/clientpositive/keyword_1.q.out +++ ql/src/test/results/clientpositive/keyword_1.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table test_user (user string, `group` string) +PREHOOK: query: -- SORT_BEFORE_DIFF + +create table test_user (user string, `group` string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table test_user (user string, `group` string) +POSTHOOK: query: -- SORT_BEFORE_DIFF + +create table test_user (user string, `group` string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_user PREHOOK: query: grant select on table test_user to user hive_test diff --git ql/src/test/results/clientpositive/lateral_view.q.out ql/src/test/results/clientpositive/lateral_view.q.out index e1cb9b2..61bb6ea 100644 --- ql/src/test/results/clientpositive/lateral_view.q.out +++ ql/src/test/results/clientpositive/lateral_view.q.out @@ -440,21 +440,25 @@ STAGE PLANS: limit: 3 -PREHOOK: query: SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol SORT BY key ASC, myCol ASC LIMIT 1 +PREHOOK: query: -- Verify that * selects columns from both tables +SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol SORT BY key ASC, myCol ASC LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol SORT BY key ASC, myCol ASC LIMIT 1 +POSTHOOK: query: -- Verify that * selects columns from both tables +SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol SORT BY key ASC, myCol ASC LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] 0 val_0 1 -PREHOOK: query: SELECT myTable.* FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LIMIT 3 +PREHOOK: query: -- TABLE.* should be supported +SELECT myTable.* FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LIMIT 3 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT myTable.* FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LIMIT 3 +POSTHOOK: query: -- TABLE.* should be supported +SELECT myTable.* FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LIMIT 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -462,11 +466,13 @@ POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, ty 1 2 3 -PREHOOK: query: SELECT myTable.myCol, myTable2.myCol2 FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array('a', 'b', 'c')) myTable2 AS myCol2 LIMIT 9 +PREHOOK: query: -- Multiple lateral views should result in a Cartesian product +SELECT myTable.myCol, myTable2.myCol2 FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array('a', 'b', 'c')) myTable2 AS myCol2 LIMIT 9 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT myTable.myCol, myTable2.myCol2 FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array('a', 'b', 'c')) myTable2 AS myCol2 LIMIT 9 +POSTHOOK: query: -- Multiple lateral views should result in a Cartesian product +SELECT myTable.myCol, myTable2.myCol2 FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array('a', 'b', 'c')) myTable2 AS myCol2 LIMIT 9 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -480,11 +486,13 @@ POSTHOOK: Lineage: tmp_pyang_lv.inputs SIMPLE [(src)src.FieldSchema(name:key, ty 3 a 3 b 3 c -PREHOOK: query: SELECT myTable2.* FROM src LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol LATERAL VIEW explode(myTable.myCol) myTable2 AS myCol2 LIMIT 3 +PREHOOK: query: -- Should be able to reference tables generated earlier +SELECT myTable2.* FROM src LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol LATERAL VIEW explode(myTable.myCol) myTable2 AS myCol2 LIMIT 3 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT myTable2.* FROM src LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol LATERAL VIEW explode(myTable.myCol) myTable2 AS myCol2 LIMIT 3 +POSTHOOK: query: -- Should be able to reference tables generated earlier +SELECT myTable2.* FROM src LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol LATERAL VIEW explode(myTable.myCol) myTable2 AS myCol2 LIMIT 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/lateral_view_cp.q.out ql/src/test/results/clientpositive/lateral_view_cp.q.out index 217c21a..7c342c1 100644 --- ql/src/test/results/clientpositive/lateral_view_cp.q.out +++ ql/src/test/results/clientpositive/lateral_view_cp.q.out @@ -19,9 +19,11 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@array_valued_src POSTHOOK: Lineage: array_valued_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: array_valued_src.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain select count(val) from (select a.key as key, b.value as array_val from src a join array_valued_src b on a.key=b.key) i lateral view explode (array_val) c as val +PREHOOK: query: -- replace sel(*) to sel(exprs) for reflecting CP result properly +explain select count(val) from (select a.key as key, b.value as array_val from src a join array_valued_src b on a.key=b.key) i lateral view explode (array_val) c as val PREHOOK: type: QUERY -POSTHOOK: query: explain select count(val) from (select a.key as key, b.value as array_val from src a join array_valued_src b on a.key=b.key) i lateral view explode (array_val) c as val +POSTHOOK: query: -- replace sel(*) to sel(exprs) for reflecting CP result properly +explain select count(val) from (select a.key as key, b.value as array_val from src a join array_valued_src b on a.key=b.key) i lateral view explode (array_val) c as val POSTHOOK: type: QUERY POSTHOOK: Lineage: array_valued_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: array_valued_src.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/leadlag.q.out ql/src/test/results/clientpositive/leadlag.q.out index 082823b..439c0ca 100644 --- ql/src/test/results/clientpositive/leadlag.q.out +++ ql/src/test/results/clientpositive/leadlag.q.out @@ -2,7 +2,8 @@ PREHOOK: query: DROP TABLE part PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE part POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE part( +PREHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -14,7 +15,8 @@ PREHOOK: query: CREATE TABLE part( p_comment STRING ) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part( +POSTHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -85,7 +87,8 @@ Manufacturer#5 almond antique medium spring khaki 2 2 1611.66 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 1464.48 7672.66 23 -23 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 2. testLagWithWindowingNoPTF +select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, p_retailprice, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1, @@ -94,7 +97,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 2. testLagWithWindowingNoPTF +select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, p_retailprice, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1, @@ -129,13 +133,15 @@ Manufacturer#5 almond antique medium spring khaki 2 2 1611.66 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 1464.48 7672.66 23 -23 -PREHOOK: query: select p1.p_mfgr, p1.p_name, +PREHOOK: query: -- 3. testJoinWithLag +select p1.p_mfgr, p1.p_name, p1.p_size, p1.p_size - lag(p1.p_size,1,p1.p_size) over( distribute by p1.p_mfgr sort by p1.p_name) as deltaSz from part p1 join part p2 on p1.p_partkey = p2.p_partkey PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p1.p_mfgr, p1.p_name, +POSTHOOK: query: -- 3. testJoinWithLag +select p1.p_mfgr, p1.p_name, p1.p_size, p1.p_size - lag(p1.p_size,1,p1.p_size) over( distribute by p1.p_mfgr sort by p1.p_name) as deltaSz from part p1 join part p2 on p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY @@ -169,14 +175,16 @@ Manufacturer#5 almond antique medium spring khaki 6 -25 Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 4. testLagInSum +select p_mfgr,p_name, p_size, sum(p_size - lag(p_size,1)) over(distribute by p_mfgr sort by p_mfgr ) as deltaSum from part window w1 as (rows between 2 preceding and 2 following) PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 4. testLagInSum +select p_mfgr,p_name, p_size, sum(p_size - lag(p_size,1)) over(distribute by p_mfgr sort by p_mfgr ) as deltaSum from part window w1 as (rows between 2 preceding and 2 following) @@ -209,14 +217,16 @@ Manufacturer#5 almond antique medium spring khaki 6 -8 Manufacturer#5 almond antique sky peru orange 2 -8 Manufacturer#5 almond aquamarine dodger light gainsboro 46 -8 Manufacturer#5 almond azure blanched chiffon midnight 23 -8 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 5. testLagInSumOverWindow +select p_mfgr,p_name, p_size, sum(p_size - lag(p_size,1)) over w1 as deltaSum from part window w1 as (distribute by p_mfgr sort by p_mfgr rows between 2 preceding and 2 following) PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 5. testLagInSumOverWindow +select p_mfgr,p_name, p_size, sum(p_size - lag(p_size,1)) over w1 as deltaSum from part window w1 as (distribute by p_mfgr sort by p_mfgr rows between 2 preceding and 2 following) @@ -249,7 +259,8 @@ Manufacturer#5 almond antique medium spring khaki 6 15 Manufacturer#5 almond antique sky peru orange 2 -8 Manufacturer#5 almond aquamarine dodger light gainsboro 46 17 Manufacturer#5 almond azure blanched chiffon midnight 23 21 -PREHOOK: query: select p_mfgr, p_name, p_size, r1, +PREHOOK: query: -- 6. testRankInLead +select p_mfgr, p_name, p_size, r1, lead(r1,1,r1) over (distribute by p_mfgr sort by p_name) as deltaRank from ( select p_mfgr, p_name, p_size, @@ -259,7 +270,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, r1, +POSTHOOK: query: -- 6. testRankInLead +select p_mfgr, p_name, p_size, r1, lead(r1,1,r1) over (distribute by p_mfgr sort by p_name) as deltaRank from ( select p_mfgr, p_name, p_size, @@ -295,7 +307,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 3 Manufacturer#5 almond antique sky peru orange 2 3 4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 5 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 7. testLeadWithPTF +select p_mfgr, p_name, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, p_size, p_size - lead(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz @@ -306,7 +319,8 @@ order by p_name PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 7. testLeadWithPTF +select p_mfgr, p_name, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, p_size, p_size - lead(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz @@ -343,7 +357,8 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 4 Manufacturer#5 almond antique sky peru orange 3 3 2 -44 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 23 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 0 -PREHOOK: query: select p_name, p_retailprice, +PREHOOK: query: -- 8. testOverNoPartitionMultipleAggregate +select p_name, p_retailprice, lead(p_retailprice) over() as l1 , lag(p_retailprice) over() as l2 from part @@ -351,7 +366,8 @@ order by p_name PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_name, p_retailprice, +POSTHOOK: query: -- 8. testOverNoPartitionMultipleAggregate +select p_name, p_retailprice, lead(p_retailprice) over() as l1 , lag(p_retailprice) over() as l2 from part diff --git ql/src/test/results/clientpositive/leadlag_queries.q.out ql/src/test/results/clientpositive/leadlag_queries.q.out index 357b26d..75bee1e 100644 --- ql/src/test/results/clientpositive/leadlag_queries.q.out +++ ql/src/test/results/clientpositive/leadlag_queries.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: CREATE TABLE part( +PREHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -10,7 +11,8 @@ PREHOOK: query: CREATE TABLE part( p_comment STRING ) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part( +POSTHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -29,7 +31,8 @@ PREHOOK: Output: default@part POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part POSTHOOK: type: LOAD POSTHOOK: Output: default@part -PREHOOK: query: select p_mfgr, p_retailprice, +PREHOOK: query: -- 1. testLeadUDAF +select p_mfgr, p_retailprice, lead(p_retailprice) over (partition by p_mfgr order by p_name) as l1, lead(p_retailprice,1) over (partition by p_mfgr order by p_name) as l2, lead(p_retailprice,1,10) over (partition by p_mfgr order by p_name) as l3, @@ -39,7 +42,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, +POSTHOOK: query: -- 1. testLeadUDAF +select p_mfgr, p_retailprice, lead(p_retailprice) over (partition by p_mfgr order by p_name) as l1, lead(p_retailprice,1) over (partition by p_mfgr order by p_name) as l2, lead(p_retailprice,1,10) over (partition by p_mfgr order by p_name) as l3, @@ -75,14 +79,16 @@ Manufacturer#5 1611.66 1788.73 1788.73 1788.73 1788.73 -177.06999999999994 Manufacturer#5 1788.73 1018.1 1018.1 1018.1 1018.1 770.63 Manufacturer#5 1018.1 1464.48 1464.48 1464.48 1464.48 -446.38 Manufacturer#5 1464.48 NULL NULL 10.0 1464.48 0.0 -PREHOOK: query: select p_mfgr, p_name, p_retailprice, +PREHOOK: query: -- 2.testLeadUDAFPartSz1 +select p_mfgr, p_name, p_retailprice, lead(p_retailprice,1) over (partition by p_mfgr, p_name ), p_retailprice - lead(p_retailprice,1,p_retailprice) over (partition by p_mfgr, p_name) from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +POSTHOOK: query: -- 2.testLeadUDAFPartSz1 +select p_mfgr, p_name, p_retailprice, lead(p_retailprice,1) over (partition by p_mfgr, p_name ), p_retailprice - lead(p_retailprice,1,p_retailprice) over (partition by p_mfgr, p_name) from part @@ -115,7 +121,8 @@ Manufacturer#5 almond antique medium spring khaki 1611.66 NULL 0.0 Manufacturer#5 almond antique sky peru orange 1788.73 NULL 0.0 Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 NULL 0.0 Manufacturer#5 almond azure blanched chiffon midnight 1464.48 NULL 0.0 -PREHOOK: query: select p_mfgr, p_retailprice, +PREHOOK: query: -- 3.testLagUDAF +select p_mfgr, p_retailprice, lag(p_retailprice,1) over (partition by p_mfgr order by p_name) as l1, lag(p_retailprice) over (partition by p_mfgr order by p_name) as l2, lag(p_retailprice,1, p_retailprice) over (partition by p_mfgr order by p_name) as l3, @@ -125,7 +132,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, +POSTHOOK: query: -- 3.testLagUDAF +select p_mfgr, p_retailprice, lag(p_retailprice,1) over (partition by p_mfgr order by p_name) as l1, lag(p_retailprice) over (partition by p_mfgr order by p_name) as l2, lag(p_retailprice,1, p_retailprice) over (partition by p_mfgr order by p_name) as l3, @@ -161,14 +169,16 @@ Manufacturer#5 1611.66 1789.69 1789.69 1789.69 1789.69 -178.02999999999997 Manufacturer#5 1788.73 1611.66 1611.66 1611.66 1611.66 177.06999999999994 Manufacturer#5 1018.1 1788.73 1788.73 1788.73 1788.73 -770.63 Manufacturer#5 1464.48 1018.1 1018.1 1018.1 1018.1 446.38 -PREHOOK: query: select p_mfgr, p_name, p_retailprice, +PREHOOK: query: -- 4.testLagUDAFPartSz1 +select p_mfgr, p_name, p_retailprice, lag(p_retailprice,1) over (partition by p_mfgr, p_name ), p_retailprice - lag(p_retailprice,1,p_retailprice) over (partition by p_mfgr, p_name) from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +POSTHOOK: query: -- 4.testLagUDAFPartSz1 +select p_mfgr, p_name, p_retailprice, lag(p_retailprice,1) over (partition by p_mfgr, p_name ), p_retailprice - lag(p_retailprice,1,p_retailprice) over (partition by p_mfgr, p_name) from part @@ -201,7 +211,8 @@ Manufacturer#5 almond antique medium spring khaki 1611.66 NULL 0.0 Manufacturer#5 almond antique sky peru orange 1788.73 NULL 0.0 Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 NULL 0.0 Manufacturer#5 almond azure blanched chiffon midnight 1464.48 NULL 0.0 -PREHOOK: query: select p_mfgr, p_retailprice, +PREHOOK: query: -- 5.testLeadLagUDAF +select p_mfgr, p_retailprice, lead(p_retailprice,1) over (partition by p_mfgr order by p_name) as l1, lead(p_retailprice,1, p_retailprice) over (partition by p_mfgr order by p_name) as l2, p_retailprice - lead(p_retailprice,1,p_retailprice) over (partition by p_mfgr order by p_name), @@ -211,7 +222,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, +POSTHOOK: query: -- 5.testLeadLagUDAF +select p_mfgr, p_retailprice, lead(p_retailprice,1) over (partition by p_mfgr order by p_name) as l1, lead(p_retailprice,1, p_retailprice) over (partition by p_mfgr order by p_name) as l2, p_retailprice - lead(p_retailprice,1,p_retailprice) over (partition by p_mfgr order by p_name), diff --git ql/src/test/results/clientpositive/load_dyn_part14.q.out ql/src/test/results/clientpositive/load_dyn_part14.q.out index 650c622..23d8010 100644 --- ql/src/test/results/clientpositive/load_dyn_part14.q.out +++ ql/src/test/results/clientpositive/load_dyn_part14.q.out @@ -1,7 +1,13 @@ -PREHOOK: query: create table if not exists nzhang_part14 (key string) +PREHOOK: query: -- EXCLUDE_OS_WINDOWS +-- excluded on windows because of difference in file name encoding logic + +create table if not exists nzhang_part14 (key string) partitioned by (value string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table if not exists nzhang_part14 (key string) +POSTHOOK: query: -- EXCLUDE_OS_WINDOWS +-- excluded on windows because of difference in file name encoding logic + +create table if not exists nzhang_part14 (key string) partitioned by (value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@nzhang_part14 diff --git ql/src/test/results/clientpositive/mapjoin1.q.out ql/src/test/results/clientpositive/mapjoin1.q.out index 0f9902b..ce0025c 100644 --- ql/src/test/results/clientpositive/mapjoin1.q.out +++ ql/src/test/results/clientpositive/mapjoin1.q.out @@ -21,10 +21,12 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 1114788.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- const filter on outer join +EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- const filter on outer join +EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -124,10 +126,12 @@ POSTHOOK: Input: default@src 165 val_165 165 val_165 165 val_165 165 val_165 409 val_409 409 val_409 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- func filter on outer join +EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- func filter on outer join +EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -227,11 +231,13 @@ NULL NULL 255 val_255 NULL NULL 278 val_278 98 val_98 98 val_98 98 val_98 98 val_98 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- field filter on outer join +EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- field filter on outer join +EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out index d9b771f..3626d8a 100644 --- ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out +++ ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) PREHOOK: type: QUERY -POSTHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key))))) diff --git ql/src/test/results/clientpositive/mapjoin_subquery.q.out ql/src/test/results/clientpositive/mapjoin_subquery.q.out index b195e53..7bfdd1d 100644 --- ql/src/test/results/clientpositive/mapjoin_subquery.q.out +++ ql/src/test/results/clientpositive/mapjoin_subquery.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN SELECT subq.key1, z.value FROM (SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 FROM src1 x JOIN src y ON (x.key = y.key)) subq JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN SELECT subq.key1, z.value FROM (SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 diff --git ql/src/test/results/clientpositive/mapjoin_subquery2.q.out ql/src/test/results/clientpositive/mapjoin_subquery2.q.out index 7770786..4e0823f 100644 --- ql/src/test/results/clientpositive/mapjoin_subquery2.q.out +++ ql/src/test/results/clientpositive/mapjoin_subquery2.q.out @@ -49,14 +49,18 @@ PREHOOK: Output: default@z POSTHOOK: query: load data local inpath '../data/files/z.txt' INTO TABLE z POSTHOOK: type: LOAD POSTHOOK: Output: default@z -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name FROM (SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 FROM y JOIN x ON (x.id = y.id)) subq JOIN z ON (subq.key1 = z.id) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name FROM (SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 diff --git ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out index 34496f4..1f8ce49 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: create table srcpart_merge_dp like srcpart +PREHOOK: query: -- this test verifies that the block merge task that can follow a query to generate dynamic +-- partitions does not produce incorrect results by dropping partitions + +create table srcpart_merge_dp like srcpart PREHOOK: type: CREATETABLE -POSTHOOK: query: create table srcpart_merge_dp like srcpart +POSTHOOK: query: -- this test verifies that the block merge task that can follow a query to generate dynamic +-- partitions does not produce incorrect results by dropping partitions + +create table srcpart_merge_dp like srcpart POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@srcpart_merge_dp PREHOOK: query: create table srcpart_merge_dp_rc like srcpart diff --git ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index 2c448b4..f3489f6 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table srcpart_merge_dp like srcpart +PREHOOK: query: -- this is to test the case where some dynamic partitions are merged and some are moved + +create table srcpart_merge_dp like srcpart PREHOOK: type: CREATETABLE -POSTHOOK: query: create table srcpart_merge_dp like srcpart +POSTHOOK: query: -- this is to test the case where some dynamic partitions are merged and some are moved + +create table srcpart_merge_dp like srcpart POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@srcpart_merge_dp PREHOOK: query: create table srcpart_merge_dp_rc like srcpart diff --git ql/src/test/results/clientpositive/mergejoins_mixed.q.out ql/src/test/results/clientpositive/mergejoins_mixed.q.out index 9d0283d..b76fa01 100644 --- ql/src/test/results/clientpositive/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/mergejoins_mixed.q.out @@ -1,12 +1,18 @@ -PREHOOK: query: create table a (key string, value string) +PREHOOK: query: -- HIVE-3464 + +create table a (key string, value string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table a (key string, value string) +POSTHOOK: query: -- HIVE-3464 + +create table a (key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@a -PREHOOK: query: explain +PREHOOK: query: -- (a-b-c-d) +explain select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- (a-b-c-d) +explain select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -519,10 +525,12 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain +PREHOOK: query: -- ((a-b-d)-c) (reordered) +explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- ((a-b-d)-c) (reordered) +explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -1029,10 +1037,12 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain +PREHOOK: query: -- (((a-b)-c)-d) +explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- (((a-b)-c)-d) +explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -1853,10 +1863,12 @@ STAGE PLANS: Fetch Operator limit: -1 -PREHOOK: query: explain +PREHOOK: query: -- ((a-b)-c-d) +explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (c.key=d.key) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- ((a-b)-c-d) +explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (c.key=d.key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/metadataonly1.q.out ql/src/test/results/clientpositive/metadataonly1.q.out index 11d2412..aa6402e 100644 --- ql/src/test/results/clientpositive/metadataonly1.q.out +++ ql/src/test/results/clientpositive/metadataonly1.q.out @@ -1529,10 +1529,12 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 2008-04-08 2008-04-09 -PREHOOK: query: alter table TEST2 add partition (ds='01:10:10', hr='01') +PREHOOK: query: -- HIVE-3594 URI encoding for temporary path +alter table TEST2 add partition (ds='01:10:10', hr='01') PREHOOK: type: ALTERTABLE_ADDPARTS PREHOOK: Input: default@test2 -POSTHOOK: query: alter table TEST2 add partition (ds='01:10:10', hr='01') +POSTHOOK: query: -- HIVE-3594 URI encoding for temporary path +alter table TEST2 add partition (ds='01:10:10', hr='01') POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Input: default@test2 POSTHOOK: Output: default@test2@ds=01%3A10%3A10/hr=01 diff --git ql/src/test/results/clientpositive/multiMapJoin1.q.out ql/src/test/results/clientpositive/multiMapJoin1.q.out index 85193ac..ecc55bd 100644 --- ql/src/test/results/clientpositive/multiMapJoin1.q.out +++ ql/src/test/results/clientpositive/multiMapJoin1.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: create table smallTbl1(key string, value string) +PREHOOK: query: -- Join of a big table with 2 small tables on different keys should be performed as a single MR job +create table smallTbl1(key string, value string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table smallTbl1(key string, value string) +POSTHOOK: query: -- Join of a big table with 2 small tables on different keys should be performed as a single MR job +create table smallTbl1(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@smallTbl1 PREHOOK: query: insert overwrite table smallTbl1 select * from src where key < 10 @@ -636,7 +638,8 @@ POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 1660 -PREHOOK: query: explain +PREHOOK: query: -- join with 4 tables on different keys is also executed as a single MR job +explain select count(*) FROM ( SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, @@ -651,7 +654,8 @@ select count(*) FROM ) secondjoin JOIN smallTbl3 on (secondjoin.key2 = smallTbl3.key) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- join with 4 tables on different keys is also executed as a single MR job +explain select count(*) FROM ( SELECT firstjoin.key1 as key1, firstjoin.key2 as key2, smallTbl2.key as key3, diff --git ql/src/test/results/clientpositive/nomore_ambiguous_table_col.q.out ql/src/test/results/clientpositive/nomore_ambiguous_table_col.q.out index 217e2f4..1377a1a 100644 --- ql/src/test/results/clientpositive/nomore_ambiguous_table_col.q.out +++ ql/src/test/results/clientpositive/nomore_ambiguous_table_col.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: drop table ambiguous +PREHOOK: query: -- was negative/ambiguous_table_col.q + +drop table ambiguous PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ambiguous +POSTHOOK: query: -- was negative/ambiguous_table_col.q + +drop table ambiguous POSTHOOK: type: DROPTABLE PREHOOK: query: create table ambiguous (key string, value string) PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out index c0bdb2c..e3a00af 100644 --- ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out +++ ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: explain select nkey, nkey + 1 from (select key + 1 as nkey, value from src) a +PREHOOK: query: -- negative, references twice for result of funcion +explain select nkey, nkey + 1 from (select key + 1 as nkey, value from src) a PREHOOK: type: QUERY -POSTHOOK: query: explain select nkey, nkey + 1 from (select key + 1 as nkey, value from src) a +POSTHOOK: query: -- negative, references twice for result of funcion +explain select nkey, nkey + 1 from (select key + 1 as nkey, value from src) a POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) 1) nkey) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL nkey)) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL nkey) 1))))) diff --git ql/src/test/results/clientpositive/nonmr_fetch.q.out ql/src/test/results/clientpositive/nonmr_fetch.q.out index 7044216..a8ae41b 100644 --- ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: explain select * from src limit 10 +PREHOOK: query: -- backward compatible (minimal) +explain select * from src limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from src limit 10 +POSTHOOK: query: -- backward compatible (minimal) +explain select * from src limit 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) @@ -96,9 +98,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 278 val_278 2008-04-08 11 98 val_98 2008-04-08 11 484 val_484 2008-04-08 11 -PREHOOK: query: explain select key from src limit 10 +PREHOOK: query: -- negative, select expression +explain select key from src limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select key from src limit 10 +POSTHOOK: query: -- negative, select expression +explain select key from src limit 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) @@ -150,9 +154,11 @@ POSTHOOK: Input: default@src 278 98 484 -PREHOOK: query: explain select * from srcpart where key > 100 limit 10 +PREHOOK: query: -- negative, filter on non-partition column +explain select * from srcpart where key > 100 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from srcpart where key > 100 limit 10 +POSTHOOK: query: -- negative, filter on non-partition column +explain select * from srcpart where key > 100 limit 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10))) @@ -222,9 +228,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 265 val_265 2008-04-08 11 193 val_193 2008-04-08 11 401 val_401 2008-04-08 11 -PREHOOK: query: explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10 +PREHOOK: query: -- negative, table sampling +explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10 +POSTHOOK: query: -- negative, table sampling +explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_PERCENT 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) @@ -282,9 +290,11 @@ POSTHOOK: Input: default@src 278 val_278 98 val_98 484 val_484 -PREHOOK: query: explain select * from src limit 10 +PREHOOK: query: -- backward compatible (more) +explain select * from src limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from src limit 10 +POSTHOOK: query: -- backward compatible (more) +explain select * from src limit 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10))) @@ -380,9 +390,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 278 val_278 2008-04-08 11 98 val_98 2008-04-08 11 484 val_484 2008-04-08 11 -PREHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 +PREHOOK: query: -- select expression +explain select cast(key as int) * 10, upper(value) from src limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 +POSTHOOK: query: -- select expression +explain select cast(key as int) * 10, upper(value) from src limit 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (* (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL key)) 10)) (TOK_SELEXPR (TOK_FUNCTION upper (TOK_TABLE_OR_COL value)))) (TOK_LIMIT 10))) @@ -426,9 +438,11 @@ POSTHOOK: Input: default@src 2780 VAL_278 980 VAL_98 4840 VAL_484 -PREHOOK: query: explain select key from src where key < 100 limit 10 +PREHOOK: query: -- filter on non-partition column +explain select key from src where key < 100 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select key from src where key < 100 limit 10 +POSTHOOK: query: -- filter on non-partition column +explain select key from src where key < 100 limit 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10))) @@ -474,9 +488,11 @@ POSTHOOK: Input: default@src 17 0 57 -PREHOOK: query: explain select key from srcpart where ds='2008-04-08' AND hr='11' limit 10 +PREHOOK: query: -- select expr for partitioned table +explain select key from srcpart where ds='2008-04-08' AND hr='11' limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select key from srcpart where ds='2008-04-08' AND hr='11' limit 10 +POSTHOOK: query: -- select expr for partitioned table +explain select key from srcpart where ds='2008-04-08' AND hr='11' limit 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11'))) (TOK_LIMIT 10))) @@ -520,9 +536,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 278 98 484 -PREHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 10 limit 10 +PREHOOK: query: -- virtual columns +explain select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 10 limit 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 10 limit 10 +POSTHOOK: query: -- virtual columns +explain select *, BLOCK__OFFSET__INSIDE__FILE from src where key < 10 limit 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_LIMIT 10))) @@ -572,9 +590,11 @@ POSTHOOK: Input: default@src 17 val_17 910 0 val_0 968 57 val_57 1024 -PREHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30 +PREHOOK: query: -- virtual columns on partitioned table +explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30 PREHOOK: type: QUERY -POSTHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30 +POSTHOOK: query: -- virtual columns on partitioned table +explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart where key < 10 limit 30 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 10)) (TOK_LIMIT 30))) @@ -656,9 +676,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 2 val_2 2008-04-09 11 4004 5 val_5 2008-04-09 11 4540 9 val_9 2008-04-09 11 5398 -PREHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) +PREHOOK: query: -- bucket sampling +explain select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) PREHOOK: type: QUERY -POSTHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) +POSTHOOK: query: -- bucket sampling +explain select *, BLOCK__OFFSET__INSIDE__FILE from src TABLESAMPLE (BUCKET 1 OUT OF 40 ON key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLEBUCKETSAMPLE 1 40 (TOK_TABLE_OR_COL key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))))) @@ -785,9 +807,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 77 val_77 2008-04-09 12 2622 187 val_187 2008-04-09 12 4516 448 val_448 2008-04-09 12 5636 -PREHOOK: query: explain select * from src TABLESAMPLE (0.25 PERCENT) +PREHOOK: query: -- split sampling +explain select * from src TABLESAMPLE (0.25 PERCENT) PREHOOK: type: QUERY -POSTHOOK: query: explain select * from src TABLESAMPLE (0.25 PERCENT) +POSTHOOK: query: -- split sampling +explain select * from src TABLESAMPLE (0.25 PERCENT) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_PERCENT 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) @@ -879,9 +903,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 86 val_86 2008-04-09 11 12 238 val_238 2008-04-09 12 0 86 val_86 2008-04-09 12 12 -PREHOOK: query: explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 +PREHOOK: query: -- non deterministic func +explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 +POSTHOOK: query: -- non deterministic func +explain select key, value, BLOCK__OFFSET__INSIDE__FILE from srcpart where ds="2008-04-09" AND rand() > 1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL ds) "2008-04-09") (> (TOK_FUNCTION rand) 1))))) @@ -924,9 +950,11 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -PREHOOK: query: explain select key, count(value) from src group by key +PREHOOK: query: -- negative, groupby +explain select key, count(value) from src group by key PREHOOK: type: QUERY -POSTHOOK: query: explain select key, count(value) from src group by key +POSTHOOK: query: -- negative, groupby +explain select key, count(value) from src group by key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) @@ -999,9 +1027,11 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain select distinct key, value from src +PREHOOK: query: -- negative, distinct +explain select distinct key, value from src PREHOOK: type: QUERY -POSTHOOK: query: explain select distinct key, value from src +POSTHOOK: query: -- negative, distinct +explain select distinct key, value from src POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) @@ -1075,9 +1105,11 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain create table srcx as select distinct key, value from src +PREHOOK: query: -- negative, CTAS +explain create table srcx as select distinct key, value from src PREHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: query: explain create table srcx as select distinct key, value from src +POSTHOOK: query: -- negative, CTAS +explain create table srcx as select distinct key, value from src POSTHOOK: type: CREATETABLE_AS_SELECT ABSTRACT SYNTAX TREE: (TOK_CREATETABLE (TOK_TABNAME srcx) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))) @@ -1170,9 +1202,11 @@ STAGE PLANS: Stats-Aggr Operator -PREHOOK: query: explain analyze table src compute statistics +PREHOOK: query: -- negative, analyze +explain analyze table src compute statistics PREHOOK: type: QUERY -POSTHOOK: query: explain analyze table src compute statistics +POSTHOOK: query: -- negative, analyze +explain analyze table src compute statistics POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_ANALYZE (TOK_TAB (TOK_TABNAME src))) @@ -1193,9 +1227,11 @@ STAGE PLANS: Stats-Aggr Operator -PREHOOK: query: explain select a.* from (select * from src) a +PREHOOK: query: -- negative, subq +explain select a.* from (select * from src) a PREHOOK: type: QUERY -POSTHOOK: query: explain select a.* from (select * from src) a +POSTHOOK: query: -- negative, subq +explain select a.* from (select * from src) a POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))))) @@ -1230,9 +1266,11 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain select * from src join src src2 on src.key=src2.key +PREHOOK: query: -- negative, join +explain select * from src join src src2 on src.key=src2.key PREHOOK: type: QUERY -POSTHOOK: query: explain select * from src join src src2 on src.key=src2.key +POSTHOOK: query: -- negative, join +explain select * from src join src src2 on src.key=src2.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME src) src2) (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) diff --git ql/src/test/results/clientpositive/orc_empty_strings.q.out ql/src/test/results/clientpositive/orc_empty_strings.q.out index a1e1f17..adc452b 100644 --- ql/src/test/results/clientpositive/orc_empty_strings.q.out +++ ql/src/test/results/clientpositive/orc_empty_strings.q.out @@ -18,11 +18,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_orc POSTHOOK: Lineage: test_orc.key SIMPLE [] -PREHOOK: query: SELECT * FROM test_orc +PREHOOK: query: -- Test reading a column which is just empty strings + +SELECT * FROM test_orc PREHOOK: type: QUERY PREHOOK: Input: default@test_orc #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM test_orc +POSTHOOK: query: -- Test reading a column which is just empty strings + +SELECT * FROM test_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@test_orc #### A masked pattern was here #### @@ -47,11 +51,15 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@test_orc POSTHOOK: Lineage: test_orc.key SIMPLE [] POSTHOOK: Lineage: test_orc.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT * FROM test_orc +PREHOOK: query: -- Test reading a column which has some empty strings + +SELECT * FROM test_orc PREHOOK: type: QUERY PREHOOK: Input: default@test_orc #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM test_orc +POSTHOOK: query: -- Test reading a column which has some empty strings + +SELECT * FROM test_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@test_orc #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/part_inherit_tbl_props.q.out ql/src/test/results/clientpositive/part_inherit_tbl_props.q.out index c1639fb..ed26384 100644 --- ql/src/test/results/clientpositive/part_inherit_tbl_props.q.out +++ ql/src/test/results/clientpositive/part_inherit_tbl_props.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table mytbl (c1 tinyint) partitioned by (c2 string) tblproperties ('a'='myval','b'='yourval','c'='noval') +PREHOOK: query: -- The property needs to be unset at the end of the test till HIVE-3109/HIVE-3112 is fixed + +create table mytbl (c1 tinyint) partitioned by (c2 string) tblproperties ('a'='myval','b'='yourval','c'='noval') PREHOOK: type: CREATETABLE -POSTHOOK: query: create table mytbl (c1 tinyint) partitioned by (c2 string) tblproperties ('a'='myval','b'='yourval','c'='noval') +POSTHOOK: query: -- The property needs to be unset at the end of the test till HIVE-3109/HIVE-3112 is fixed + +create table mytbl (c1 tinyint) partitioned by (c2 string) tblproperties ('a'='myval','b'='yourval','c'='noval') POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@mytbl PREHOOK: query: alter table mytbl add partition (c2 = 'v1') diff --git ql/src/test/results/clientpositive/part_inherit_tbl_props_with_star.q.out ql/src/test/results/clientpositive/part_inherit_tbl_props_with_star.q.out index af8e43a..ec0b3c2 100644 --- ql/src/test/results/clientpositive/part_inherit_tbl_props_with_star.q.out +++ ql/src/test/results/clientpositive/part_inherit_tbl_props_with_star.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table mytbl (c1 tinyint) partitioned by (c2 string) tblproperties ('a'='myval','b'='yourval','c'='noval') +PREHOOK: query: -- The property needs to be unset at the end of the test till HIVE-3109/HIVE-3112 is fixed + +create table mytbl (c1 tinyint) partitioned by (c2 string) tblproperties ('a'='myval','b'='yourval','c'='noval') PREHOOK: type: CREATETABLE -POSTHOOK: query: create table mytbl (c1 tinyint) partitioned by (c2 string) tblproperties ('a'='myval','b'='yourval','c'='noval') +POSTHOOK: query: -- The property needs to be unset at the end of the test till HIVE-3109/HIVE-3112 is fixed + +create table mytbl (c1 tinyint) partitioned by (c2 string) tblproperties ('a'='myval','b'='yourval','c'='noval') POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@mytbl PREHOOK: query: alter table mytbl add partition (c2 = 'v1') diff --git ql/src/test/results/clientpositive/partition_wise_fileformat10.q.out ql/src/test/results/clientpositive/partition_wise_fileformat10.q.out index 6634f48..390338c 100644 --- ql/src/test/results/clientpositive/partition_wise_fileformat10.q.out +++ ql/src/test/results/clientpositive/partition_wise_fileformat10.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: create table prt(key string, value string) partitioned by (dt string) +PREHOOK: query: -- This tests that the schema can be changed for binary serde data +create table prt(key string, value string) partitioned by (dt string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table prt(key string, value string) partitioned by (dt string) +POSTHOOK: query: -- This tests that the schema can be changed for binary serde data +create table prt(key string, value string) partitioned by (dt string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@prt PREHOOK: query: insert overwrite table prt partition(dt='1') select * from src where key = 238 diff --git ql/src/test/results/clientpositive/partition_wise_fileformat11.q.out ql/src/test/results/clientpositive/partition_wise_fileformat11.q.out index f10991b..a0325c8 100644 --- ql/src/test/results/clientpositive/partition_wise_fileformat11.q.out +++ ql/src/test/results/clientpositive/partition_wise_fileformat11.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +PREHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +POSTHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@partition_test_partitioned PREHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' diff --git ql/src/test/results/clientpositive/partition_wise_fileformat12.q.out ql/src/test/results/clientpositive/partition_wise_fileformat12.q.out index 80596b2..256bf1b 100644 --- ql/src/test/results/clientpositive/partition_wise_fileformat12.q.out +++ ql/src/test/results/clientpositive/partition_wise_fileformat12.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +PREHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +POSTHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@partition_test_partitioned PREHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' diff --git ql/src/test/results/clientpositive/partition_wise_fileformat13.q.out ql/src/test/results/clientpositive/partition_wise_fileformat13.q.out index f3e3ab1..6a85d74 100644 --- ql/src/test/results/clientpositive/partition_wise_fileformat13.q.out +++ ql/src/test/results/clientpositive/partition_wise_fileformat13.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: create table T1(key string, value string) partitioned by (dt string) stored as rcfile +PREHOOK: query: -- This tests that the schema can be changed for partitioned tables for binary serde data for joins +create table T1(key string, value string) partitioned by (dt string) stored as rcfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table T1(key string, value string) partitioned by (dt string) stored as rcfile +POSTHOOK: query: -- This tests that the schema can be changed for partitioned tables for binary serde data for joins +create table T1(key string, value string) partitioned by (dt string) stored as rcfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@T1 PREHOOK: query: alter table T1 set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' diff --git ql/src/test/results/clientpositive/partition_wise_fileformat14.q.out ql/src/test/results/clientpositive/partition_wise_fileformat14.q.out index 41f8d8b..1c52b08 100644 --- ql/src/test/results/clientpositive/partition_wise_fileformat14.q.out +++ ql/src/test/results/clientpositive/partition_wise_fileformat14.q.out @@ -90,7 +90,8 @@ POSTHOOK: Lineage: tbl1 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(nam POSTHOOK: Lineage: tbl1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select /*+mapjoin(subq1)*/ count(*) from +PREHOOK: query: -- The subquery itself is being map-joined. Multiple partitions of tbl1 with different schemas are being read for tbl2 +select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 @@ -102,7 +103,8 @@ PREHOOK: Input: default@tbl1@ds=2 PREHOOK: Input: default@tbl2 PREHOOK: Input: default@tbl2@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select /*+mapjoin(subq1)*/ count(*) from +POSTHOOK: query: -- The subquery itself is being map-joined. Multiple partitions of tbl1 with different schemas are being read for tbl2 +select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 @@ -121,7 +123,10 @@ POSTHOOK: Lineage: tbl1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name: POSTHOOK: Lineage: tbl2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 40 -PREHOOK: query: select /*+mapjoin(subq1)*/ count(*) from +PREHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should +-- be converted to a bucketized mapside join. Multiple partitions of tbl1 with different schemas are being read for each +-- bucket of tbl2 +select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 @@ -133,7 +138,10 @@ PREHOOK: Input: default@tbl1@ds=2 PREHOOK: Input: default@tbl2 PREHOOK: Input: default@tbl2@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select /*+mapjoin(subq1)*/ count(*) from +POSTHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should +-- be converted to a bucketized mapside join. Multiple partitions of tbl1 with different schemas are being read for each +-- bucket of tbl2 +select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 @@ -152,7 +160,10 @@ POSTHOOK: Lineage: tbl1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name: POSTHOOK: Lineage: tbl2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 40 -PREHOOK: query: select /*+mapjoin(subq1)*/ count(*) from +PREHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. Multiple partitions of tbl1 with different schemas are being read for a +-- given file of tbl2 +select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 @@ -164,7 +175,10 @@ PREHOOK: Input: default@tbl1@ds=2 PREHOOK: Input: default@tbl2 PREHOOK: Input: default@tbl2@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select /*+mapjoin(subq1)*/ count(*) from +POSTHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. Multiple partitions of tbl1 with different schemas are being read for a +-- given file of tbl2 +select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 @@ -183,7 +197,9 @@ POSTHOOK: Lineage: tbl1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name: POSTHOOK: Lineage: tbl2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 40 -PREHOOK: query: select /*+mapjoin(subq1)*/ count(*) from +PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side +-- join should be performed. Multiple partitions of tbl1 with different schemas are being read for tbl2 +select /*+mapjoin(subq1)*/ count(*) from (select a.key+1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join (select a.key+1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 @@ -195,7 +211,9 @@ PREHOOK: Input: default@tbl1@ds=2 PREHOOK: Input: default@tbl2 PREHOOK: Input: default@tbl2@ds=1 #### A masked pattern was here #### -POSTHOOK: query: select /*+mapjoin(subq1)*/ count(*) from +POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side +-- join should be performed. Multiple partitions of tbl1 with different schemas are being read for tbl2 +select /*+mapjoin(subq1)*/ count(*) from (select a.key+1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join (select a.key+1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 diff --git ql/src/test/results/clientpositive/partition_wise_fileformat15.q.out ql/src/test/results/clientpositive/partition_wise_fileformat15.q.out index e54b0ce..0b3f282 100644 --- ql/src/test/results/clientpositive/partition_wise_fileformat15.q.out +++ ql/src/test/results/clientpositive/partition_wise_fileformat15.q.out @@ -1,7 +1,9 @@ -PREHOOK: query: create table partition_test_partitioned(key string, value string) +PREHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table partition_test_partitioned(key string, value string) +POSTHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@partition_test_partitioned diff --git ql/src/test/results/clientpositive/partition_wise_fileformat16.q.out ql/src/test/results/clientpositive/partition_wise_fileformat16.q.out index 237223e..c4cebcb 100644 --- ql/src/test/results/clientpositive/partition_wise_fileformat16.q.out +++ ql/src/test/results/clientpositive/partition_wise_fileformat16.q.out @@ -1,7 +1,9 @@ -PREHOOK: query: create table partition_test_partitioned(key string, value string) +PREHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as textfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table partition_test_partitioned(key string, value string) +POSTHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@partition_test_partitioned diff --git ql/src/test/results/clientpositive/partition_wise_fileformat8.q.out ql/src/test/results/clientpositive/partition_wise_fileformat8.q.out index 03f6ddf..94856c4 100644 --- ql/src/test/results/clientpositive/partition_wise_fileformat8.q.out +++ ql/src/test/results/clientpositive/partition_wise_fileformat8.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +PREHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but +-- also different serdes +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +POSTHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but +-- also different serdes +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@partition_test_partitioned PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src diff --git ql/src/test/results/clientpositive/partition_wise_fileformat9.q.out ql/src/test/results/clientpositive/partition_wise_fileformat9.q.out index 72071fb..7dd8781 100644 --- ql/src/test/results/clientpositive/partition_wise_fileformat9.q.out +++ ql/src/test/results/clientpositive/partition_wise_fileformat9.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +PREHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but +-- also different serdes +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +POSTHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but +-- also different serdes +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@partition_test_partitioned PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src diff --git ql/src/test/results/clientpositive/plan_json.q.out ql/src/test/results/clientpositive/plan_json.q.out index e132949..30bf0bb 100644 --- ql/src/test/results/clientpositive/plan_json.q.out +++ ql/src/test/results/clientpositive/plan_json.q.out @@ -1,5 +1,9 @@ -PREHOOK: query: EXPLAIN FORMATTED SELECT count(1) FROM src +PREHOOK: query: -- explain plan json: the query gets the formatted json output of the query plan of the hive query + +EXPLAIN FORMATTED SELECT count(1) FROM src PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN FORMATTED SELECT count(1) FROM src +POSTHOOK: query: -- explain plan json: the query gets the formatted json output of the query plan of the hive query + +EXPLAIN FORMATTED SELECT count(1) FROM src POSTHOOK: type: QUERY {"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Reduce Operator Tree:":{"GBY_4":{"SEL_5":{"FS_6":{"File Output Operator":{"GlobalTableId:":"0","compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}}}}},"Split Sample:":{},"Alias -> Map Operator Tree:":{"src":{"TS_0":{"SEL_1":{"GBY_2":{"RS_3":{"Reduce Output Operator":{"Map-reduce partition columns:":[],"sort order:":"","tag:":"-1","value expressions:":[{"type:":"bigint","expr:":"_col0"}],"key expressions:":[]}}}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}},"ABSTRACT SYNTAX TREE":"(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))"} diff --git ql/src/test/results/clientpositive/ppd_constant_where.q.out ql/src/test/results/clientpositive/ppd_constant_where.q.out index fe13ab1..a99056a 100644 --- ql/src/test/results/clientpositive/ppd_constant_where.q.out +++ ql/src/test/results/clientpositive/ppd_constant_where.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM srcpart WHERE ds = '2008-04-08' and 'a' = 'a' +PREHOOK: query: -- Test that the partition pruner does not fail when there is a constant expression in the filter + +EXPLAIN SELECT COUNT(*) FROM srcpart WHERE ds = '2008-04-08' and 'a' = 'a' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM srcpart WHERE ds = '2008-04-08' and 'a' = 'a' +POSTHOOK: query: -- Test that the partition pruner does not fail when there is a constant expression in the filter + +EXPLAIN SELECT COUNT(*) FROM srcpart WHERE ds = '2008-04-08' and 'a' = 'a' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT))) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= 'a' 'a'))))) diff --git ql/src/test/results/clientpositive/ppd_repeated_alias.q.out ql/src/test/results/clientpositive/ppd_repeated_alias.q.out index 2f1b18a..4b1e630 100644 --- ql/src/test/results/clientpositive/ppd_repeated_alias.q.out +++ ql/src/test/results/clientpositive/ppd_repeated_alias.q.out @@ -16,13 +16,15 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table pokes2 (foo int, bar int, blah int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@pokes2 -PREHOOK: query: explain +PREHOOK: query: -- Q1: predicate should not be pushed on the right side of a left outer join +explain SELECT a.foo as foo1, b.foo as foo2, b.bar FROM pokes a LEFT OUTER JOIN pokes2 b ON a.foo=b.foo WHERE b.bar=3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- Q1: predicate should not be pushed on the right side of a left outer join +explain SELECT a.foo as foo1, b.foo as foo2, b.bar FROM pokes a LEFT OUTER JOIN pokes2 b ON a.foo=b.foo @@ -105,14 +107,16 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain +PREHOOK: query: -- Q2: predicate should not be pushed on the right side of a left outer join +explain SELECT * FROM (SELECT a.foo as foo1, b.foo as foo2, b.bar FROM pokes a LEFT OUTER JOIN pokes2 b ON a.foo=b.foo) a WHERE a.bar=3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- Q2: predicate should not be pushed on the right side of a left outer join +explain SELECT * FROM (SELECT a.foo as foo1, b.foo as foo2, b.bar FROM pokes a LEFT OUTER JOIN pokes2 b @@ -196,14 +200,16 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain +PREHOOK: query: -- Q3: predicate should be pushed +explain SELECT * FROM (SELECT a.foo as foo1, b.foo as foo2, a.bar FROM pokes a JOIN pokes2 b ON a.foo=b.foo) a WHERE a.bar=3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- Q3: predicate should be pushed +explain SELECT * FROM (SELECT a.foo as foo1, b.foo as foo2, a.bar FROM pokes a JOIN pokes2 b @@ -287,9 +293,11 @@ STAGE PLANS: limit: -1 -PREHOOK: query: explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d where d.foo=1 and c.bar=2 +PREHOOK: query: -- Q4: here, the filter c.bar should be created under the first join but above the second +explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d where d.foo=1 and c.bar=2 PREHOOK: type: QUERY -POSTHOOK: query: explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d where d.foo=1 and c.bar=2 +POSTHOOK: query: -- Q4: here, the filter c.bar should be created under the first join but above the second +explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d where d.foo=1 and c.bar=2 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME pokes) c) (TOK_TABREF (TOK_TABNAME pokes) b) (= (. (TOK_TABLE_OR_COL c) foo) (. (TOK_TABLE_OR_COL b) foo)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) foo)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) bar)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) blah))))) c) (TOK_TABREF (TOK_TABNAME pokes) d))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) foo)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL d) bar))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL d) foo) 1) (= (. (TOK_TABLE_OR_COL c) bar) 2))))) diff --git ql/src/test/results/clientpositive/ppd_union_view.q.out ql/src/test/results/clientpositive/ppd_union_view.q.out index edb46db..e3e404e 100644 --- ql/src/test/results/clientpositive/ppd_union_view.q.out +++ ql/src/test/results/clientpositive/ppd_union_view.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: drop view v +PREHOOK: query: -- test predicate pushdown on a view with a union + +drop view v PREHOOK: type: DROPVIEW -POSTHOOK: query: drop view v +POSTHOOK: query: -- test predicate pushdown on a view with a union + +drop view v POSTHOOK: type: DROPVIEW PREHOOK: query: create table t1_new (key string, value string) partitioned by (ds string) PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/protectmode.q.out ql/src/test/results/clientpositive/protectmode.q.out index 411bd8d..410049b 100644 --- ql/src/test/results/clientpositive/protectmode.q.out +++ ql/src/test/results/clientpositive/protectmode.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: drop table tbl1 +PREHOOK: query: -- protect mode: syntax to change protect mode works and queries are not blocked if a table or partition is not in protect mode + +drop table tbl1 PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table tbl1 +POSTHOOK: query: -- protect mode: syntax to change protect mode works and queries are not blocked if a table or partition is not in protect mode + +drop table tbl1 POSTHOOK: type: DROPTABLE PREHOOK: query: drop table tbl2 PREHOOK: type: DROPTABLE diff --git ql/src/test/results/clientpositive/ptf.q.out ql/src/test/results/clientpositive/ptf.q.out index 92fdba7..7056c9c 100644 --- ql/src/test/results/clientpositive/ptf.q.out +++ ql/src/test/results/clientpositive/ptf.q.out @@ -2,7 +2,8 @@ PREHOOK: query: DROP TABLE part PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE part POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE part( +PREHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -14,7 +15,8 @@ PREHOOK: query: CREATE TABLE part( p_comment STRING ) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part( +POSTHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -83,7 +85,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 2. testJoinWithNoop +select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr @@ -91,7 +94,8 @@ sort by j.p_name) PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 2. testJoinWithNoop +select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part p1 join part p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr @@ -127,14 +131,16 @@ Manufacturer#5 almond antique medium spring khaki 6 -25 Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 -PREHOOK: query: select p_mfgr, p_name, p_size +PREHOOK: query: -- 3. testOnlyPTF +select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size +POSTHOOK: query: -- 3. testOnlyPTF +select p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) @@ -167,7 +173,8 @@ Manufacturer#5 almond antique medium spring khaki 6 Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 4. testPTFAlias +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -178,7 +185,8 @@ from noop(on part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 4. testPTFAlias +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -215,7 +223,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 5. testPTFAndWhereWithWindowing +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz @@ -226,7 +235,8 @@ from noop(on part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 5. testPTFAndWhereWithWindowing +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz @@ -263,7 +273,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 6. testSWQAndPTFAndGBy +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz @@ -275,7 +286,8 @@ group by p_mfgr, p_name, p_size PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 6. testSWQAndPTFAndGBy +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz @@ -312,7 +324,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: select abc.* +PREHOOK: query: -- 7. testJoin +select abc.* from noop(on part partition by p_mfgr order by p_name @@ -320,7 +333,8 @@ order by p_name PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select abc.* +POSTHOOK: query: -- 7. testJoin +select abc.* from noop(on part partition by p_mfgr order by p_name @@ -356,7 +370,8 @@ POSTHOOK: Input: default@part 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de -PREHOOK: query: select abc.* +PREHOOK: query: -- 8. testJoinRight +select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name @@ -364,7 +379,8 @@ order by p_name PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select abc.* +POSTHOOK: query: -- 8. testJoinRight +select abc.* from part p1 join noop(on part partition by p_mfgr order by p_name @@ -400,7 +416,8 @@ POSTHOOK: Input: default@part 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 9. testNoopWithMap +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr @@ -408,7 +425,8 @@ order by p_name, p_size desc) PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 9. testNoopWithMap +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part partition by p_mfgr @@ -442,7 +460,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 10. testNoopWithMapWithWindowing +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -452,7 +471,8 @@ from noopwithmap(on part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 10. testNoopWithMapWithWindowing +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -488,7 +508,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -498,7 +519,8 @@ order by p_name) PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 11. testHavingWithWindowingPTFNoGBY +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -534,7 +556,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 12. testFunctionChain +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -545,7 +568,8 @@ order by p_mfgr, p_name PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 12. testFunctionChain +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -582,7 +606,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 13. testPTFAndWindowingInSubQ +select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, count(p_size) over (partition by p_mfgr order by p_name) as cd, @@ -596,7 +621,8 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 13. testPTFAndWindowingInSubQ +select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, count(p_size) over (partition by p_mfgr order by p_name) as cd, @@ -636,7 +662,8 @@ Manufacturer#5 almond antique medium spring khaki 2 6208.18 Manufacturer#5 almond antique sky peru orange 3 7672.66 Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.970000000001 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.3099999999995 -PREHOOK: query: select abc.p_mfgr, abc.p_name, +PREHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, @@ -649,7 +676,8 @@ order by p_name PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select abc.p_mfgr, abc.p_name, +POSTHOOK: query: -- 14. testPTFJoinWithWindowingWithCount +select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, count(abc.p_name) over (distribute by abc.p_mfgr sort by abc.p_name) as cd, @@ -690,14 +718,16 @@ Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.35000000000 Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 -PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size +PREHOOK: query: -- 15. testDistinctInSelectWithPTF +select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size +POSTHOOK: query: -- 15. testDistinctInSelectWithPTF +select DISTINCT p_mfgr, p_name, p_size from noop(on part partition by p_mfgr order by p_name) @@ -729,13 +759,15 @@ Manufacturer#5 almond antique medium spring khaki 6 Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -PREHOOK: query: create view IF NOT EXISTS mfgr_price_view as +PREHOOK: query: -- 16. testViewAsTableInputToPTF +create view IF NOT EXISTS mfgr_price_view as select p_mfgr, p_brand, sum(p_retailprice) as s from part group by p_mfgr, p_brand PREHOOK: type: CREATEVIEW -POSTHOOK: query: create view IF NOT EXISTS mfgr_price_view as +POSTHOOK: query: -- 16. testViewAsTableInputToPTF +create view IF NOT EXISTS mfgr_price_view as select p_mfgr, p_brand, sum(p_retailprice) as s from part @@ -778,7 +810,8 @@ Manufacturer#4 Brand#42 2581.6800000000003 7337.620000000001 Manufacturer#5 Brand#51 1611.66 1611.66 Manufacturer#5 Brand#52 3254.17 4865.83 Manufacturer#5 Brand#53 2806.83 7672.66 -PREHOOK: query: CREATE TABLE part_4( +PREHOOK: query: -- 17. testMultipleInserts2SWQsWithPTF +CREATE TABLE part_4( p_mfgr STRING, p_name STRING, p_size INT, @@ -786,7 +819,8 @@ r INT, dr INT, s DOUBLE) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part_4( +POSTHOOK: query: -- 17. testMultipleInserts2SWQsWithPTF +CREATE TABLE part_4( p_mfgr STRING, p_name STRING, p_size INT, @@ -969,7 +1003,8 @@ Manufacturer#5 almond antique medium spring khaki 6 37 8 2 2 0.4 31 Manufacturer#5 almond azure blanched chiffon midnight 23 108 23 5 5 1.0 2 Manufacturer#5 almond antique blue firebrick mint 31 31 31 1 1 0.2 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 85 46 4 4 0.8 6 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 @@ -987,7 +1022,8 @@ from noop(on PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 18. testMulti2OperatorsFunctionChainWithMap +select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 @@ -1046,7 +1082,8 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 19. testMulti3OperatorsFunctionChain +select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -1064,7 +1101,8 @@ from noop(on PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 19. testMulti3OperatorsFunctionChain +select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -1123,7 +1161,8 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing +select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 @@ -1139,7 +1178,8 @@ from noop(on PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 20. testMultiOperatorChainWithNoWindowing +select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, p_size, sum(p_size) over (partition by p_mfgr order by p_name) as s1 @@ -1196,7 +1236,8 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 @@ -1214,7 +1255,8 @@ from noopwithmap(on PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 21. testMultiOperatorChainEndsWithNoopMap +select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, p_size, sum(p_size) over (partition by p_mfgr,p_name rows between unbounded preceding and current row) as s1 @@ -1273,7 +1315,8 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, p_size, @@ -1290,7 +1333,8 @@ from noop(on PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 +select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, p_size, @@ -1348,7 +1392,8 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_mfgr) as r, dense_rank() over (partition by p_mfgr order by p_mfgr) as dr, p_size, @@ -1363,7 +1408,8 @@ from noopwithmap(on PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 +select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_mfgr) as r, dense_rank() over (partition by p_mfgr order by p_mfgr) as dr, p_size, diff --git ql/src/test/results/clientpositive/ptf_decimal.q.out ql/src/test/results/clientpositive/ptf_decimal.q.out index aa6d59c..2090829 100644 --- ql/src/test/results/clientpositive/ptf_decimal.q.out +++ ql/src/test/results/clientpositive/ptf_decimal.q.out @@ -2,7 +2,8 @@ PREHOOK: query: DROP TABLE IF EXISTS part PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE IF EXISTS part POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE part( +PREHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -14,7 +15,8 @@ PREHOOK: query: CREATE TABLE part( p_comment STRING ) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part( +POSTHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -33,7 +35,9 @@ PREHOOK: Output: default@part POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part POSTHOOK: type: LOAD POSTHOOK: Output: default@part -PREHOOK: query: select p_mfgr, p_retailprice, +PREHOOK: query: -- 1. aggregate functions with decimal type + +select p_mfgr, p_retailprice, lead(p_retailprice) over (partition by p_mfgr ORDER BY p_name) as c1, lag(p_retailprice) over (partition by p_mfgr ORDER BY p_name) as c2, first_value(p_retailprice) over (partition by p_mfgr ORDER BY p_name) as c3, @@ -42,7 +46,9 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, +POSTHOOK: query: -- 1. aggregate functions with decimal type + +select p_mfgr, p_retailprice, lead(p_retailprice) over (partition by p_mfgr ORDER BY p_name) as c1, lag(p_retailprice) over (partition by p_mfgr ORDER BY p_name) as c2, first_value(p_retailprice) over (partition by p_mfgr ORDER BY p_name) as c3, @@ -77,7 +83,9 @@ Manufacturer#5 1611.66 1788.73 1789.69 1789.69 1611.66 Manufacturer#5 1788.73 1018.1 1611.66 1789.69 1788.73 Manufacturer#5 1018.1 1464.48 1788.73 1789.69 1018.1 Manufacturer#5 1464.48 NULL 1018.1 1789.69 1464.48 -PREHOOK: query: select p_mfgr, p_retailprice, +PREHOOK: query: -- 2. ranking functions with decimal type + +select p_mfgr, p_retailprice, row_number() over (PARTITION BY p_mfgr ORDER BY p_retailprice) as c1, rank() over (PARTITION BY p_mfgr ORDER BY p_retailprice) as c2, dense_rank() over (PARTITION BY p_mfgr ORDER BY p_retailprice) as c3, @@ -88,7 +96,9 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, +POSTHOOK: query: -- 2. ranking functions with decimal type + +select p_mfgr, p_retailprice, row_number() over (PARTITION BY p_mfgr ORDER BY p_retailprice) as c1, rank() over (PARTITION BY p_mfgr ORDER BY p_retailprice) as c2, dense_rank() over (PARTITION BY p_mfgr ORDER BY p_retailprice) as c3, @@ -125,13 +135,17 @@ Manufacturer#5 1464.48 2 2 2 0.25 0.4 2 Manufacturer#5 1611.66 3 3 3 0.5 0.6 3 Manufacturer#5 1788.73 4 4 4 0.75 0.8 4 Manufacturer#5 1789.69 5 5 5 1.0 1.0 5 -PREHOOK: query: select p_mfgr, p_retailprice, +PREHOOK: query: -- 3. order by decimal + +select p_mfgr, p_retailprice, lag(p_retailprice) over (partition by p_mfgr ORDER BY p_retailprice desc) as c1 from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, +POSTHOOK: query: -- 3. order by decimal + +select p_mfgr, p_retailprice, lag(p_retailprice) over (partition by p_mfgr ORDER BY p_retailprice desc) as c1 from part POSTHOOK: type: QUERY @@ -163,13 +177,17 @@ Manufacturer#5 1788.73 1789.69 Manufacturer#5 1611.66 1788.73 Manufacturer#5 1464.48 1611.66 Manufacturer#5 1018.1 1464.48 -PREHOOK: query: select p_mfgr, p_retailprice, +PREHOOK: query: -- 4. partition by decimal + +select p_mfgr, p_retailprice, lag(p_retailprice) over (partition by p_retailprice) as c1 from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_retailprice, +POSTHOOK: query: -- 4. partition by decimal + +select p_mfgr, p_retailprice, lag(p_retailprice) over (partition by p_retailprice) as c1 from part POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/ptf_general_queries.q.out ql/src/test/results/clientpositive/ptf_general_queries.q.out index c408ba6..a7c5384 100644 --- ql/src/test/results/clientpositive/ptf_general_queries.q.out +++ ql/src/test/results/clientpositive/ptf_general_queries.q.out @@ -2,7 +2,8 @@ PREHOOK: query: DROP TABLE part PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE part POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE part( +PREHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -14,7 +15,8 @@ PREHOOK: query: CREATE TABLE part( p_comment STRING ) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part( +POSTHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -33,14 +35,16 @@ PREHOOK: Output: default@part POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part POSTHOOK: type: LOAD POSTHOOK: Output: default@part -PREHOOK: query: select p_mfgr, p_name, p_size +PREHOOK: query: -- 1. testNoPTFNoWindowing +select p_mfgr, p_name, p_size from part distribute by p_mfgr sort by p_name PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size +POSTHOOK: query: -- 1. testNoPTFNoWindowing +select p_mfgr, p_name, p_size from part distribute by p_mfgr sort by p_name @@ -73,7 +77,8 @@ Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 Manufacturer#4 almond aquamarine yellow dodger mint 7 Manufacturer#4 almond azure aquamarine papaya violet 12 Manufacturer#5 almond azure blanched chiffon midnight 23 -PREHOOK: query: select p_mfgr,p_name, p_retailprice, +PREHOOK: query: -- 2. testUDAFsNoWindowingNoPTFNoGBY +select p_mfgr,p_name, p_retailprice, sum(p_retailprice) over(partition by p_mfgr order by p_mfgr) as s, min(p_retailprice) over(partition by p_mfgr order by p_mfgr) as mi, max(p_retailprice) over(partition by p_mfgr order by p_mfgr) as ma, @@ -82,7 +87,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +POSTHOOK: query: -- 2. testUDAFsNoWindowingNoPTFNoGBY +select p_mfgr,p_name, p_retailprice, sum(p_retailprice) over(partition by p_mfgr order by p_mfgr) as s, min(p_retailprice) over(partition by p_mfgr order by p_mfgr) as mi, max(p_retailprice) over(partition by p_mfgr order by p_mfgr) as ma, @@ -117,11 +123,13 @@ Manufacturer#5 almond antique medium spring khaki 1611.66 7672.66 1018.1 1789.69 Manufacturer#5 almond antique sky peru orange 1788.73 7672.66 1018.1 1789.69 1534.532 Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 7672.66 1018.1 1789.69 1534.532 Manufacturer#5 almond azure blanched chiffon midnight 1464.48 7672.66 1018.1 1789.69 1534.532 -PREHOOK: query: select 'tst1' as key, count(1) as value from part +PREHOOK: query: -- 3. testConstExprInSelect +select 'tst1' as key, count(1) as value from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select 'tst1' as key, count(1) as value from part +POSTHOOK: query: -- 3. testConstExprInSelect +select 'tst1' as key, count(1) as value from part POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/ptf_npath.q.out ql/src/test/results/clientpositive/ptf_npath.q.out index 3e30090..1e19211 100644 --- ql/src/test/results/clientpositive/ptf_npath.q.out +++ ql/src/test/results/clientpositive/ptf_npath.q.out @@ -29,7 +29,8 @@ PREHOOK: Output: default@flights_tiny POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/flights_tiny.txt' OVERWRITE INTO TABLE flights_tiny POSTHOOK: type: LOAD POSTHOOK: Output: default@flights_tiny -PREHOOK: query: select origin_city_name, fl_num, year, month, day_of_month, sz, tpath +PREHOOK: query: -- 1. basic Npath test +select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from npath(on flights_tiny distribute by fl_num @@ -41,7 +42,8 @@ from npath(on PREHOOK: type: QUERY PREHOOK: Input: default@flights_tiny #### A masked pattern was here #### -POSTHOOK: query: select origin_city_name, fl_num, year, month, day_of_month, sz, tpath +POSTHOOK: query: -- 1. basic Npath test +select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from npath(on flights_tiny distribute by fl_num @@ -69,7 +71,8 @@ Washington 7291 2010 10 27 2 [{"origin_city_name":"Washington","dest_city_name": Chicago 897 2010 10 20 4 [{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":20,"arr_delay":24.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":21,"arr_delay":77.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":22,"arr_delay":24.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":24,"arr_delay":113.0,"fl_num":"897"}] Chicago 897 2010 10 21 3 [{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":21,"arr_delay":77.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":22,"arr_delay":24.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":24,"arr_delay":113.0,"fl_num":"897"}] Chicago 897 2010 10 22 2 [{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":22,"arr_delay":24.0,"fl_num":"897"},{"origin_city_name":"Chicago","dest_city_name":"New York","year":2010,"month":10,"day_of_month":24,"arr_delay":113.0,"fl_num":"897"}] -PREHOOK: query: select origin_city_name, fl_num, year, month, day_of_month, sz, tpath +PREHOOK: query: -- 2. Npath on 1 partition +select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from npath(on flights_tiny sort by year, month, day_of_month @@ -81,7 +84,8 @@ where fl_num = 1142 PREHOOK: type: QUERY PREHOOK: Input: default@flights_tiny #### A masked pattern was here #### -POSTHOOK: query: select origin_city_name, fl_num, year, month, day_of_month, sz, tpath +POSTHOOK: query: -- 2. Npath on 1 partition +select origin_city_name, fl_num, year, month, day_of_month, sz, tpath from npath(on flights_tiny sort by year, month, day_of_month diff --git ql/src/test/results/clientpositive/ptf_rcfile.q.out ql/src/test/results/clientpositive/ptf_rcfile.q.out index 49dd80e..8d83f44 100644 --- ql/src/test/results/clientpositive/ptf_rcfile.q.out +++ ql/src/test/results/clientpositive/ptf_rcfile.q.out @@ -33,7 +33,8 @@ PREHOOK: Output: default@part_rc POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part.rc' overwrite into table part_rc POSTHOOK: type: LOAD POSTHOOK: Output: default@part_rc -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- testWindowingPTFWithPartRC +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -43,7 +44,8 @@ order by p_name) PREHOOK: type: QUERY PREHOOK: Input: default@part_rc #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- testWindowingPTFWithPartRC +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 diff --git ql/src/test/results/clientpositive/ptf_seqfile.q.out ql/src/test/results/clientpositive/ptf_seqfile.q.out index ee3d9b8..7d36c8f 100644 --- ql/src/test/results/clientpositive/ptf_seqfile.q.out +++ ql/src/test/results/clientpositive/ptf_seqfile.q.out @@ -33,7 +33,8 @@ PREHOOK: Output: default@part_seq POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part.seq' overwrite into table part_seq POSTHOOK: type: LOAD POSTHOOK: Output: default@part_seq -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- testWindowingPTFWithPartSeqFile +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 @@ -43,7 +44,8 @@ order by p_name) PREHOOK: type: QUERY PREHOOK: Input: default@part_seq #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- testWindowingPTFWithPartSeqFile +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, sum(p_retailprice) over (partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s1 diff --git ql/src/test/results/clientpositive/rand_partitionpruner1.q.out ql/src/test/results/clientpositive/rand_partitionpruner1.q.out index 08cc745..2f006c6 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner1.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner1.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: explain extended select * from src where rand(1) < 0.1 +PREHOOK: query: -- scanning un-partitioned data +explain extended select * from src where rand(1) < 0.1 PREHOOK: type: QUERY -POSTHOOK: query: explain extended select * from src where rand(1) < 0.1 +POSTHOOK: query: -- scanning un-partitioned data +explain extended select * from src where rand(1) < 0.1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_FUNCTION rand 1) 0.1)))) diff --git ql/src/test/results/clientpositive/rand_partitionpruner2.q.out ql/src/test/results/clientpositive/rand_partitionpruner2.q.out index c7512ec..3092947 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner2.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner2.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table tmptable(key string, value string, hr string, ds string) +PREHOOK: query: -- scanning partitioned data + +create table tmptable(key string, value string, hr string, ds string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tmptable(key string, value string, hr string, ds string) +POSTHOOK: query: -- scanning partitioned data + +create table tmptable(key string, value string, hr string, ds string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmptable PREHOOK: query: explain extended diff --git ql/src/test/results/clientpositive/rand_partitionpruner3.q.out ql/src/test/results/clientpositive/rand_partitionpruner3.q.out index f432b9b..600a834 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner3.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner3.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: explain extended select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' +PREHOOK: query: -- complex predicates in the where clause + +explain extended select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' PREHOOK: type: QUERY -POSTHOOK: query: explain extended select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' +POSTHOOK: query: -- complex predicates in the where clause + +explain extended select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_WHERE (and (and (and (< (TOK_FUNCTION rand 1) 0.1) (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08')) (not (or (> (TOK_TABLE_OR_COL key) 50) (< (TOK_TABLE_OR_COL key) 10)))) (like (. (TOK_TABLE_OR_COL a) hr) '%2'))))) @@ -123,9 +127,11 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 26 val_26 2008-04-08 12 18 val_18 2008-04-08 12 37 val_37 2008-04-08 12 -PREHOOK: query: explain extended select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' +PREHOOK: query: -- without rand for comparison +explain extended select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' PREHOOK: type: QUERY -POSTHOOK: query: explain extended select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' +POSTHOOK: query: -- without rand for comparison +explain extended select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2' POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (not (or (> (TOK_TABLE_OR_COL key) 50) (< (TOK_TABLE_OR_COL key) 10)))) (like (. (TOK_TABLE_OR_COL a) hr) '%2'))))) diff --git ql/src/test/results/clientpositive/rcfile_merge1.q.out ql/src/test/results/clientpositive/rcfile_merge1.q.out index c050270..385dde5 100644 --- ql/src/test/results/clientpositive/rcfile_merge1.q.out +++ ql/src/test/results/clientpositive/rcfile_merge1.q.out @@ -20,12 +20,14 @@ POSTHOOK: query: CREATE TABLE rcfile_merge1b (key INT, value STRING) PARTITIONED BY (ds STRING, part STRING) STORED AS RCFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@rcfile_merge1b -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Use non block-level merge +EXPLAIN INSERT OVERWRITE TABLE rcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 100) as part FROM src PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Use non block-level merge +EXPLAIN INSERT OVERWRITE TABLE rcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 100) as part FROM src @@ -1224,7 +1226,8 @@ POSTHOOK: Lineage: rcfile_merge1b PARTITION(ds=1,part=99).key EXPRESSION [(src)s POSTHOOK: Lineage: rcfile_merge1b PARTITION(ds=1,part=99).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge1b PARTITION(ds=1,part=9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge1b PARTITION(ds=1,part=9).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(HASH(c)) FROM ( +PREHOOK: query: -- Verify +SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) FROM rcfile_merge1 WHERE ds='1' ) t @@ -1330,7 +1333,8 @@ PREHOOK: Input: default@rcfile_merge1@ds=1/part=97 PREHOOK: Input: default@rcfile_merge1@ds=1/part=98 PREHOOK: Input: default@rcfile_merge1@ds=1/part=99 #### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(HASH(c)) FROM ( +POSTHOOK: query: -- Verify +SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) FROM rcfile_merge1 WHERE ds='1' ) t diff --git ql/src/test/results/clientpositive/reducesink_dedup.q.out ql/src/test/results/clientpositive/reducesink_dedup.q.out index a0b8169..32dfdd4 100644 --- ql/src/test/results/clientpositive/reducesink_dedup.q.out +++ ql/src/test/results/clientpositive/reducesink_dedup.q.out @@ -2,7 +2,8 @@ PREHOOK: query: DROP TABLE part PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE part POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE part( +PREHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -14,7 +15,8 @@ PREHOOK: query: CREATE TABLE part( p_comment STRING ) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part( +POSTHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, diff --git ql/src/test/results/clientpositive/rename_column.q.out ql/src/test/results/clientpositive/rename_column.q.out index 0892f03..f77ba41 100644 --- ql/src/test/results/clientpositive/rename_column.q.out +++ ql/src/test/results/clientpositive/rename_column.q.out @@ -151,9 +151,11 @@ src_thrift srcbucket srcbucket2 srcpart -PREHOOK: query: CREATE DATABASE kv_rename_test_db +PREHOOK: query: -- Using non-default Database +CREATE DATABASE kv_rename_test_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE kv_rename_test_db +POSTHOOK: query: -- Using non-default Database +CREATE DATABASE kv_rename_test_db POSTHOOK: type: CREATEDATABASE PREHOOK: query: USE kv_rename_test_db PREHOOK: type: SWITCHDATABASE diff --git ql/src/test/results/clientpositive/rename_partition_location.q.out ql/src/test/results/clientpositive/rename_partition_location.q.out index fce291e..3086aa2 100644 --- ql/src/test/results/clientpositive/rename_partition_location.q.out +++ ql/src/test/results/clientpositive/rename_partition_location.q.out @@ -1,8 +1,14 @@ -PREHOOK: query: CREATE TABLE rename_partition_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: query: -- This test verifies that if the tables location changes, renaming a partition will not change +-- the partition location accordingly + +CREATE TABLE rename_partition_table (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE #### A masked pattern was here #### PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE rename_partition_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: query: -- This test verifies that if the tables location changes, renaming a partition will not change +-- the partition location accordingly + +CREATE TABLE rename_partition_table (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE #### A masked pattern was here #### POSTHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/rename_table_location.q.out ql/src/test/results/clientpositive/rename_table_location.q.out index 51cfff2..bf79ea3 100644 --- ql/src/test/results/clientpositive/rename_table_location.q.out +++ ql/src/test/results/clientpositive/rename_table_location.q.out @@ -1,8 +1,14 @@ -PREHOOK: query: CREATE TABLE rename_partition_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: query: -- This test verifies that if the tables location changes, renaming a table will not change +-- the table location scheme + +CREATE TABLE rename_partition_table (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE #### A masked pattern was here #### PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE rename_partition_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: query: -- This test verifies that if the tables location changes, renaming a table will not change +-- the table location scheme + +CREATE TABLE rename_partition_table (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE #### A masked pattern was here #### POSTHOOK: type: CREATETABLE @@ -27,7 +33,10 @@ POSTHOOK: Input: default@rename_partition_table POSTHOOK: Output: default@rename_partition_table POSTHOOK: Lineage: rename_partition_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rename_partition_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: ALTER TABLE rename_partition_table RENAME TO rename_partition_table_renamed +PREHOOK: query: -- If the metastore attempts to change the scheme of the table back to the default pfile, it will get +-- an exception related to the source and destination file systems not matching + +ALTER TABLE rename_partition_table RENAME TO rename_partition_table_renamed PREHOOK: type: ALTERTABLE_RENAME PREHOOK: Input: default@rename_partition_table PREHOOK: Output: default@rename_partition_table diff --git ql/src/test/results/clientpositive/sample1.q.out ql/src/test/results/clientpositive/sample1.q.out index e8eeb39..113e641 100644 --- ql/src/test/results/clientpositive/sample1.q.out +++ ql/src/test/results/clientpositive/sample1.q.out @@ -3,12 +3,14 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING, dt STRING, hr STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- no input pruning, no sample filter +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s WHERE s.ds='2008-04-08' and s.hr='11' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- no input pruning, no sample filter +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s WHERE s.ds='2008-04-08' and s.hr='11' diff --git ql/src/test/results/clientpositive/sample10.q.out ql/src/test/results/clientpositive/sample10.q.out index b58be83..e4fecbe 100644 --- ql/src/test/results/clientpositive/sample10.q.out +++ ql/src/test/results/clientpositive/sample10.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets PREHOOK: type: CREATETABLE -POSTHOOK: query: create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) + +create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@srcpartbucket PREHOOK: query: insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10 diff --git ql/src/test/results/clientpositive/sample2.q.out ql/src/test/results/clientpositive/sample2.q.out index 0a1cb5a..63f13f9 100644 --- ql/src/test/results/clientpositive/sample2.q.out +++ ql/src/test/results/clientpositive/sample2.q.out @@ -3,11 +3,15 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- input pruning, no sample filter +-- default table sample columns +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2) s PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- input pruning, no sample filter +-- default table sample columns +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2) s POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/sample3.q.out ql/src/test/results/clientpositive/sample3.q.out index f608e2d..187baa6 100644 --- ql/src/test/results/clientpositive/sample3.q.out +++ ql/src/test/results/clientpositive/sample3.q.out @@ -1,8 +1,10 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: -- no input pruning, sample filter +EXPLAIN SELECT s.key FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- no input pruning, sample filter +EXPLAIN SELECT s.key FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/sample4.q.out ql/src/test/results/clientpositive/sample4.q.out index 496c5f4..2bfc93b 100644 --- ql/src/test/results/clientpositive/sample4.q.out +++ ql/src/test/results/clientpositive/sample4.q.out @@ -3,11 +3,15 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- bucket column is the same as table sample +-- No need for sample filter +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2 on key) s PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- bucket column is the same as table sample +-- No need for sample filter +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2 on key) s POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/sample5.q.out ql/src/test/results/clientpositive/sample5.q.out index 49c4bf1..5560cc0 100644 --- ql/src/test/results/clientpositive/sample5.q.out +++ ql/src/test/results/clientpositive/sample5.q.out @@ -3,12 +3,14 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest1 SELECT s.* +PREHOOK: query: -- no input pruning, sample filter +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest1 SELECT s.* -- here's another test FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest1 SELECT s.* +POSTHOOK: query: -- no input pruning, sample filter +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest1 SELECT s.* -- here's another test FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -293,12 +295,12 @@ STAGE PLANS: #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* +PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* -- here's another test FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s PREHOOK: type: QUERY PREHOOK: Input: default@srcbucket PREHOOK: Output: default@dest1 -POSTHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* +POSTHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* -- here's another test FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket diff --git ql/src/test/results/clientpositive/sample6.q.out ql/src/test/results/clientpositive/sample6.q.out index 5663ae6..cd78d8b 100644 --- ql/src/test/results/clientpositive/sample6.q.out +++ ql/src/test/results/clientpositive/sample6.q.out @@ -3,11 +3,13 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- both input pruning and sample filter +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- both input pruning and sample filter +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/sample7.q.out ql/src/test/results/clientpositive/sample7.q.out index 9276210..e3cc5d0 100644 --- ql/src/test/results/clientpositive/sample7.q.out +++ ql/src/test/results/clientpositive/sample7.q.out @@ -3,12 +3,14 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- both input pruning and sample filter +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s WHERE s.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- both input pruning and sample filter +EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s WHERE s.key > 100 diff --git ql/src/test/results/clientpositive/sample8.q.out ql/src/test/results/clientpositive/sample8.q.out index 58c1135..8f26dc8 100644 --- ql/src/test/results/clientpositive/sample8.q.out +++ ql/src/test/results/clientpositive/sample8.q.out @@ -1,4 +1,5 @@ -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- sampling with join and alias +EXPLAIN EXTENDED SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON key) s JOIN srcpart TABLESAMPLE (BUCKET 1 OUT OF 10 ON key) t @@ -6,7 +7,8 @@ WHERE t.key = s.key and t.value = s.value and s.ds='2008-04-08' and s.hr='11' an DISTRIBUTE BY key, value SORT BY key, value PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- sampling with join and alias +EXPLAIN EXTENDED SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON key) s JOIN srcpart TABLESAMPLE (BUCKET 1 OUT OF 10 ON key) t diff --git ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out index 86fc077..96956b2 100644 --- ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out +++ ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out @@ -2,9 +2,27 @@ PREHOOK: query: USE default PREHOOK: type: SWITCHDATABASE POSTHOOK: query: USE default POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: create table sih_i_part (key int, value string) partitioned by (p string) +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. + +-- create file inputs +create table sih_i_part (key int, value string) partitioned by (p string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table sih_i_part (key int, value string) partitioned by (p string) +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. + +-- create file inputs +create table sih_i_part (key int, value string) partitioned by (p string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@sih_i_part PREHOOK: query: insert overwrite table sih_i_part partition (p='1') select key, value from src @@ -75,18 +93,21 @@ POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchem POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: sih_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: sih_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select count(1) from sih_src tablesample(1 percent) +PREHOOK: query: -- Sample split, running locally limited by num tasks +select count(1) from sih_src tablesample(1 percent) PREHOOK: type: QUERY PREHOOK: Input: default@sih_src #### A masked pattern was here #### 1500 -PREHOOK: query: select count(1) from sih_src tablesample(1 percent)a join sih_src2 tablesample(1 percent)b on a.key = b.key +PREHOOK: query: -- sample two tables +select count(1) from sih_src tablesample(1 percent)a join sih_src2 tablesample(1 percent)b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@sih_src PREHOOK: Input: default@sih_src2 #### A masked pattern was here #### 3084 -PREHOOK: query: select count(1) from sih_src tablesample(1 percent) +PREHOOK: query: -- sample split, running locally limited by max bytes +select count(1) from sih_src tablesample(1 percent) PREHOOK: type: QUERY PREHOOK: Input: default@sih_src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/script_env_var1.q.out ql/src/test/results/clientpositive/script_env_var1.q.out index 082f3cc..071aa23 100644 --- ql/src/test/results/clientpositive/script_env_var1.q.out +++ ql/src/test/results/clientpositive/script_env_var1.q.out @@ -1,10 +1,14 @@ -PREHOOK: query: SELECT count(1) FROM +PREHOOK: query: -- Verifies that script operator ID environment variables have unique values +-- in each instance of the script operator. +SELECT count(1) FROM ( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT count(1) FROM +POSTHOOK: query: -- Verifies that script operator ID environment variables have unique values +-- in each instance of the script operator. +SELECT count(1) FROM ( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/script_env_var2.q.out ql/src/test/results/clientpositive/script_env_var2.q.out index df99e7c..8e2b8e1 100644 --- ql/src/test/results/clientpositive/script_env_var2.q.out +++ ql/src/test/results/clientpositive/script_env_var2.q.out @@ -1,10 +1,12 @@ -PREHOOK: query: SELECT count(1) FROM +PREHOOK: query: -- Same test as script_env_var1, but test setting the variable name +SELECT count(1) FROM ( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT count(1) FROM +POSTHOOK: query: -- Same test as script_env_var1, but test setting the variable name +SELECT count(1) FROM ( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/script_pipe.q.out ql/src/test/results/clientpositive/script_pipe.q.out index 925cec3..796d6ed 100644 --- ql/src/test/results/clientpositive/script_pipe.q.out +++ ql/src/test/results/clientpositive/script_pipe.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: EXPLAIN SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp +PREHOOK: query: -- Tests exception in ScriptOperator.close() by passing to the operator a small amount of data +EXPLAIN SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp +POSTHOOK: query: -- Tests exception in ScriptOperator.close() by passing to the operator a small amount of data +EXPLAIN SELECT TRANSFORM(*) USING 'true' AS a, b, c FROM (SELECT * FROM src LIMIT 1) tmp POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 1))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST TOK_ALLCOLREF) TOK_SERDE TOK_RECORDWRITER 'true' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST a b c)))))) @@ -59,9 +61,11 @@ STAGE PLANS: limit: -1 -PREHOOK: query: EXPLAIN SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src +PREHOOK: query: -- Tests exception in ScriptOperator.processOp() by passing extra data needed to fill pipe buffer +EXPLAIN SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src +POSTHOOK: query: -- Tests exception in ScriptOperator.processOp() by passing extra data needed to fill pipe buffer +EXPLAIN SELECT TRANSFORM(key, value, key, value, key, value, key, value, key, value, key, value) USING 'head -n 1' as a,b,c,d FROM src POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) TOK_SERDE TOK_RECORDWRITER 'head -n 1' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST a b c d)))))) diff --git ql/src/test/results/clientpositive/semicolon.q.out ql/src/test/results/clientpositive/semicolon.q.out index 580f0f3..5889833 100644 --- ql/src/test/results/clientpositive/semicolon.q.out +++ ql/src/test/results/clientpositive/semicolon.q.out @@ -1,8 +1,14 @@ -PREHOOK: query: SELECT COUNT(1) FROM src +PREHOOK: query: -- comment +-- comment; +-- comment +SELECT COUNT(1) FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT COUNT(1) FROM src +POSTHOOK: query: -- comment +-- comment; +-- comment +SELECT COUNT(1) FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/show_columns.q.out ql/src/test/results/clientpositive/show_columns.q.out index 4970acb..10bd3ce 100644 --- ql/src/test/results/clientpositive/show_columns.q.out +++ ql/src/test/results/clientpositive/show_columns.q.out @@ -37,9 +37,11 @@ POSTHOOK: Input: default@shcol_test key value ds -PREHOOK: query: CREATE DATABASE test_db +PREHOOK: query: -- SHOW COLUMNS +CREATE DATABASE test_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE test_db +POSTHOOK: query: -- SHOW COLUMNS +CREATE DATABASE test_db POSTHOOK: type: CREATEDATABASE PREHOOK: query: USE test_db PREHOOK: type: SWITCHDATABASE @@ -50,9 +52,11 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE foo(a INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: test_db@foo -PREHOOK: query: USE test_db +PREHOOK: query: -- SHOW COLUMNS basic syntax tests +USE test_db PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE test_db +POSTHOOK: query: -- SHOW COLUMNS basic syntax tests +USE test_db POSTHOOK: type: SWITCHDATABASE PREHOOK: query: SHOW COLUMNS from foo PREHOOK: type: SHOWCOLUMNS @@ -72,9 +76,11 @@ POSTHOOK: Input: test_db@foo # col_name a -PREHOOK: query: CREATE DATABASE `database` +PREHOOK: query: -- SHOW COLUMNS from a database with a name that requires escaping +CREATE DATABASE `database` PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE `database` +POSTHOOK: query: -- SHOW COLUMNS from a database with a name that requires escaping +CREATE DATABASE `database` POSTHOOK: type: CREATEDATABASE PREHOOK: query: USE `database` PREHOOK: type: SWITCHDATABASE diff --git ql/src/test/results/clientpositive/show_create_table_alter.q.out ql/src/test/results/clientpositive/show_create_table_alter.q.out index f345cb9..8ed807d 100644 --- ql/src/test/results/clientpositive/show_create_table_alter.q.out +++ ql/src/test/results/clientpositive/show_create_table_alter.q.out @@ -1,7 +1,11 @@ -PREHOOK: query: CREATE EXTERNAL TABLE tmp_showcrt1 (key smallint, value float) +PREHOOK: query: -- Test SHOW CREATE TABLE on an external, clustered and sorted table. Then test the query again after ALTERs. + +CREATE EXTERNAL TABLE tmp_showcrt1 (key smallint, value float) CLUSTERED BY (key) SORTED BY (value DESC) INTO 5 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE EXTERNAL TABLE tmp_showcrt1 (key smallint, value float) +POSTHOOK: query: -- Test SHOW CREATE TABLE on an external, clustered and sorted table. Then test the query again after ALTERs. + +CREATE EXTERNAL TABLE tmp_showcrt1 (key smallint, value float) CLUSTERED BY (key) SORTED BY (value DESC) INTO 5 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmp_showcrt1 @@ -29,11 +33,13 @@ LOCATION #### A masked pattern was here #### TBLPROPERTIES ( #### A masked pattern was here #### -PREHOOK: query: ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('comment'='temporary table', 'EXTERNAL'='FALSE') +PREHOOK: query: -- Add a comment to the table, change the EXTERNAL property, and test SHOW CREATE TABLE on the change. +ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('comment'='temporary table', 'EXTERNAL'='FALSE') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@tmp_showcrt1 PREHOOK: Output: default@tmp_showcrt1 -POSTHOOK: query: ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('comment'='temporary table', 'EXTERNAL'='FALSE') +POSTHOOK: query: -- Add a comment to the table, change the EXTERNAL property, and test SHOW CREATE TABLE on the change. +ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('comment'='temporary table', 'EXTERNAL'='FALSE') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@tmp_showcrt1 POSTHOOK: Output: default@tmp_showcrt1 @@ -63,11 +69,13 @@ LOCATION TBLPROPERTIES ( 'EXTERNAL'='FALSE', #### A masked pattern was here #### -PREHOOK: query: ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('comment'='changed comment', 'EXTERNAL'='TRUE') +PREHOOK: query: -- Alter the table comment, change the EXTERNAL property back and test SHOW CREATE TABLE on the change. +ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('comment'='changed comment', 'EXTERNAL'='TRUE') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@tmp_showcrt1 PREHOOK: Output: default@tmp_showcrt1 -POSTHOOK: query: ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('comment'='changed comment', 'EXTERNAL'='TRUE') +POSTHOOK: query: -- Alter the table comment, change the EXTERNAL property back and test SHOW CREATE TABLE on the change. +ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('comment'='changed comment', 'EXTERNAL'='TRUE') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@tmp_showcrt1 POSTHOOK: Output: default@tmp_showcrt1 @@ -96,11 +104,13 @@ LOCATION #### A masked pattern was here #### TBLPROPERTIES ( #### A masked pattern was here #### -PREHOOK: query: ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('SORTBUCKETCOLSPREFIX'='FALSE') +PREHOOK: query: -- Change the 'SORTBUCKETCOLSPREFIX' property and test SHOW CREATE TABLE. The output should not change. +ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('SORTBUCKETCOLSPREFIX'='FALSE') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@tmp_showcrt1 PREHOOK: Output: default@tmp_showcrt1 -POSTHOOK: query: ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('SORTBUCKETCOLSPREFIX'='FALSE') +POSTHOOK: query: -- Change the 'SORTBUCKETCOLSPREFIX' property and test SHOW CREATE TABLE. The output should not change. +ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('SORTBUCKETCOLSPREFIX'='FALSE') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@tmp_showcrt1 POSTHOOK: Output: default@tmp_showcrt1 @@ -129,11 +139,13 @@ LOCATION #### A masked pattern was here #### TBLPROPERTIES ( #### A masked pattern was here #### -PREHOOK: query: ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('storage_handler'='org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler') +PREHOOK: query: -- Alter the storage handler of the table, and test SHOW CREATE TABLE. +ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('storage_handler'='org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@tmp_showcrt1 PREHOOK: Output: default@tmp_showcrt1 -POSTHOOK: query: ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('storage_handler'='org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler') +POSTHOOK: query: -- Alter the storage handler of the table, and test SHOW CREATE TABLE. +ALTER TABLE tmp_showcrt1 SET TBLPROPERTIES ('storage_handler'='org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@tmp_showcrt1 POSTHOOK: Output: default@tmp_showcrt1 diff --git ql/src/test/results/clientpositive/show_create_table_db_table.q.out ql/src/test/results/clientpositive/show_create_table_db_table.q.out index 20e1099..77b91bd 100644 --- ql/src/test/results/clientpositive/show_create_table_db_table.q.out +++ ql/src/test/results/clientpositive/show_create_table_db_table.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE DATABASE tmp_feng comment 'for show create table test' +PREHOOK: query: -- Test SHOW CREATE TABLE on a table name of format "db.table". + +CREATE DATABASE tmp_feng comment 'for show create table test' PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE tmp_feng comment 'for show create table test' +POSTHOOK: query: -- Test SHOW CREATE TABLE on a table name of format "db.table". + +CREATE DATABASE tmp_feng comment 'for show create table test' POSTHOOK: type: CREATEDATABASE PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES diff --git ql/src/test/results/clientpositive/show_create_table_delimited.q.out ql/src/test/results/clientpositive/show_create_table_delimited.q.out index 2a0d163..3329825 100644 --- ql/src/test/results/clientpositive/show_create_table_delimited.q.out +++ ql/src/test/results/clientpositive/show_create_table_delimited.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: CREATE TABLE tmp_showcrt1 (key int, value string, newvalue bigint) +PREHOOK: query: -- Test SHOW CREATE TABLE on a table with delimiters, stored format, and location. + +CREATE TABLE tmp_showcrt1 (key int, value string, newvalue bigint) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY '|' MAP KEYS TERMINATED BY '\045' LINES TERMINATED BY '\n' STORED AS textfile #### A masked pattern was here #### PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE tmp_showcrt1 (key int, value string, newvalue bigint) +POSTHOOK: query: -- Test SHOW CREATE TABLE on a table with delimiters, stored format, and location. + +CREATE TABLE tmp_showcrt1 (key int, value string, newvalue bigint) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY '|' MAP KEYS TERMINATED BY '\045' LINES TERMINATED BY '\n' STORED AS textfile #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/show_create_table_partitioned.q.out ql/src/test/results/clientpositive/show_create_table_partitioned.q.out index bf02e28..99533c7 100644 --- ql/src/test/results/clientpositive/show_create_table_partitioned.q.out +++ ql/src/test/results/clientpositive/show_create_table_partitioned.q.out @@ -1,8 +1,12 @@ -PREHOOK: query: CREATE EXTERNAL TABLE tmp_showcrt1 (key string, newvalue boolean COMMENT 'a new value') +PREHOOK: query: -- Test SHOW CREATE TABLE on a table with partitions and column comments. + +CREATE EXTERNAL TABLE tmp_showcrt1 (key string, newvalue boolean COMMENT 'a new value') COMMENT 'temporary table' PARTITIONED BY (value bigint COMMENT 'some value') PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE EXTERNAL TABLE tmp_showcrt1 (key string, newvalue boolean COMMENT 'a new value') +POSTHOOK: query: -- Test SHOW CREATE TABLE on a table with partitions and column comments. + +CREATE EXTERNAL TABLE tmp_showcrt1 (key string, newvalue boolean COMMENT 'a new value') COMMENT 'temporary table' PARTITIONED BY (value bigint COMMENT 'some value') POSTHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/show_create_table_serde.q.out ql/src/test/results/clientpositive/show_create_table_serde.q.out index ea294e6..4579714 100644 --- ql/src/test/results/clientpositive/show_create_table_serde.q.out +++ ql/src/test/results/clientpositive/show_create_table_serde.q.out @@ -1,10 +1,16 @@ -PREHOOK: query: CREATE TABLE tmp_showcrt1 (key int, value string, newvalue bigint) +PREHOOK: query: -- Test SHOW CREATE TABLE on a table with serde. + +-- without a storage handler +CREATE TABLE tmp_showcrt1 (key int, value string, newvalue bigint) COMMENT 'temporary table' ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE tmp_showcrt1 (key int, value string, newvalue bigint) +POSTHOOK: query: -- Test SHOW CREATE TABLE on a table with serde. + +-- without a storage handler +CREATE TABLE tmp_showcrt1 (key int, value string, newvalue bigint) COMMENT 'temporary table' ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' @@ -40,12 +46,14 @@ POSTHOOK: query: DROP TABLE tmp_showcrt1 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tmp_showcrt1 POSTHOOK: Output: default@tmp_showcrt1 -PREHOOK: query: CREATE EXTERNAL TABLE tmp_showcrt1 (key string, value boolean) +PREHOOK: query: -- with a storage handler and serde properties +CREATE EXTERNAL TABLE tmp_showcrt1 (key string, value boolean) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' STORED BY 'org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler' WITH SERDEPROPERTIES ('field.delim'=',', 'serialization.format'='$') PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE EXTERNAL TABLE tmp_showcrt1 (key string, value boolean) +POSTHOOK: query: -- with a storage handler and serde properties +CREATE EXTERNAL TABLE tmp_showcrt1 (key string, value boolean) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' STORED BY 'org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler' WITH SERDEPROPERTIES ('field.delim'=',', 'serialization.format'='$') diff --git ql/src/test/results/clientpositive/show_create_table_view.q.out ql/src/test/results/clientpositive/show_create_table_view.q.out index 9398281..035d454 100644 --- ql/src/test/results/clientpositive/show_create_table_view.q.out +++ ql/src/test/results/clientpositive/show_create_table_view.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE VIEW tmp_copy_src AS SELECT * FROM src +PREHOOK: query: -- Test SHOW CREATE TABLE on a view name. + +CREATE VIEW tmp_copy_src AS SELECT * FROM src PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW tmp_copy_src AS SELECT * FROM src +POSTHOOK: query: -- Test SHOW CREATE TABLE on a view name. + +CREATE VIEW tmp_copy_src AS SELECT * FROM src POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@tmp_copy_src PREHOOK: query: SHOW CREATE TABLE tmp_copy_src diff --git ql/src/test/results/clientpositive/show_tables.q.out ql/src/test/results/clientpositive/show_tables.q.out index cff8e96..d434c0e 100644 --- ql/src/test/results/clientpositive/show_tables.q.out +++ ql/src/test/results/clientpositive/show_tables.q.out @@ -70,9 +70,11 @@ POSTHOOK: query: SHOW TABLES LIKE 'shtb_test1|shtb_test2' POSTHOOK: type: SHOWTABLES shtb_test1 shtb_test2 -PREHOOK: query: CREATE DATABASE test_db +PREHOOK: query: -- SHOW TABLES FROM/IN database +CREATE DATABASE test_db PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE test_db +POSTHOOK: query: -- SHOW TABLES FROM/IN database +CREATE DATABASE test_db POSTHOOK: type: CREATEDATABASE PREHOOK: query: USE test_db PREHOOK: type: SWITCHDATABASE @@ -93,9 +95,11 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE baz(a INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: test_db@baz -PREHOOK: query: USE default +PREHOOK: query: -- SHOW TABLES basic syntax tests +USE default PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE default +POSTHOOK: query: -- SHOW TABLES basic syntax tests +USE default POSTHOOK: type: SWITCHDATABASE PREHOOK: query: SHOW TABLES FROM test_db PREHOOK: type: SHOWTABLES @@ -147,9 +151,11 @@ PREHOOK: query: SHOW TABLES IN test_db LIKE "nomatch" PREHOOK: type: SHOWTABLES POSTHOOK: query: SHOW TABLES IN test_db LIKE "nomatch" POSTHOOK: type: SHOWTABLES -PREHOOK: query: CREATE DATABASE `database` +PREHOOK: query: -- SHOW TABLES from a database with a name that requires escaping +CREATE DATABASE `database` PREHOOK: type: CREATEDATABASE -POSTHOOK: query: CREATE DATABASE `database` +POSTHOOK: query: -- SHOW TABLES from a database with a name that requires escaping +CREATE DATABASE `database` POSTHOOK: type: CREATEDATABASE PREHOOK: query: USE `database` PREHOOK: type: SWITCHDATABASE diff --git ql/src/test/results/clientpositive/skewjoinopt1.q.out ql/src/test/results/clientpositive/skewjoinopt1.q.out index 0ebec74..4438256 100644 --- ql/src/test/results/clientpositive/skewjoinopt1.q.out +++ ql/src/test/results/clientpositive/skewjoinopt1.q.out @@ -24,10 +24,16 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- a simple join query with skew on both the tables on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- a simple join query with skew on both the tables on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -233,10 +239,14 @@ POSTHOOK: Input: default@t2 8 18 8 18 8 28 8 18 8 28 8 18 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- test outer joins also + +EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- test outer joins also + +EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -444,10 +454,14 @@ NULL NULL 5 15 8 18 8 18 8 28 8 18 8 28 8 18 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- an aggregation at the end should not change anything + +EXPLAIN SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- an aggregation at the end should not change anything + +EXPLAIN SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt10.q.out ql/src/test/results/clientpositive/skewjoinopt10.q.out index db8bd89..26d7a4b 100644 --- ql/src/test/results/clientpositive/skewjoinopt10.q.out +++ ql/src/test/results/clientpositive/skewjoinopt10.q.out @@ -28,10 +28,16 @@ POSTHOOK: Input: default@t1 POSTHOOK: Output: default@array_valued_t1 POSTHOOK: Lineage: array_valued_t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: array_valued_t1.value EXPRESSION [(t1)t1.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed by a lateral view +-- adding a order by at the end to make the results deterministic + +explain select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed by a lateral view +-- adding a order by at the end to make the results deterministic + +explain select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val POSTHOOK: type: QUERY POSTHOOK: Lineage: array_valued_t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/skewjoinopt11.q.out ql/src/test/results/clientpositive/skewjoinopt11.q.out index 0ca2088..f1d5408 100644 --- ql/src/test/results/clientpositive/skewjoinopt11.q.out +++ ql/src/test/results/clientpositive/skewjoinopt11.q.out @@ -22,7 +22,12 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed +-- by a union. Both sides of a union consist of a join, which should have used +-- skew join compile time optimization. +-- adding an order by at the end to make the results deterministic + +EXPLAIN select * from ( select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key @@ -30,7 +35,12 @@ select * from select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key ) subq1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed +-- by a union. Both sides of a union consist of a join, which should have used +-- skew join compile time optimization. +-- adding an order by at the end to make the results deterministic + +EXPLAIN select * from ( select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key diff --git ql/src/test/results/clientpositive/skewjoinopt12.q.out ql/src/test/results/clientpositive/skewjoinopt12.q.out index c100463..b99f275 100644 --- ql/src/test/results/clientpositive/skewjoinopt12.q.out +++ ql/src/test/results/clientpositive/skewjoinopt12.q.out @@ -24,10 +24,18 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values +-- is common to both the tables. The join key matches the skewed key set. +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values +-- is common to both the tables. The join key matches the skewed key set. +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt13.q.out ql/src/test/results/clientpositive/skewjoinopt13.q.out index 9f4fff9..b816ae3 100644 --- ql/src/test/results/clientpositive/skewjoinopt13.q.out +++ ql/src/test/results/clientpositive/skewjoinopt13.q.out @@ -33,13 +33,27 @@ PREHOOK: Output: default@t3 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T3.txt' INTO TABLE T3 POSTHOOK: type: LOAD POSTHOOK: Output: default@t3 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- This test is for skewed join compile time optimization for more than 2 tables. +-- The join key for table 3 is different from the join key used for joining +-- tables 1 and 2. Table 3 is skewed, but since one of the join sources for table +-- 3 consist of a sub-query which contains a join, the compile time skew join +-- optimization is not performed +-- adding a order by at the end to make the results deterministic + +EXPLAIN select * from T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- This test is for skewed join compile time optimization for more than 2 tables. +-- The join key for table 3 is different from the join key used for joining +-- tables 1 and 2. Table 3 is skewed, but since one of the join sources for table +-- 3 consist of a sub-query which contains a join, the compile time skew join +-- optimization is not performed +-- adding a order by at the end to make the results deterministic + +EXPLAIN select * from T1 a join T2 b on a.key = b.key diff --git ql/src/test/results/clientpositive/skewjoinopt14.q.out ql/src/test/results/clientpositive/skewjoinopt14.q.out index a2da6ca..1b83f95 100644 --- ql/src/test/results/clientpositive/skewjoinopt14.q.out +++ ql/src/test/results/clientpositive/skewjoinopt14.q.out @@ -35,13 +35,29 @@ PREHOOK: Output: default@t3 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T3.txt' INTO TABLE T3 POSTHOOK: type: LOAD POSTHOOK: Output: default@t3 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- This test is for skewed join compile time optimization for more than 2 tables. +-- The join key for table 3 is different from the join key used for joining +-- tables 1 and 2. Tables 1 and 3 are skewed. Since one of the join sources for table +-- 3 consist of a sub-query which contains a join, the compile time skew join +-- optimization is not enabled for table 3, but it is used for the first join between +-- tables 1 and 2 +-- adding a order by at the end to make the results deterministic + +EXPLAIN select * from T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- This test is for skewed join compile time optimization for more than 2 tables. +-- The join key for table 3 is different from the join key used for joining +-- tables 1 and 2. Tables 1 and 3 are skewed. Since one of the join sources for table +-- 3 consist of a sub-query which contains a join, the compile time skew join +-- optimization is not enabled for table 3, but it is used for the first join between +-- tables 1 and 2 +-- adding a order by at the end to make the results deterministic + +EXPLAIN select * from T1 a join T2 b on a.key = b.key diff --git ql/src/test/results/clientpositive/skewjoinopt15.q.out ql/src/test/results/clientpositive/skewjoinopt15.q.out index 2b9fd3b..ebd09bc 100644 --- ql/src/test/results/clientpositive/skewjoinopt15.q.out +++ ql/src/test/results/clientpositive/skewjoinopt15.q.out @@ -9,9 +9,11 @@ PREHOOK: Output: default@tmpt1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE tmpT1 POSTHOOK: type: LOAD POSTHOOK: Output: default@tmpt1 -PREHOOK: query: CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) +PREHOOK: query: -- testing skew on other data types - int +CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) +POSTHOOK: query: -- testing skew on other data types - int +CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@T1 PREHOOK: query: INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1 @@ -58,10 +60,22 @@ POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:st POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key EXPRESSION [(tmpt2)tmpt2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(tmpt2)tmpt2.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- The skewed key is a integer column. +-- Otherwise this test is similar to skewjoinopt1.q +-- Both the joined tables are skewed, and the joined column +-- is an integer +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- The skewed key is a integer column. +-- Otherwise this test is similar to skewjoinopt1.q +-- Both the joined tables are skewed, and the joined column +-- is an integer +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:string, comment:null), ] @@ -275,10 +289,14 @@ POSTHOOK: Lineage: t2.val SIMPLE [(tmpt2)tmpt2.FieldSchema(name:val, type:string 8 18 8 18 8 28 8 18 8 28 8 18 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- test outer joins also + +EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- test outer joins also + +EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:string, comment:null), ] @@ -494,10 +512,14 @@ NULL NULL 5 15 8 18 8 18 8 28 8 18 8 28 8 18 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- an aggregation at the end should not change anything + +EXPLAIN SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- an aggregation at the end should not change anything + +EXPLAIN SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/skewjoinopt16.q.out ql/src/test/results/clientpositive/skewjoinopt16.q.out index a285398..9e4da52 100644 --- ql/src/test/results/clientpositive/skewjoinopt16.q.out +++ ql/src/test/results/clientpositive/skewjoinopt16.q.out @@ -24,10 +24,18 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the both the columns +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the both the columns +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt17.q.out ql/src/test/results/clientpositive/skewjoinopt17.q.out index 8327115..9ec4de8 100644 --- ql/src/test/results/clientpositive/skewjoinopt17.q.out +++ ql/src/test/results/clientpositive/skewjoinopt17.q.out @@ -24,10 +24,22 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the first skewed column +-- The skewed value for the jon key is common to both the tables. +-- In this case, the skewed join value is not repeated in the filter. +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the first skewed column +-- The skewed value for the jon key is common to both the tables. +-- In this case, the skewed join value is not repeated in the filter. +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -275,10 +287,18 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the both the columns +-- In this case, the skewed join value is repeated in the filter. + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the both the columns +-- In this case, the skewed join value is repeated in the filter. + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt18.q.out ql/src/test/results/clientpositive/skewjoinopt18.q.out index b0acba9..a8d43c3 100644 --- ql/src/test/results/clientpositive/skewjoinopt18.q.out +++ ql/src/test/results/clientpositive/skewjoinopt18.q.out @@ -9,9 +9,11 @@ PREHOOK: Output: default@tmpt1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE tmpT1 POSTHOOK: type: LOAD POSTHOOK: Output: default@tmpt1 -PREHOOK: query: CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) +PREHOOK: query: -- testing skew on other data types - int +CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) +POSTHOOK: query: -- testing skew on other data types - int +CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@T1 PREHOOK: query: INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1 @@ -24,10 +26,16 @@ POSTHOOK: Input: default@tmpt1 POSTHOOK: Output: default@t1 POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +PREHOOK: query: -- Tke skewed column is same in both the tables, however it is +-- INT in one of the tables, and STRING in the other table + +CREATE TABLE T2(key STRING, val STRING) SKEWED BY (key) ON ((3)) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +POSTHOOK: query: -- Tke skewed column is same in both the tables, however it is +-- INT in one of the tables, and STRING in the other table + +CREATE TABLE T2(key STRING, val STRING) SKEWED BY (key) ON ((3)) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@T2 @@ -41,10 +49,20 @@ POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Once HIVE-3445 is fixed, the compile time skew join optimization would be +-- applicable here. Till the above jira is fixed, it would be performed as a +-- regular join +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Once HIVE-3445 is fixed, the compile time skew join optimization would be +-- applicable here. Till the above jira is fixed, it would be performed as a +-- regular join +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/skewjoinopt19.q.out ql/src/test/results/clientpositive/skewjoinopt19.q.out index 7dfd167..a5eed8d 100644 --- ql/src/test/results/clientpositive/skewjoinopt19.q.out +++ ql/src/test/results/clientpositive/skewjoinopt19.q.out @@ -24,10 +24,20 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- add a test where the skewed key is also the bucketized key +-- it should not matter, and the compile time skewed join +-- optimization is performed +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- add a test where the skewed key is also the bucketized key +-- it should not matter, and the compile time skewed join +-- optimization is performed +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt2.q.out ql/src/test/results/clientpositive/skewjoinopt2.q.out index a8c2a46..7ffe8fa 100644 --- ql/src/test/results/clientpositive/skewjoinopt2.q.out +++ ql/src/test/results/clientpositive/skewjoinopt2.q.out @@ -24,10 +24,22 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- a simple query with skew on both the tables on the join key +-- multiple skew values are present for the skewed keys +-- but the skewed values do not overlap. +-- The join values are a superset of the skewed keys. +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- a simple query with skew on both the tables on the join key +-- multiple skew values are present for the skewed keys +-- but the skewed values do not overlap. +-- The join values are a superset of the skewed keys. +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -245,10 +257,14 @@ POSTHOOK: Input: default@t2 3 13 3 13 8 18 8 18 8 18 8 18 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- test outer joins also + +EXPLAIN SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- test outer joins also + +EXPLAIN SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -470,10 +486,14 @@ POSTHOOK: Input: default@t2 8 28 NULL NULL 8 18 8 18 8 18 8 18 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- a group by at the end should not change anything + +EXPLAIN SELECT a.key, count(1) FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- a group by at the end should not change anything + +EXPLAIN SELECT a.key, count(1) FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt20.q.out ql/src/test/results/clientpositive/skewjoinopt20.q.out index 53a557f..3965470 100644 --- ql/src/test/results/clientpositive/skewjoinopt20.q.out +++ ql/src/test/results/clientpositive/skewjoinopt20.q.out @@ -24,10 +24,20 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- add a test where the skewed key is also the bucketized/sorted key +-- it should not matter, and the compile time skewed join +-- optimization is performed +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- add a test where the skewed key is also the bucketized/sorted key +-- it should not matter, and the compile time skewed join +-- optimization is performed +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt3.q.out ql/src/test/results/clientpositive/skewjoinopt3.q.out index 8e2dd43..1d21859 100644 --- ql/src/test/results/clientpositive/skewjoinopt3.q.out +++ ql/src/test/results/clientpositive/skewjoinopt3.q.out @@ -24,10 +24,20 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- a simple query with skew on both the tables. One of the skewed +-- value is common to both the tables. The skewed value should not be +-- repeated in the filter. +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- a simple query with skew on both the tables. One of the skewed +-- value is common to both the tables. The skewed value should not be +-- repeated in the filter. +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -233,10 +243,14 @@ POSTHOOK: Input: default@t2 8 18 8 18 8 28 8 18 8 28 8 18 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- test outer joins also + +EXPLAIN SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- test outer joins also + +EXPLAIN SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt4.q.out ql/src/test/results/clientpositive/skewjoinopt4.q.out index d72a7c6..3d91729 100644 --- ql/src/test/results/clientpositive/skewjoinopt4.q.out +++ ql/src/test/results/clientpositive/skewjoinopt4.q.out @@ -22,10 +22,18 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- only of the tables of the join (the left table of the join) is skewed +-- the skewed filter would still be applied to both the tables +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- only of the tables of the join (the left table of the join) is skewed +-- the skewed filter would still be applied to both the tables +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -231,10 +239,12 @@ POSTHOOK: Input: default@t2 8 18 8 18 8 28 8 18 8 28 8 18 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- the order of the join should not matter, just confirming +EXPLAIN SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- the order of the join should not matter, just confirming +EXPLAIN SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt5.q.out ql/src/test/results/clientpositive/skewjoinopt5.q.out index e3ea463..70db0f6 100644 --- ql/src/test/results/clientpositive/skewjoinopt5.q.out +++ ql/src/test/results/clientpositive/skewjoinopt5.q.out @@ -24,10 +24,18 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the first skewed column +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the first skewed column +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt6.q.out ql/src/test/results/clientpositive/skewjoinopt6.q.out index 5ec116a..9def39c 100644 --- ql/src/test/results/clientpositive/skewjoinopt6.q.out +++ ql/src/test/results/clientpositive/skewjoinopt6.q.out @@ -24,10 +24,20 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values +-- is common to both the tables. The join key is a subset of the skewed key set: +-- it only contains the first skewed key for both the tables +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values +-- is common to both the tables. The join key is a subset of the skewed key set: +-- it only contains the first skewed key for both the tables +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt7.q.out ql/src/test/results/clientpositive/skewjoinopt7.q.out index 341e507..78d9afc 100644 --- ql/src/test/results/clientpositive/skewjoinopt7.q.out +++ ql/src/test/results/clientpositive/skewjoinopt7.q.out @@ -35,10 +35,20 @@ PREHOOK: Output: default@t3 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T3.txt' INTO TABLE T3 POSTHOOK: type: LOAD POSTHOOK: Output: default@t3 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- This test is for validating skewed join compile time optimization for more than +-- 2 tables. The join key is the same, and so a 3-way join would be performed. +-- 2 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- This test is for validating skewed join compile time optimization for more than +-- 2 tables. The join key is the same, and so a 3-way join would be performed. +-- 2 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt8.q.out ql/src/test/results/clientpositive/skewjoinopt8.q.out index 57f495b..726cc01 100644 --- ql/src/test/results/clientpositive/skewjoinopt8.q.out +++ ql/src/test/results/clientpositive/skewjoinopt8.q.out @@ -33,10 +33,20 @@ PREHOOK: Output: default@t3 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T3.txt' INTO TABLE T3 POSTHOOK: type: LOAD POSTHOOK: Output: default@t3 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- This test is for validating skewed join compile time optimization for more than +-- 2 tables. The join key is the same, and so a 3-way join would be performed. +-- 1 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- This test is for validating skewed join compile time optimization for more than +-- 2 tables. The join key is the same, and so a 3-way join would be performed. +-- 1 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: diff --git ql/src/test/results/clientpositive/skewjoinopt9.q.out ql/src/test/results/clientpositive/skewjoinopt9.q.out index a3111d7..c17d2a4 100644 --- ql/src/test/results/clientpositive/skewjoinopt9.q.out +++ ql/src/test/results/clientpositive/skewjoinopt9.q.out @@ -22,7 +22,10 @@ PREHOOK: Output: default@t2 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T2.txt' INTO TABLE T2 POSTHOOK: type: LOAD POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- no skew join compile time optimization would be performed if one of the +-- join sources is a sub-query consisting of a union all +-- adding a order by at the end to make the results deterministic +EXPLAIN select * from ( select key, val from T1 @@ -31,7 +34,10 @@ select key, val from T1 ) subq1 join T2 b on subq1.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- no skew join compile time optimization would be performed if one of the +-- join sources is a sub-query consisting of a union all +-- adding a order by at the end to make the results deterministic +EXPLAIN select * from ( select key, val from T1 @@ -186,14 +192,18 @@ POSTHOOK: Input: default@t2 8 28 8 18 8 28 8 18 8 28 8 18 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- no skew join compile time optimization would be performed if one of the +-- join sources is a sub-query consisting of a group by +EXPLAIN select * from ( select key, count(1) as cnt from T1 group by key ) subq1 join T2 b on subq1.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- no skew join compile time optimization would be performed if one of the +-- join sources is a sub-query consisting of a group by +EXPLAIN select * from ( select key, count(1) as cnt from T1 group by key diff --git ql/src/test/results/clientpositive/smb_mapjoin9.q.out ql/src/test/results/clientpositive/smb_mapjoin9.q.out index b23ecc8..9a7a793 100644 --- ql/src/test/results/clientpositive/smb_mapjoin9.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin9.q.out @@ -8,13 +8,15 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table hive_test_smb_bucket2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@hive_test_smb_bucket2 -PREHOOK: query: explain extended +PREHOOK: query: -- empty partitions (HIVE-3205) +explain extended SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2 FROM hive_test_smb_bucket1 a JOIN hive_test_smb_bucket2 b ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: -- empty partitions (HIVE-3205) +explain extended SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2 FROM hive_test_smb_bucket1 a JOIN hive_test_smb_bucket2 b diff --git ql/src/test/results/clientpositive/smb_mapjoin_10.q.out ql/src/test/results/clientpositive/smb_mapjoin_10.q.out index 9a06d5e..f5fe945 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_10.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_10.q.out @@ -17,10 +17,14 @@ POSTHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '2') POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Input: default@tmp_smb_bucket_10 POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 -PREHOOK: query: load data local inpath '../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +PREHOOK: query: -- add dummy files to make sure that the number of files in each partition is same as number of buckets + +load data local inpath '../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') PREHOOK: type: LOAD PREHOOK: Output: default@tmp_smb_bucket_10@ds=1 -POSTHOOK: query: load data local inpath '../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +POSTHOOK: query: -- add dummy files to make sure that the number of files in each partition is same as number of buckets + +load data local inpath '../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') POSTHOOK: type: LOAD POSTHOOK: Output: default@tmp_smb_bucket_10@ds=1 PREHOOK: query: load data local inpath '../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') diff --git ql/src/test/results/clientpositive/smb_mapjoin_11.q.out ql/src/test/results/clientpositive/smb_mapjoin_11.q.out index 35ea6db..309769e 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_11.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_11.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: query: -- This test verifies that the output of a sort merge join on 2 partitions (one on each side of the join) is bucketed + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: query: -- This test verifies that the output of a sort merge join on 2 partitions (one on each side of the join) is bucketed + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS @@ -26,19 +32,23 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) INTO 16 BUCKETS +PREHOOK: query: -- Create a bucketed table +CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) INTO 16 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) INTO 16 BUCKETS +POSTHOOK: query: -- Create a bucketed table +CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) INTO 16 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table3 POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Insert data into the bucketed table by joining the two bucketed and sorted tables, bucketing is not enforced +EXPLAIN EXTENDED INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Insert data into the bucketed table by joining the two bucketed and sorted tables, bucketing is not enforced +EXPLAIN EXTENDED INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -213,14 +223,16 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +PREHOOK: query: -- Join data from a sampled bucket to verify the data is bucketed +SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 PREHOOK: Input: default@test_table1@ds=1 PREHOOK: Input: default@test_table3 PREHOOK: Input: default@test_table3@ds=1 #### A masked pattern was here #### -POSTHOOK: query: SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +POSTHOOK: query: -- Join data from a sampled bucket to verify the data is bucketed +SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1 POSTHOOK: Input: default@test_table1@ds=1 diff --git ql/src/test/results/clientpositive/smb_mapjoin_12.q.out ql/src/test/results/clientpositive/smb_mapjoin_12.q.out index 0a1a5b2..95e594c 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_12.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_12.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: query: -- This test verifies that the output of a sort merge join on 1 big partition with multiple small partitions is bucketed and sorted + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: query: -- This test verifies that the output of a sort merge join on 1 big partition with multiple small partitions is bucketed and sorted + +-- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS @@ -38,9 +44,11 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: query: -- Create a bucketed table +CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: query: -- Create a bucketed table +CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table3 POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -51,10 +59,12 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Insert data into the bucketed table by joining the two bucketed and sorted tables, bucketing is not enforced +EXPLAIN EXTENDED INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Insert data into the bucketed table by joining the two bucketed and sorted tables, bucketing is not enforced +EXPLAIN EXTENDED INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -243,14 +253,16 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=3).key EXPRESSION [(src)src.FieldSch POSTHOOK: Lineage: test_table2 PARTITION(ds=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +PREHOOK: query: -- Join data from a sampled bucket to verify the data is bucketed +SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 PREHOOK: Input: default@test_table1@ds=1 PREHOOK: Input: default@test_table3 PREHOOK: Input: default@test_table3@ds=1 #### A masked pattern was here #### -POSTHOOK: query: SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' +POSTHOOK: query: -- Join data from a sampled bucket to verify the data is bucketed +SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1 POSTHOOK: Input: default@test_table1@ds=1 @@ -268,11 +280,13 @@ POSTHOOK: Lineage: test_table2 PARTITION(ds=3).value SIMPLE [(src)src.FieldSchem POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] 879 -PREHOOK: query: explain extended +PREHOOK: query: -- Join data from the sampled buckets of 2 tables to verify the data is bucketed and sorted +explain extended INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: -- Join data from the sampled buckets of 2 tables to verify the data is bucketed and sorted +explain extended INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/smb_mapjoin_13.q.out index de9a432..1204f88 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_13.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_13.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS +PREHOOK: query: -- This test verifies that the sort merge join optimizer works when the tables are joined on columns with different names + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS +POSTHOOK: query: -- This test verifies that the sort merge join optimizer works when the tables are joined on columns with different names + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 PREHOOK: query: CREATE TABLE test_table2 (value INT, key STRING) CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS @@ -48,10 +54,14 @@ POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, ty POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Join data from 2 tables on their respective sorted columns (one each, with different names) and +-- verify sort merge join is used +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Join data from 2 tables on their respective sorted columns (one each, with different names) and +-- verify sort merge join is used +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -221,10 +231,14 @@ POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, ty 0 val_0 0 val_0 0 val_0 0 val_0 2 val_2 2 val_2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- Join data from 2 tables on their respective columns (two each, with the same names but sorted +-- with different priorities) and verify sort merge join is not used +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- Join data from 2 tables on their respective columns (two each, with the same names but sorted +-- with different priorities) and verify sort merge join is not used +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/smb_mapjoin_14.q.out ql/src/test/results/clientpositive/smb_mapjoin_14.q.out index eae2055..234579a 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_14.q.out @@ -34,12 +34,14 @@ POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:stri POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 @@ -129,7 +131,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 22 -PREHOOK: query: explain +PREHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be converted to a sort-merge join +-- Add a order by at the end to make the results deterministic. +explain select key, count(*) from ( select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key @@ -137,7 +141,9 @@ select key, count(*) from group by key order by key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be converted to a sort-merge join +-- Add a order by at the end to make the results deterministic. +explain select key, count(*) from ( select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key @@ -285,7 +291,8 @@ POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:stri 5 9 8 1 9 1 -PREHOOK: query: explain +PREHOOK: query: -- The mapjoin is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select key, count(*) from @@ -295,7 +302,8 @@ select count(*) from group by key ) subq2 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The mapjoin is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain select count(*) from ( select key, count(*) from @@ -449,14 +457,18 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 6 -PREHOOK: query: explain +PREHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -561,7 +573,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain select /*+mapjoin(subq2)*/ count(*) from ( select * from @@ -573,7 +587,9 @@ select /*+mapjoin(subq2)*/ count(*) from join tbl2 b on subq2.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain select /*+mapjoin(subq2)*/ count(*) from ( select * from @@ -684,7 +700,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- Both the big table and the small table are nested sub-queries i.e more then 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain select /*+mapjoin(subq2)*/ count(*) from ( select * from @@ -703,7 +721,9 @@ select /*+mapjoin(subq2)*/ count(*) from ) subq4 on subq2.key = subq4.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- Both the big table and the small table are nested sub-queries i.e more then 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain select /*+mapjoin(subq2)*/ count(*) from ( select * from @@ -842,14 +862,20 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. +explain select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. +explain select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join @@ -954,14 +980,18 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side +-- join should be performed +explain select /*+mapjoin(subq1)*/ count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 on subq1.key = subq2.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side +-- join should be performed +explain select /*+mapjoin(subq1)*/ count(*) from (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 join @@ -1090,12 +1120,16 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 22 -PREHOOK: query: explain +PREHOOK: query: -- The small table is a sub-query and the big table is not. +-- It should be converted to a sort-merge join. +explain select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The small table is a sub-query and the big table is not. +-- It should be converted to a sort-merge join. +explain select /*+mapjoin(subq1)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key @@ -1185,12 +1219,16 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- The big table is a sub-query and the small table is not. +-- It should be converted to a sort-merge join. +explain select /*+mapjoin(a)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The big table is a sub-query and the small table is not. +-- It should be converted to a sort-merge join. +explain select /*+mapjoin(a)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key @@ -1289,7 +1327,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 20 -PREHOOK: query: explain +PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to to a sort-merge join +explain select /*+mapjoin(subq1, subq2)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -1299,7 +1339,9 @@ select /*+mapjoin(subq1, subq2)*/ count(*) from (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 on (subq1.key = subq3.key) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to to a sort-merge join +explain select /*+mapjoin(subq1, subq2)*/ count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -1416,7 +1458,9 @@ POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:stri POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 56 -PREHOOK: query: explain +PREHOOK: query: -- The mapjoin is being performed on a nested sub-query, and an aggregation is performed after that. +-- The join should be converted to a sort-merge join +explain select count(*) from ( select /*+mapjoin(subq2)*/ subq2.key as key, subq2.value as value1, b.value as value2 from ( @@ -1429,7 +1473,9 @@ select count(*) from ( join tbl2 b on subq2.key = b.key) a PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The mapjoin is being performed on a nested sub-query, and an aggregation is performed after that. +-- The join should be converted to a sort-merge join +explain select count(*) from ( select /*+mapjoin(subq2)*/ subq2.key as key, subq2.value as value1, b.value as value2 from ( diff --git ql/src/test/results/clientpositive/smb_mapjoin_15.q.out ql/src/test/results/clientpositive/smb_mapjoin_15.q.out index 69b0786..8990856 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_15.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_15.q.out @@ -1,6 +1,14 @@ -PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS +PREHOOK: query: -- This test verifies that the sort merge join optimizer works when the tables are sorted on columns which is a superset +-- of join columns + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS +POSTHOOK: query: -- This test verifies that the sort merge join optimizer works when the tables are sorted on columns which is a superset +-- of join columns + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS @@ -26,10 +34,12 @@ POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, ty POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- it should be converted to a sort-merge join, since the first sort column (#join columns = 1) contains the join columns +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key ORDER BY a.key LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- it should be converted to a sort-merge join, since the first sort column (#join columns = 1) contains the join columns +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key ORDER BY a.key LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -215,9 +225,11 @@ POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, ty POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: CREATE TABLE test_table1 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS +PREHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS +POSTHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -257,10 +269,12 @@ POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, ty POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -442,10 +456,14 @@ POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, ty 0 0 val_0 0 0 val_0 0 0 val_0 0 0 val_0 2 2 val_2 2 2 val_2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns +-- even if the order is not the same +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns +-- even if the order is not the same +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -627,10 +645,14 @@ POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, ty 0 0 val_0 0 0 val_0 0 0 val_0 0 0 val_0 2 2 val_2 2 2 val_2 -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- it should not be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) do not contain all +-- the join columns +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- it should not be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) do not contain all +-- the join columns +EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/smb_mapjoin_16.q.out ql/src/test/results/clientpositive/smb_mapjoin_16.q.out index d149d29..a1e9a3f 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_16.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_16.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS @@ -26,10 +28,12 @@ POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, ty POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job +EXPLAIN SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job +EXPLAIN SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/smb_mapjoin_17.q.out ql/src/test/results/clientpositive/smb_mapjoin_17.q.out index 02d42f0..df5d209 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_17.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_17.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS @@ -190,7 +192,8 @@ POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, ty POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job upto 7 tables +EXPLAIN SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key JOIN test_table3 c ON a.key = c.key @@ -199,7 +202,8 @@ JOIN test_table5 e ON a.key = e.key JOIN test_table6 f ON a.key = f.key JOIN test_table7 g ON a.key = g.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job upto 7 tables +EXPLAIN SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key JOIN test_table3 c ON a.key = c.key @@ -350,7 +354,9 @@ POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, ty POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 4378 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- It should be automatically converted to a sort-merge join followed by a groupby in +-- a single MR job +EXPLAIN SELECT count(*) FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key LEFT OUTER JOIN test_table3 c ON a.key = c.key @@ -359,7 +365,9 @@ LEFT OUTER JOIN test_table5 e ON a.key = e.key LEFT OUTER JOIN test_table6 f ON a.key = f.key LEFT OUTER JOIN test_table7 g ON a.key = g.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- It should be automatically converted to a sort-merge join followed by a groupby in +-- a single MR job +EXPLAIN SELECT count(*) FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key LEFT OUTER JOIN test_table3 c ON a.key = c.key @@ -679,7 +687,8 @@ POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, ty POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 13126 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- outer join with max 16 aliases +EXPLAIN SELECT a.* FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key @@ -702,7 +711,8 @@ LEFT OUTER JOIN test_table6 r ON a.key = r.key LEFT OUTER JOIN test_table7 s ON a.key = s.key LEFT OUTER JOIN test_table8 t ON a.key = t.key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- outer join with max 16 aliases +EXPLAIN SELECT a.* FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key diff --git ql/src/test/results/clientpositive/smb_mapjoin_18.q.out ql/src/test/results/clientpositive/smb_mapjoin_18.q.out index 1f9e494..9e7cf7c 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_18.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_18.q.out @@ -1,7 +1,9 @@ -PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 @@ -24,11 +26,15 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table1@ds=1 POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY @@ -247,11 +253,15 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchem POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] 253 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation, one of the buckets should be empty +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation, one of the buckets should be empty +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238 POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/smb_mapjoin_19.q.out ql/src/test/results/clientpositive/smb_mapjoin_19.q.out index db8723c..75cbadd 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_19.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_19.q.out @@ -1,7 +1,9 @@ -PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 @@ -24,11 +26,15 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table1@ds=1 POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/smb_mapjoin_20.q.out ql/src/test/results/clientpositive/smb_mapjoin_20.q.out index dd69a97..60a7df7 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_20.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_20.q.out @@ -1,7 +1,9 @@ -PREHOOK: query: CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING) +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING) +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 @@ -24,11 +26,15 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table1@ds=1 POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- with different datatypes. This should be a map-reduce operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- with different datatypes. This should be a map-reduce operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY @@ -178,11 +184,15 @@ POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchem POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation, although the bucketing positions dont match +EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation, although the bucketing positions dont match +EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY @@ -355,11 +365,15 @@ POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldS POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] 253 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- However, since an expression is being selected, it should involve a reducer +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key+a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- However, since an expression is being selected, it should involve a reducer +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key+a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/smb_mapjoin_21.q.out ql/src/test/results/clientpositive/smb_mapjoin_21.q.out index 91c6756..f1065d5 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_21.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_21.q.out @@ -1,7 +1,9 @@ -PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 @@ -24,11 +26,15 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table1@ds=1 POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY @@ -100,11 +106,15 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table2 POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort orders does not match +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort orders does not match +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY @@ -192,11 +202,15 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table2 POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY @@ -286,11 +300,15 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table2 POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY @@ -378,11 +396,15 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table2 POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the number of buckets do not match +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the number of buckets do not match +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY @@ -470,11 +492,15 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table2 POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since sort columns do not match +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since sort columns do not match +EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/smb_mapjoin_22.q.out ql/src/test/results/clientpositive/smb_mapjoin_22.q.out index 5dc2c87..a15c919 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_22.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_22.q.out @@ -1,7 +1,9 @@ -PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_table1 @@ -24,10 +26,14 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table1 POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -205,10 +211,14 @@ POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, ty POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 POSTHOOK: type: QUERY POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out index 530e3fb..c8f26f2 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out @@ -38,10 +38,16 @@ POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:s POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: -- The columns of the tables above are sorted in same descending order. +-- So, sort merge join should be performed + +explain select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key where a.key < 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The columns of the tables above are sorted in same descending order. +-- So, sort merge join should be performed + +explain select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key where a.key < 10 POSTHOOK: type: QUERY POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out index 8217d12..24755d3 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out @@ -42,11 +42,19 @@ POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:s POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: -- The columns of the tables above are sorted in same order. +-- descending followed by descending +-- So, sort merge join should be performed + +explain select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The columns of the tables above are sorted in same order. +-- descending followed by descending +-- So, sort merge join should be performed + +explain select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out index e88a489..47abb6b 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out @@ -42,11 +42,19 @@ POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:s POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: -- The columns of the tables above are sorted in same orders. +-- descending followed by ascending +-- So, sort merge join should be performed + +explain select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The columns of the tables above are sorted in same orders. +-- descending followed by ascending +-- So, sort merge join should be performed + +explain select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out index 5c76c3c..b33baee 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out @@ -42,11 +42,17 @@ POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:s POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain +PREHOOK: query: -- The columns of the tables above are sorted in different orders. +-- So, sort merge join should not be performed + +explain select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- The columns of the tables above are sorted in different orders. +-- So, sort merge join should not be performed + +explain select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out index 6315471..c390b5e 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out @@ -48,12 +48,16 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(s POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The partition sorting metadata matches but the table metadata does not, sorted merge join should still be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' AND b.part = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The partition sorting metadata matches but the table metadata does not, sorted merge join should still be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' AND b.part = '1' diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out index bf787eb..7dabb55 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out @@ -48,12 +48,16 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(s POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' AND b.part = '1' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part = '1' AND b.part = '1' diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out index 1266634..c321351 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out @@ -108,12 +108,16 @@ POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(s POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL diff --git ql/src/test/results/clientpositive/stats1.q.out ql/src/test/results/clientpositive/stats1.q.out index d459481..3e6057c 100644 --- ql/src/test/results/clientpositive/stats1.q.out +++ ql/src/test/results/clientpositive/stats1.q.out @@ -221,10 +221,16 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE tmptable +PREHOOK: query: -- Load a file into a existing table +-- Some stats (numFiles, totalSize) should be updated correctly +-- Some other stats (numRows, rawDataSize) should be cleared +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE tmptable PREHOOK: type: LOAD PREHOOK: Output: default@tmptable -POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE tmptable +POSTHOOK: query: -- Load a file into a existing table +-- Some stats (numFiles, totalSize) should be updated correctly +-- Some other stats (numRows, rawDataSize) should be cleared +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE tmptable POSTHOOK: type: LOAD POSTHOOK: Output: default@tmptable POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/stats18.q.out ql/src/test/results/clientpositive/stats18.q.out index ab28109..4deff84 100644 --- ql/src/test/results/clientpositive/stats18.q.out +++ ql/src/test/results/clientpositive/stats18.q.out @@ -13,9 +13,15 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr='13') +PREHOOK: query: -- Load a file into a existing partition +-- Some stats (numFiles, totalSize) should be updated correctly +-- Some other stats (numRows, rawDataSize) should be cleared +desc formatted stats_part partition (ds='2010-04-08', hr='13') PREHOOK: type: DESCTABLE -POSTHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr='13') +POSTHOOK: query: -- Load a file into a existing partition +-- Some stats (numFiles, totalSize) should be updated correctly +-- Some other stats (numRows, rawDataSize) should be cleared +desc formatted stats_part partition (ds='2010-04-08', hr='13') POSTHOOK: type: DESCTABLE POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/stats19.q.out ql/src/test/results/clientpositive/stats19.q.out index 829777e..6039d66 100644 --- ql/src/test/results/clientpositive/stats19.q.out +++ ql/src/test/results/clientpositive/stats19.q.out @@ -1,27 +1,41 @@ -PREHOOK: query: create table stats_part like srcpart +PREHOOK: query: -- Note, its important that the partitions created below have a name greater than 16 characters in +-- length since KeyVerifyingStatsAggregator depends on checking that a keyPrefix is hashed by the +-- length of the keyPrefix, having a partition name greather than 16 characters guarantees no false +-- positives. + +create table stats_part like srcpart PREHOOK: type: CREATETABLE -POSTHOOK: query: create table stats_part like srcpart +POSTHOOK: query: -- Note, its important that the partitions created below have a name greater than 16 characters in +-- length since KeyVerifyingStatsAggregator depends on checking that a keyPrefix is hashed by the +-- length of the keyPrefix, having a partition name greather than 16 characters guarantees no false +-- positives. + +create table stats_part like srcpart POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@stats_part -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +PREHOOK: query: -- The stats key should be hashed since the max length is too small +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 Stats prefix is hashed: true Stats prefix is hashed: true -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +POSTHOOK: query: -- The stats key should be hashed since the max length is too small +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +PREHOOK: query: -- The stats key should not be hashed since the max length is large enough +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 Stats prefix is hashed: false Stats prefix is hashed: false -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +POSTHOOK: query: -- The stats key should not be hashed since the max length is large enough +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 @@ -29,13 +43,15 @@ POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)sr POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +PREHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 Stats prefix is hashed: false Stats prefix is hashed: false -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +POSTHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 @@ -45,11 +61,19 @@ POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)sr POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +PREHOOK: query: -- Run the tests again and verify the stats are correct, this should verify that the stats publisher +-- is hashing as well where appropriate + +-- The stats key should be hashed since the max length is too small +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +POSTHOOK: query: -- Run the tests again and verify the stats are correct, this should verify that the stats publisher +-- is hashing as well where appropriate + +-- The stats key should be hashed since the max length is too small +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 @@ -108,11 +132,13 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +PREHOOK: query: -- The stats key should not be hashed since the max length is large enough +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +POSTHOOK: query: -- The stats key should not be hashed since the max length is large enough +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 @@ -175,11 +201,13 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +PREHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src +POSTHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 @@ -246,13 +274,19 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +PREHOOK: query: -- Do the same for dynamic partitions + +-- The stats key should be hashed since the max length is too small +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 Stats prefix is hashed: true Stats prefix is hashed: true -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +POSTHOOK: query: -- Do the same for dynamic partitions + +-- The stats key should be hashed since the max length is too small +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 @@ -270,13 +304,15 @@ POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)sr POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +PREHOOK: query: -- The stats key should not be hashed since the max length is large enough +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 Stats prefix is hashed: false Stats prefix is hashed: false -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +POSTHOOK: query: -- The stats key should not be hashed since the max length is large enough +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 @@ -296,13 +332,15 @@ POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)sr POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +PREHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 Stats prefix is hashed: false Stats prefix is hashed: false -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +POSTHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 @@ -324,11 +362,19 @@ POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)sr POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +PREHOOK: query: -- Run the tests again and verify the stats are correct, this should verify that the stats publisher +-- is hashing as well where appropriate + +-- The stats key should be hashed since the max length is too small +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +POSTHOOK: query: -- Run the tests again and verify the stats are correct, this should verify that the stats publisher +-- is hashing as well where appropriate + +-- The stats key should be hashed since the max length is too small +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 @@ -411,11 +457,13 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +PREHOOK: query: -- The stats key should not be hashed since the max length is large enough +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +POSTHOOK: query: -- The stats key should not be hashed since the max length is large enough +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 @@ -502,11 +550,13 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +PREHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 -POSTHOOK: query: insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src +POSTHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off +insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 diff --git ql/src/test/results/clientpositive/stats20.q.out ql/src/test/results/clientpositive/stats20.q.out index fc08fe0..f9d53a4 100644 --- ql/src/test/results/clientpositive/stats20.q.out +++ ql/src/test/results/clientpositive/stats20.q.out @@ -15,9 +15,11 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_partitioned@ds=1 POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe formatted stats_partitioned +PREHOOK: query: -- rawDataSize is 5312 after config is turned on +describe formatted stats_partitioned PREHOOK: type: DESCTABLE -POSTHOOK: query: describe formatted stats_partitioned +POSTHOOK: query: -- rawDataSize is 5312 after config is turned on +describe formatted stats_partitioned POSTHOOK: type: DESCTABLE POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -70,9 +72,11 @@ POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).key SIMPLE [(src)src.FieldS POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe formatted stats_partitioned +PREHOOK: query: -- rawDataSize is 0 after config is turned off +describe formatted stats_partitioned PREHOOK: type: DESCTABLE -POSTHOOK: query: describe formatted stats_partitioned +POSTHOOK: query: -- rawDataSize is 0 after config is turned off +describe formatted stats_partitioned POSTHOOK: type: DESCTABLE POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_partitioned PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/stats_aggregator_error_1.q.out ql/src/test/results/clientpositive/stats_aggregator_error_1.q.out index a768e0f..c2c665f 100644 --- ql/src/test/results/clientpositive/stats_aggregator_error_1.q.out +++ ql/src/test/results/clientpositive/stats_aggregator_error_1.q.out @@ -1,6 +1,18 @@ -PREHOOK: query: create table tmptable(key string, value string) +PREHOOK: query: -- In this test, there is a dummy stats aggregator which throws an error when various +-- methods are called (as indicated by the parameter hive.test.dummystats.agregator) +-- Since stats need not be reliable (by setting hive.stats.reliable to false), the +-- insert statements succeed. The insert statement succeeds even if the stats aggregator +-- is set to null, since stats need not be reliable. + +create table tmptable(key string, value string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tmptable(key string, value string) +POSTHOOK: query: -- In this test, there is a dummy stats aggregator which throws an error when various +-- methods are called (as indicated by the parameter hive.test.dummystats.agregator) +-- Since stats need not be reliable (by setting hive.stats.reliable to false), the +-- insert statements succeed. The insert statement succeeds even if the stats aggregator +-- is set to null, since stats need not be reliable. + +create table tmptable(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmptable PREHOOK: query: INSERT OVERWRITE TABLE tmptable select * from src diff --git ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out index 03afb7f..6af6a82 100644 --- ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out +++ ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out @@ -1,6 +1,14 @@ -PREHOOK: query: create table tmptable(key string) partitioned by (part string) +PREHOOK: query: -- This test verifies writing a query using dynamic partitions +-- which results in no partitions actually being created with +-- hive.stats.reliable set to true + +create table tmptable(key string) partitioned by (part string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tmptable(key string) partitioned by (part string) +POSTHOOK: query: -- This test verifies writing a query using dynamic partitions +-- which results in no partitions actually being created with +-- hive.stats.reliable set to true + +create table tmptable(key string) partitioned by (part string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmptable PREHOOK: query: explain insert overwrite table tmptable partition (part) select key, value from src where key = 'no_such_value' diff --git ql/src/test/results/clientpositive/stats_empty_partition.q.out ql/src/test/results/clientpositive/stats_empty_partition.q.out index ef1ea23..2815fff 100644 --- ql/src/test/results/clientpositive/stats_empty_partition.q.out +++ ql/src/test/results/clientpositive/stats_empty_partition.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: create table tmptable(key string, value string) partitioned by (part string) +PREHOOK: query: -- This test verifies that writing an empty partition succeeds when +-- hive.stats.reliable is set to true. + +create table tmptable(key string, value string) partitioned by (part string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tmptable(key string, value string) partitioned by (part string) +POSTHOOK: query: -- This test verifies that writing an empty partition succeeds when +-- hive.stats.reliable is set to true. + +create table tmptable(key string, value string) partitioned by (part string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmptable PREHOOK: query: insert overwrite table tmptable partition (part = '1') select * from src where key = 'no_such_value' diff --git ql/src/test/results/clientpositive/stats_noscan_1.q.out ql/src/test/results/clientpositive/stats_noscan_1.q.out index 2e87ce8..072f391 100644 --- ql/src/test/results/clientpositive/stats_noscan_1.q.out +++ ql/src/test/results/clientpositive/stats_noscan_1.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: create table analyze_srcpart like srcpart +PREHOOK: query: -- test analyze table ... compute statistics noscan + +-- 1. test full spec +create table analyze_srcpart like srcpart PREHOOK: type: CREATETABLE -POSTHOOK: query: create table analyze_srcpart like srcpart +POSTHOOK: query: -- test analyze table ... compute statistics noscan + +-- 1. test full spec +create table analyze_srcpart like srcpart POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@analyze_srcpart PREHOOK: query: insert overwrite table analyze_srcpart partition (ds, hr) select * from srcpart where ds is not null @@ -95,9 +101,11 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(s POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe formatted analyze_srcpart PARTITION(ds='2008-04-08',hr=11) +PREHOOK: query: -- confirm result +describe formatted analyze_srcpart PARTITION(ds='2008-04-08',hr=11) PREHOOK: type: DESCTABLE -POSTHOOK: query: describe formatted analyze_srcpart PARTITION(ds='2008-04-08',hr=11) +POSTHOOK: query: -- confirm result +describe formatted analyze_srcpart PARTITION(ds='2008-04-08',hr=11) POSTHOOK: type: DESCTABLE POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] @@ -339,9 +347,11 @@ POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(s POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table analyze_srcpart_partial like srcpart +PREHOOK: query: -- 2. test partial spec +create table analyze_srcpart_partial like srcpart PREHOOK: type: CREATETABLE -POSTHOOK: query: create table analyze_srcpart_partial like srcpart +POSTHOOK: query: -- 2. test partial spec +create table analyze_srcpart_partial like srcpart POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@analyze_srcpart_partial POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] @@ -452,9 +462,11 @@ POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).key SI POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart_partial PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-08',hr=11) +PREHOOK: query: -- confirm result +describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-08',hr=11) PREHOOK: type: DESCTABLE -POSTHOOK: query: describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-08',hr=11) +POSTHOOK: query: -- confirm result +describe formatted analyze_srcpart_partial PARTITION(ds='2008-04-08',hr=11) POSTHOOK: type: DESCTABLE POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/stats_noscan_2.q.out ql/src/test/results/clientpositive/stats_noscan_2.q.out index 154c1ec..2bb6eed 100644 --- ql/src/test/results/clientpositive/stats_noscan_2.q.out +++ ql/src/test/results/clientpositive/stats_noscan_2.q.out @@ -1,5 +1,9 @@ +PREHOOK: query: -- test analyze table compute statistiscs [noscan] on external table +-- 1 test table #### A masked pattern was here #### PREHOOK: type: CREATETABLE +POSTHOOK: query: -- test analyze table compute statistiscs [noscan] on external table +-- 1 test table #### A masked pattern was here #### POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@anaylyze_external @@ -109,9 +113,13 @@ POSTHOOK: query: drop table anaylyze_external POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@anaylyze_external POSTHOOK: Output: default@anaylyze_external -PREHOOK: query: create table texternal(key string, val string) partitioned by (insertdate string) +PREHOOK: query: -- 2 test partition +-- prepare data +create table texternal(key string, val string) partitioned by (insertdate string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table texternal(key string, val string) partitioned by (insertdate string) +POSTHOOK: query: -- 2 test partition +-- prepare data +create table texternal(key string, val string) partitioned by (insertdate string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@texternal #### A masked pattern was here #### @@ -144,8 +152,10 @@ POSTHOOK: Input: default@texternal@insertdate=2008-01-01 POSTHOOK: Lineage: texternal PARTITION(insertdate=2008-01-01).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: texternal PARTITION(insertdate=2008-01-01).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 500 +PREHOOK: query: -- create external table #### A masked pattern was here #### PREHOOK: type: CREATETABLE +POSTHOOK: query: -- create external table #### A masked pattern was here #### POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@anaylyze_external @@ -173,13 +183,15 @@ POSTHOOK: Input: default@anaylyze_external@insertdate=2008-01-01 POSTHOOK: Lineage: texternal PARTITION(insertdate=2008-01-01).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: texternal PARTITION(insertdate=2008-01-01).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 500 -PREHOOK: query: analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics +PREHOOK: query: -- analyze +analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@anaylyze_external PREHOOK: Input: default@anaylyze_external@insertdate=2008-01-01 PREHOOK: Output: default@anaylyze_external PREHOOK: Output: default@anaylyze_external@insertdate=2008-01-01 -POSTHOOK: query: analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics +POSTHOOK: query: -- analyze +analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics POSTHOOK: type: QUERY POSTHOOK: Input: default@anaylyze_external POSTHOOK: Input: default@anaylyze_external@insertdate=2008-01-01 diff --git ql/src/test/results/clientpositive/stats_partscan_1.q.out ql/src/test/results/clientpositive/stats_partscan_1.q.out index 462e0eb..31f8429 100644 --- ql/src/test/results/clientpositive/stats_partscan_1.q.out +++ ql/src/test/results/clientpositive/stats_partscan_1.q.out @@ -1,8 +1,14 @@ -PREHOOK: query: CREATE table analyze_srcpart_partial_scan (key STRING, value STRING) +PREHOOK: query: -- test analyze table ... compute statistics partialscan + +-- 1. prepare data +CREATE table analyze_srcpart_partial_scan (key STRING, value STRING) partitioned by (ds string, hr string) stored as rcfile PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE table analyze_srcpart_partial_scan (key STRING, value STRING) +POSTHOOK: query: -- test analyze table ... compute statistics partialscan + +-- 1. prepare data +CREATE table analyze_srcpart_partial_scan (key STRING, value STRING) partitioned by (ds string, hr string) stored as rcfile POSTHOOK: type: CREATETABLE @@ -77,10 +83,12 @@ Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 -PREHOOK: query: explain +PREHOOK: query: -- 2. partialscan +explain analyze table analyze_srcpart_partial_scan PARTITION(ds='2008-04-08',hr=11) compute statistics partialscan PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- 2. partialscan +explain analyze table analyze_srcpart_partial_scan PARTITION(ds='2008-04-08',hr=11) compute statistics partialscan POSTHOOK: type: QUERY POSTHOOK: Lineage: analyze_srcpart_partial_scan PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] @@ -126,9 +134,11 @@ POSTHOOK: Lineage: analyze_srcpart_partial_scan PARTITION(ds=2008-04-09,hr=11).k POSTHOOK: Lineage: analyze_srcpart_partial_scan PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart_partial_scan PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart_partial_scan PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe formatted analyze_srcpart_partial_scan PARTITION(ds='2008-04-08',hr=11) +PREHOOK: query: -- 3. confirm result +describe formatted analyze_srcpart_partial_scan PARTITION(ds='2008-04-08',hr=11) PREHOOK: type: DESCTABLE -POSTHOOK: query: describe formatted analyze_srcpart_partial_scan PARTITION(ds='2008-04-08',hr=11) +POSTHOOK: query: -- 3. confirm result +describe formatted analyze_srcpart_partial_scan PARTITION(ds='2008-04-08',hr=11) POSTHOOK: type: DESCTABLE POSTHOOK: Lineage: analyze_srcpart_partial_scan PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: analyze_srcpart_partial_scan PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/stats_publisher_error_1.q.out ql/src/test/results/clientpositive/stats_publisher_error_1.q.out index a768e0f..a122b83 100644 --- ql/src/test/results/clientpositive/stats_publisher_error_1.q.out +++ ql/src/test/results/clientpositive/stats_publisher_error_1.q.out @@ -1,6 +1,18 @@ -PREHOOK: query: create table tmptable(key string, value string) +PREHOOK: query: -- In this test, there is a dummy stats publisher which throws an error when various +-- methods are called (as indicated by the parameter hive.test.dummystats.publisher) +-- Since stats need not be reliable (by setting hive.stats.reliable to false), the +-- insert statements succeed. The insert statement succeeds even if the stats publisher +-- is set to null, since stats need not be reliable. + +create table tmptable(key string, value string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tmptable(key string, value string) +POSTHOOK: query: -- In this test, there is a dummy stats publisher which throws an error when various +-- methods are called (as indicated by the parameter hive.test.dummystats.publisher) +-- Since stats need not be reliable (by setting hive.stats.reliable to false), the +-- insert statements succeed. The insert statement succeeds even if the stats publisher +-- is set to null, since stats need not be reliable. + +create table tmptable(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmptable PREHOOK: query: INSERT OVERWRITE TABLE tmptable select * from src diff --git ql/src/test/results/clientpositive/table_access_keys_stats.q.out ql/src/test/results/clientpositive/table_access_keys_stats.q.out index 978c16a..15ff151 100644 --- ql/src/test/results/clientpositive/table_access_keys_stats.q.out +++ ql/src/test/results/clientpositive/table_access_keys_stats.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: query: -- This test is used for testing the TableAccessAnalyzer + +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PREHOOK: type: LOAD @@ -7,7 +9,8 @@ PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -PREHOOK: query: SELECT key, count(1) FROM T1 GROUP BY key +PREHOOK: query: -- Simple group-by queries +SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### @@ -34,7 +37,8 @@ Keys:key,val 7 17 1 8 18 1 8 28 1 -PREHOOK: query: SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key +PREHOOK: query: -- With subqueries and column aliases +SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### @@ -60,7 +64,8 @@ Keys:key 3 1 7 1 8 2 -PREHOOK: query: SELECT 1, key, count(1) FROM T1 GROUP BY 1, key +PREHOOK: query: -- With constants +SELECT 1, key, count(1) FROM T1 GROUP BY 1, key PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### @@ -101,7 +106,8 @@ Keys:key,val 7 1 17 2 1 8 1 18 2 1 8 1 28 2 1 -PREHOOK: query: SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 +PREHOOK: query: -- no mapping with functions +SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 #### A masked pattern was here #### @@ -125,7 +131,8 @@ Keys:key 6.0 1 14.0 1 16.0 2 -PREHOOK: query: SELECT * FROM ( +PREHOOK: query: -- group by followed by union +SELECT * FROM ( SELECT key, count(1) as c FROM T1 GROUP BY key UNION ALL SELECT key, count(1) as c FROM T1 GROUP BY key @@ -151,7 +158,8 @@ Keys:key 3 1 7 1 8 2 -PREHOOK: query: SELECT * FROM +PREHOOK: query: -- group by followed by a join +SELECT * FROM (SELECT key, count(1) as c FROM T1 GROUP BY key) subq1 JOIN (SELECT key, count(1) as c FROM T1 GROUP BY key) subq2 @@ -195,7 +203,8 @@ Keys:key 7 1 7 17 1 8 2 8 18 1 8 2 8 28 1 -PREHOOK: query: SELECT key, constant, val, count(1) from +PREHOOK: query: -- constants from sub-queries should work fine +SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T1) subq1 group by key, constant, val PREHOOK: type: QUERY @@ -211,7 +220,8 @@ Keys:key,val 7 1 17 1 8 1 18 1 8 1 28 1 -PREHOOK: query: SELECT key, constant3, val, count(1) FROM +PREHOOK: query: -- multiple levels of constants from sub-queries should work fine +SELECT key, constant3, val, count(1) FROM ( SELECT key, constant AS constant2, val, 2 AS constant3 FROM @@ -234,7 +244,8 @@ Keys:key,val 7 2 17 1 8 2 18 1 8 2 28 1 -PREHOOK: query: FROM T1 +PREHOOK: query: -- work with insert overwrite +FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, count(1) GROUP BY key, 1 INSERT OVERWRITE TABLE T3 SELECT key, sum(val) GROUP BY key PREHOOK: type: QUERY @@ -249,7 +260,8 @@ Operator:GBY_8 Table:default@t1 Keys:key -PREHOOK: query: SELECT * +PREHOOK: query: -- simple joins +SELECT * FROM T1 JOIN T2 ON T1.key = t2.key ORDER BY T1.key ASC, T1.val ASC @@ -282,7 +294,8 @@ Keys:key,val Table:default@t1 Keys:key,val -PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * +PREHOOK: query: -- map join +SELECT /*+ MAPJOIN(a) */ * FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY @@ -301,7 +314,8 @@ Keys:key 7 17 7 1 8 18 8 2 8 28 8 2 -PREHOOK: query: SELECT * +PREHOOK: query: -- with constant in join condition +SELECT * FROM T1 JOIN T2 ON T1.key = T2.key AND T1.val = 3 and T2.val = 3 PREHOOK: type: QUERY @@ -314,7 +328,8 @@ Keys:key Table:default@t1 Keys:key -PREHOOK: query: SELECT * +PREHOOK: query: -- subqueries +SELECT * FROM ( SELECT val FROM T1 WHERE key = 5 @@ -352,7 +367,8 @@ Keys:val Table:default@t1 Keys:val -PREHOOK: query: SELECT * +PREHOOK: query: -- with column aliases in subqueries +SELECT * FROM ( SELECT val as v FROM T1 WHERE key = 5 @@ -372,7 +388,8 @@ Keys:val Table:default@t1 Keys:val -PREHOOK: query: SELECT * +PREHOOK: query: -- with constants in subqueries +SELECT * FROM ( SELECT key, val FROM T1 @@ -392,7 +409,8 @@ Keys:key Table:default@t1 Keys:val,key -PREHOOK: query: SELECT * +PREHOOK: query: -- multiple levels of constants in subqueries +SELECT * FROM ( SELECT key, val from @@ -415,7 +433,8 @@ Keys:val,key Table:default@t1 Keys:key -PREHOOK: query: SELECT * +PREHOOK: query: -- no mapping on functions +SELECT * FROM ( SELECT key, val from T1 @@ -429,7 +448,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: SELECT subq1.val, COUNT(*) +PREHOOK: query: -- join followed by group by +SELECT subq1.val, COUNT(*) FROM ( SELECT key, val FROM T1 @@ -450,7 +470,8 @@ Keys:key Table:default@t1 Keys:val,key -PREHOOK: query: SELECT * +PREHOOK: query: -- join followed by union +SELECT * FROM ( SELECT subq1.val, COUNT(*) @@ -489,7 +510,8 @@ Keys:val,key 13.0 1 17.0 1 46.0 1 -PREHOOK: query: SELECT * +PREHOOK: query: -- join followed by join +SELECT * FROM ( SELECT subq1.val as val, COUNT(*) diff --git ql/src/test/results/clientpositive/timestamp_udf.q.out ql/src/test/results/clientpositive/timestamp_udf.q.out index abcb2fb..c04de7a 100644 --- ql/src/test/results/clientpositive/timestamp_udf.q.out +++ ql/src/test/results/clientpositive/timestamp_udf.q.out @@ -36,13 +36,15 @@ POSTHOOK: Output: default@timestamp_udf POSTHOOK: Output: default@timestamp_udf_string POSTHOOK: Lineage: timestamp_udf.t EXPRESSION [] POSTHOOK: Lineage: timestamp_udf_string.t SIMPLE [] -PREHOOK: query: select unix_timestamp(t), year(t), month(t), day(t), dayofmonth(t), +PREHOOK: query: -- Test UDFs with Timestamp input +select unix_timestamp(t), year(t), month(t), day(t), dayofmonth(t), weekofyear(t), hour(t), minute(t), second(t), to_date(t) from timestamp_udf PREHOOK: type: QUERY PREHOOK: Input: default@timestamp_udf #### A masked pattern was here #### -POSTHOOK: query: select unix_timestamp(t), year(t), month(t), day(t), dayofmonth(t), +POSTHOOK: query: -- Test UDFs with Timestamp input +select unix_timestamp(t), year(t), month(t), day(t), dayofmonth(t), weekofyear(t), hour(t), minute(t), second(t), to_date(t) from timestamp_udf POSTHOOK: type: QUERY @@ -155,13 +157,15 @@ POSTHOOK: Input: default@timestamp_udf POSTHOOK: Lineage: timestamp_udf.t EXPRESSION [] POSTHOOK: Lineage: timestamp_udf_string.t SIMPLE [] 2011-05-06 07:08:09.1234567 2011-05-06 12:08:09.1234567 2011-05-06 07:08:09.1234567 2011-05-06 12:08:09.1234567 -PREHOOK: query: select unix_timestamp(t), year(t), month(t), day(t), dayofmonth(t), +PREHOOK: query: -- Test UDFs with string input +select unix_timestamp(t), year(t), month(t), day(t), dayofmonth(t), weekofyear(t), hour(t), minute(t), second(t), to_date(t) from timestamp_udf_string PREHOOK: type: QUERY PREHOOK: Input: default@timestamp_udf_string #### A masked pattern was here #### -POSTHOOK: query: select unix_timestamp(t), year(t), month(t), day(t), dayofmonth(t), +POSTHOOK: query: -- Test UDFs with string input +select unix_timestamp(t), year(t), month(t), day(t), dayofmonth(t), weekofyear(t), hour(t), minute(t), second(t), to_date(t) from timestamp_udf_string POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/transform2.q.out ql/src/test/results/clientpositive/transform2.q.out index aeeaebf..28d098d 100644 --- ql/src/test/results/clientpositive/transform2.q.out +++ ql/src/test/results/clientpositive/transform2.q.out @@ -1,8 +1,10 @@ -PREHOOK: query: SELECT TRANSFORM(substr(key, 1, 2)) USING 'cat' FROM src LIMIT 1 +PREHOOK: query: -- Transform with a function that has many parameters +SELECT TRANSFORM(substr(key, 1, 2)) USING 'cat' FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT TRANSFORM(substr(key, 1, 2)) USING 'cat' FROM src LIMIT 1 +POSTHOOK: query: -- Transform with a function that has many parameters +SELECT TRANSFORM(substr(key, 1, 2)) USING 'cat' FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/truncate_table.q.out ql/src/test/results/clientpositive/truncate_table.q.out index a894ce7..8ef2dd5 100644 --- ql/src/test/results/clientpositive/truncate_table.q.out +++ ql/src/test/results/clientpositive/truncate_table.q.out @@ -66,9 +66,11 @@ PREHOOK: Output: default@srcpart_truncate@ds=2008-04-09/hr=12 POSTHOOK: query: load data local inpath '../data/files/kv1.txt' into table srcpart_truncate partition (ds='2008-04-09', hr='12') POSTHOOK: type: LOAD POSTHOOK: Output: default@srcpart_truncate@ds=2008-04-09/hr=12 -PREHOOK: query: explain TRUNCATE TABLE src_truncate +PREHOOK: query: -- truncate non-partitioned table +explain TRUNCATE TABLE src_truncate PREHOOK: type: TRUNCATETABLE -POSTHOOK: query: explain TRUNCATE TABLE src_truncate +POSTHOOK: query: -- truncate non-partitioned table +explain TRUNCATE TABLE src_truncate POSTHOOK: type: TRUNCATETABLE ABSTRACT SYNTAX TREE: (TOK_TRUNCATETABLE (TOK_TABLE_PARTITION src_truncate)) @@ -97,9 +99,11 @@ POSTHOOK: query: select * from src_truncate POSTHOOK: type: QUERY POSTHOOK: Input: default@src_truncate #### A masked pattern was here #### -PREHOOK: query: explain TRUNCATE TABLE srcpart_truncate partition (ds='2008-04-08', hr='11') +PREHOOK: query: -- truncate a partition +explain TRUNCATE TABLE srcpart_truncate partition (ds='2008-04-08', hr='11') PREHOOK: type: TRUNCATETABLE -POSTHOOK: query: explain TRUNCATE TABLE srcpart_truncate partition (ds='2008-04-08', hr='11') +POSTHOOK: query: -- truncate a partition +explain TRUNCATE TABLE srcpart_truncate partition (ds='2008-04-08', hr='11') POSTHOOK: type: TRUNCATETABLE ABSTRACT SYNTAX TREE: (TOK_TRUNCATETABLE (TOK_TABLE_PARTITION srcpart_truncate (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '11')))) @@ -133,9 +137,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart_truncate POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=11 #### A masked pattern was here #### -PREHOOK: query: explain TRUNCATE TABLE srcpart_truncate partition (ds, hr='12') +PREHOOK: query: -- truncate partitions with partial spec +explain TRUNCATE TABLE srcpart_truncate partition (ds, hr='12') PREHOOK: type: TRUNCATETABLE -POSTHOOK: query: explain TRUNCATE TABLE srcpart_truncate partition (ds, hr='12') +POSTHOOK: query: -- truncate partitions with partial spec +explain TRUNCATE TABLE srcpart_truncate partition (ds, hr='12') POSTHOOK: type: TRUNCATETABLE ABSTRACT SYNTAX TREE: (TOK_TRUNCATETABLE (TOK_TABLE_PARTITION srcpart_truncate (TOK_PARTSPEC (TOK_PARTVAL ds) (TOK_PARTVAL hr '12')))) @@ -173,9 +179,11 @@ POSTHOOK: Input: default@srcpart_truncate POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart_truncate@ds=2008-04-09/hr=12 #### A masked pattern was here #### -PREHOOK: query: explain TRUNCATE TABLE srcpart_truncate +PREHOOK: query: -- truncate partitioned table +explain TRUNCATE TABLE srcpart_truncate PREHOOK: type: TRUNCATETABLE -POSTHOOK: query: explain TRUNCATE TABLE srcpart_truncate +POSTHOOK: query: -- truncate partitioned table +explain TRUNCATE TABLE srcpart_truncate POSTHOOK: type: TRUNCATETABLE ABSTRACT SYNTAX TREE: (TOK_TRUNCATETABLE (TOK_TABLE_PARTITION srcpart_truncate)) diff --git ql/src/test/results/clientpositive/type_widening.q.out ql/src/test/results/clientpositive/type_widening.q.out index 913e6f5..2c85a1d 100644 --- ql/src/test/results/clientpositive/type_widening.q.out +++ ql/src/test/results/clientpositive/type_widening.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: EXPLAIN SELECT COALESCE(0, 9223372036854775807) FROM src LIMIT 1 +PREHOOK: query: -- Check for int, bigint automatic type widening conversions in UDFs, UNIONS +EXPLAIN SELECT COALESCE(0, 9223372036854775807) FROM src LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT COALESCE(0, 9223372036854775807) FROM src LIMIT 1 +POSTHOOK: query: -- Check for int, bigint automatic type widening conversions in UDFs, UNIONS +EXPLAIN SELECT COALESCE(0, 9223372036854775807) FROM src LIMIT 1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COALESCE 0 9223372036854775807))) (TOK_LIMIT 1))) diff --git ql/src/test/results/clientpositive/udaf_percentile_approx.q.out ql/src/test/results/clientpositive/udaf_percentile_approx.q.out index 3bd6fd6..d871179 100644 --- ql/src/test/results/clientpositive/udaf_percentile_approx.q.out +++ ql/src/test/results/clientpositive/udaf_percentile_approx.q.out @@ -1,8 +1,10 @@ -PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src +PREHOOK: query: -- disable map-side aggregation +SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src +POSTHOOK: query: -- disable map-side aggregation +SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -106,11 +108,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### [26.0,255.5,479.0,491.0] -PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src +PREHOOK: query: -- enable map-side aggregation +SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src +POSTHOOK: query: -- enable map-side aggregation +SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/udf_array_contains.q.out ql/src/test/results/clientpositive/udf_array_contains.q.out index eced78b..5ff0b17 100644 --- ql/src/test/results/clientpositive/udf_array_contains.q.out +++ ql/src/test/results/clientpositive/udf_array_contains.q.out @@ -11,21 +11,25 @@ array_contains(array, value) - Returns TRUE if the array contains value. Example: > SELECT array_contains(array(1, 2, 3), 2) FROM src LIMIT 1; true -PREHOOK: query: SELECT array_contains(array(1, 2, 3), 1) FROM src LIMIT 1 +PREHOOK: query: -- evalutes function for array of primitives +SELECT array_contains(array(1, 2, 3), 1) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT array_contains(array(1, 2, 3), 1) FROM src LIMIT 1 +POSTHOOK: query: -- evalutes function for array of primitives +SELECT array_contains(array(1, 2, 3), 1) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### true -PREHOOK: query: SELECT array_contains(array(array(1,2), array(2,3), array(3,4)), array(1,2)) +PREHOOK: query: -- evaluates function for nested arrays +SELECT array_contains(array(array(1,2), array(2,3), array(3,4)), array(1,2)) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT array_contains(array(array(1,2), array(2,3), array(3,4)), array(1,2)) +POSTHOOK: query: -- evaluates function for nested arrays +SELECT array_contains(array(array(1,2), array(2,3), array(3,4)), array(1,2)) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/udf_bin.q.out ql/src/test/results/clientpositive/udf_bin.q.out index 966ee66..0e4a8d0 100644 --- ql/src/test/results/clientpositive/udf_bin.q.out +++ ql/src/test/results/clientpositive/udf_bin.q.out @@ -29,11 +29,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 1 0 101111101011100001101100101 -PREHOOK: query: SELECT bin(-5) FROM src LIMIT 1 +PREHOOK: query: -- Negative numbers should be treated as two's complement (64 bit). +SELECT bin(-5) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT bin(-5) FROM src LIMIT 1 +POSTHOOK: query: -- Negative numbers should be treated as two's complement (64 bit). +SELECT bin(-5) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/udf_case.q.out ql/src/test/results/clientpositive/udf_case.q.out index 8348258..9c7132c 100644 --- ql/src/test/results/clientpositive/udf_case.q.out +++ ql/src/test/results/clientpositive/udf_case.q.out @@ -165,13 +165,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 2 5 15 NULL 20 24 -PREHOOK: query: SELECT CASE 1 WHEN 1 THEN 'yo' +PREHOOK: query: -- verify that short-circuiting is working correctly for CASE +-- we should never get to the ELSE branch, which would raise an exception +SELECT CASE 1 WHEN 1 THEN 'yo' ELSE reflect('java.lang.String', 'bogus', 1) END FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT CASE 1 WHEN 1 THEN 'yo' +POSTHOOK: query: -- verify that short-circuiting is working correctly for CASE +-- we should never get to the ELSE branch, which would raise an exception +SELECT CASE 1 WHEN 1 THEN 'yo' ELSE reflect('java.lang.String', 'bogus', 1) END FROM src LIMIT 1 POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/udf_concat_ws.q.out ql/src/test/results/clientpositive/udf_concat_ws.q.out index dfaff63..cfcad96 100644 --- ql/src/test/results/clientpositive/udf_concat_ws.q.out +++ ql/src/test/results/clientpositive/udf_concat_ws.q.out @@ -97,7 +97,8 @@ POSTHOOK: Lineage: dest1.c1 SIMPLE [] POSTHOOK: Lineage: dest1.c2 SIMPLE [] POSTHOOK: Lineage: dest1.c3 SIMPLE [] xyzabc8675309 abc,xyz,8675309 NULL abc**8675309 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- evalutes function for array of strings +EXPLAIN SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'), concat_ws('-', 'www', array('face', 'book', 'com'), '1234'), concat_ws('F', 'www', array('face', 'book', 'com', '1234')), @@ -106,7 +107,8 @@ SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'), concat_ws('[]', array('www'), 'face', array('book', 'com', '1234')), concat_ws('AAA', array('www'), array('face', 'book', 'com'), '1234') FROM dest1 LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- evalutes function for array of strings +EXPLAIN SELECT concat_ws('.', array('www', 'face', 'book', 'com'), '1234'), concat_ws('-', 'www', array('face', 'book', 'com'), '1234'), concat_ws('F', 'www', array('face', 'book', 'com', '1234')), diff --git ql/src/test/results/clientpositive/udf_conv.q.out ql/src/test/results/clientpositive/udf_conv.q.out index fae9c2b..010e192 100644 --- ql/src/test/results/clientpositive/udf_conv.q.out +++ ql/src/test/results/clientpositive/udf_conv.q.out @@ -14,7 +14,10 @@ Example: '4' > SELECT conv(-10, 16, -10) FROM src LIMIT 1; '16' -PREHOOK: query: SELECT +PREHOOK: query: -- conv must work on both strings and integers up to 64-bit precision + +-- Some simple conversions to test different bases +SELECT conv('4521', 10, 36), conv('22', 10, 10), conv('110011', 2, 16), @@ -23,7 +26,10 @@ FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- conv must work on both strings and integers up to 64-bit precision + +-- Some simple conversions to test different bases +SELECT conv('4521', 10, 36), conv('22', 10, 10), conv('110011', 2, 16), @@ -33,7 +39,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 3HL 22 33 116ED2B2FB4 -PREHOOK: query: SELECT +PREHOOK: query: -- Test negative numbers. If to_base is positive, the number should be handled +-- as a two's complement (64-bit) +SELECT conv('-641', 10, -10), conv('1011', 2, -16), conv('-1', 10, 16), @@ -42,7 +50,9 @@ FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- Test negative numbers. If to_base is positive, the number should be handled +-- as a two's complement (64-bit) +SELECT conv('-641', 10, -10), conv('1011', 2, -16), conv('-1', 10, 16), @@ -52,7 +62,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -641 B FFFFFFFFFFFFFFFF FFFFFFFFFFFFFFF1 -PREHOOK: query: SELECT +PREHOOK: query: -- Test overflow. If a number is two large, the result should be -1 (if signed) +-- or MAX_LONG (if unsigned) +SELECT conv('9223372036854775807', 36, 16), conv('9223372036854775807', 36, -16), conv('-9223372036854775807', 36, 16), @@ -61,7 +73,9 @@ FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- Test overflow. If a number is two large, the result should be -1 (if signed) +-- or MAX_LONG (if unsigned) +SELECT conv('9223372036854775807', 36, 16), conv('9223372036854775807', 36, -16), conv('-9223372036854775807', 36, 16), @@ -71,7 +85,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### FFFFFFFFFFFFFFFF -1 FFFFFFFFFFFFFFFF -1 -PREHOOK: query: SELECT +PREHOOK: query: -- Test with invalid input. If one of the bases is invalid, the result should +-- be NULL. If there is an invalid digit in the number, the longest valid +-- prefix should be converted. +SELECT conv('123455', 3, 10), conv('131', 1, 5), conv('515', 5, 100), @@ -80,7 +97,10 @@ FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- Test with invalid input. If one of the bases is invalid, the result should +-- be NULL. If there is an invalid digit in the number, the longest valid +-- prefix should be converted. +SELECT conv('123455', 3, 10), conv('131', 1, 5), conv('515', 5, 100), @@ -90,7 +110,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 5 NULL NULL NULL -PREHOOK: query: SELECT +PREHOOK: query: -- Perform the same tests with number arguments. + +SELECT conv(4521, 10, 36), conv(22, 10, 10), conv(110011, 2, 16) @@ -98,7 +120,9 @@ FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- Perform the same tests with number arguments. + +SELECT conv(4521, 10, 36), conv(22, 10, 10), conv(110011, 2, 16) @@ -164,13 +188,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 5 NULL NULL NULL -PREHOOK: query: SELECT conv(key, 10, 16), +PREHOOK: query: -- Make sure that state is properly reset. + +SELECT conv(key, 10, 16), conv(key, 16, 10) FROM src LIMIT 3 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT conv(key, 10, 16), +POSTHOOK: query: -- Make sure that state is properly reset. + +SELECT conv(key, 10, 16), conv(key, 16, 10) FROM src LIMIT 3 POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/udf_format_number.q.out ql/src/test/results/clientpositive/udf_format_number.q.out index 14efac2..0bfc1f8 100644 --- ql/src/test/results/clientpositive/udf_format_number.q.out +++ ql/src/test/results/clientpositive/udf_format_number.q.out @@ -2,9 +2,13 @@ PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE POSTHOOK: query: use default POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: DESCRIBE FUNCTION format_number +PREHOOK: query: -- Test format_number() UDF + +DESCRIBE FUNCTION format_number PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION format_number +POSTHOOK: query: -- Test format_number() UDF + +DESCRIBE FUNCTION format_number POSTHOOK: type: DESCFUNCTION format_number(X, D) - Formats the number X to a format like '#,###,###.##', rounded to D decimal places, and returns the result as a string. If D is 0, the result has no decimal point or fractional part. This is supposed to function like MySQL's FORMAT PREHOOK: query: DESCRIBE FUNCTION EXTENDED format_number @@ -76,7 +80,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 12,332.1235 12,332.1000 12,332 -PREHOOK: query: SELECT format_number(0.123456789, 12), +PREHOOK: query: -- positive numbers +SELECT format_number(0.123456789, 12), format_number(12345678.123456789, 5), format_number(1234567.123456789, 7), format_number(123456.123456789, 0) @@ -84,7 +89,8 @@ FROM src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT format_number(0.123456789, 12), +POSTHOOK: query: -- positive numbers +SELECT format_number(0.123456789, 12), format_number(12345678.123456789, 5), format_number(1234567.123456789, 7), format_number(123456.123456789, 0) @@ -93,7 +99,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0.123456789000 12,345,678.12346 1,234,567.1234568 123,456 -PREHOOK: query: SELECT format_number(-123456.123456789, 0), +PREHOOK: query: -- negative numbers +SELECT format_number(-123456.123456789, 0), format_number(-1234567.123456789, 2), format_number(-0.123456789, 15), format_number(-12345.123456789, 4) @@ -101,7 +108,8 @@ FROM src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT format_number(-123456.123456789, 0), +POSTHOOK: query: -- negative numbers +SELECT format_number(-123456.123456789, 0), format_number(-1234567.123456789, 2), format_number(-0.123456789, 15), format_number(-12345.123456789, 4) @@ -110,7 +118,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -123,456 -1,234,567.12 -0.123456789000000 -12,345.1235 -PREHOOK: query: SELECT format_number(0.0, 4), +PREHOOK: query: -- zeros +SELECT format_number(0.0, 4), format_number(0.000000, 1), format_number(000.0000, 1), format_number(00000.0000, 1), @@ -119,7 +128,8 @@ FROM src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT format_number(0.0, 4), +POSTHOOK: query: -- zeros +SELECT format_number(0.0, 4), format_number(0.000000, 1), format_number(000.0000, 1), format_number(00000.0000, 1), @@ -129,7 +139,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0.0000 0.0 0.0 0.0 -0.0000 -PREHOOK: query: SELECT format_number(0, 0), +PREHOOK: query: -- integers +SELECT format_number(0, 0), format_number(1, 4), format_number(12, 2), format_number(123, 5), @@ -138,7 +149,8 @@ FROM src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT format_number(0, 0), +POSTHOOK: query: -- integers +SELECT format_number(0, 0), format_number(1, 4), format_number(12, 2), format_number(123, 5), @@ -148,7 +160,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 1.0000 12.00 123.00000 1,234.0000000 -PREHOOK: query: SELECT format_number(-9223372036854775807, 10), +PREHOOK: query: -- long and double boundary +-- 9223372036854775807 is LONG_MAX +-- -9223372036854775807 is one more than LONG_MIN, +-- due to HIVE-2733, put it here to check LONG_MIN boundary +-- 4.9E-324 and 1.7976931348623157E308 are Double.MIN_VALUE and Double.MAX_VALUE +-- check them for Double boundary +SELECT format_number(-9223372036854775807, 10), format_number(9223372036854775807, 20), format_number(4.9E-324, 324), format_number(1.7976931348623157E308, 308) @@ -156,7 +174,13 @@ FROM src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT format_number(-9223372036854775807, 10), +POSTHOOK: query: -- long and double boundary +-- 9223372036854775807 is LONG_MAX +-- -9223372036854775807 is one more than LONG_MIN, +-- due to HIVE-2733, put it here to check LONG_MIN boundary +-- 4.9E-324 and 1.7976931348623157E308 are Double.MIN_VALUE and Double.MAX_VALUE +-- check them for Double boundary +SELECT format_number(-9223372036854775807, 10), format_number(9223372036854775807, 20), format_number(4.9E-324, 324), format_number(1.7976931348623157E308, 308) diff --git ql/src/test/results/clientpositive/udf_get_json_object.q.out ql/src/test/results/clientpositive/udf_get_json_object.q.out index 684de1e..e670365 100644 --- ql/src/test/results/clientpositive/udf_get_json_object.q.out +++ ql/src/test/results/clientpositive/udf_get_json_object.q.out @@ -173,9 +173,13 @@ POSTHOOK: Input: default@src_json #### A masked pattern was here #### POSTHOOK: Lineage: dest1.c1 SIMPLE [] 1234 -PREHOOK: query: CREATE TABLE dest2(c1 STRING) STORED AS RCFILE +PREHOOK: query: -- Verify that get_json_object can handle new lines in JSON values + +CREATE TABLE dest2(c1 STRING) STORED AS RCFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest2(c1 STRING) STORED AS RCFILE +POSTHOOK: query: -- Verify that get_json_object can handle new lines in JSON values + +CREATE TABLE dest2(c1 STRING) STORED AS RCFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.c1 SIMPLE [] diff --git ql/src/test/results/clientpositive/udf_hex.q.out ql/src/test/results/clientpositive/udf_hex.q.out index df25ece..82db575 100644 --- ql/src/test/results/clientpositive/udf_hex.q.out +++ ql/src/test/results/clientpositive/udf_hex.q.out @@ -15,7 +15,9 @@ Example: 'H1' > SELECT hex('Facebook') FROM src LIMIT 1; '46616365626F6F6B' -PREHOOK: query: SELECT +PREHOOK: query: -- If the argument is a string, hex should return a string containing two hex +-- digits for every character in the input. +SELECT hex('Facebook'), hex('\0'), hex('qwertyuiopasdfghjkl') @@ -23,7 +25,9 @@ FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- If the argument is a string, hex should return a string containing two hex +-- digits for every character in the input. +SELECT hex('Facebook'), hex('\0'), hex('qwertyuiopasdfghjkl') @@ -32,7 +36,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 46616365626F6F6B 00 71776572747975696F706173646667686A6B6C -PREHOOK: query: SELECT +PREHOOK: query: -- If the argument is a number, hex should convert it to hexadecimal. +SELECT hex(1), hex(0), hex(4207849477) @@ -40,7 +45,8 @@ FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- If the argument is a number, hex should convert it to hexadecimal. +SELECT hex(1), hex(0), hex(4207849477) @@ -49,11 +55,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 1 0 FACEB005 -PREHOOK: query: SELECT hex(-5) FROM src LIMIT 1 +PREHOOK: query: -- Negative numbers should be treated as two's complement (64 bit). +SELECT hex(-5) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT hex(-5) FROM src LIMIT 1 +POSTHOOK: query: -- Negative numbers should be treated as two's complement (64 bit). +SELECT hex(-5) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/udf_if.q.out ql/src/test/results/clientpositive/udf_if.q.out index e42ec73..2c2a542 100644 --- ql/src/test/results/clientpositive/udf_if.q.out +++ ql/src/test/results/clientpositive/udf_if.q.out @@ -89,14 +89,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 1 1 1 1 NULL 2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Type conversions +EXPLAIN SELECT IF(TRUE, CAST(128 AS SMALLINT), CAST(1 AS TINYINT)) AS COL1, IF(FALSE, 1, 1.1) AS COL2, IF(FALSE, 1, 'ABC') AS COL3, IF(FALSE, 'ABC', 12.3) AS COL4 FROM src LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Type conversions +EXPLAIN SELECT IF(TRUE, CAST(128 AS SMALLINT), CAST(1 AS TINYINT)) AS COL1, IF(FALSE, 1, 1.1) AS COL2, IF(FALSE, 1, 'ABC') AS COL3, diff --git ql/src/test/results/clientpositive/udf_java_method.q.out ql/src/test/results/clientpositive/udf_java_method.q.out index 6aaa4b8..15e71e6 100644 --- ql/src/test/results/clientpositive/udf_java_method.q.out +++ ql/src/test/results/clientpositive/udf_java_method.q.out @@ -11,7 +11,9 @@ java_method(class,method[,arg1[,arg2..]]) calls method with reflection Synonyms: reflect Use this UDF to call Java methods by matching the argument signature -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: -- java_method() is a synonym for reflect() + +EXPLAIN EXTENDED SELECT java_method("java.lang.String", "valueOf", 1), java_method("java.lang.String", "isEmpty"), java_method("java.lang.Math", "max", 2, 3), @@ -21,7 +23,9 @@ SELECT java_method("java.lang.String", "valueOf", 1), java_method("java.lang.Math", "floor", 1.9) FROM src LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: -- java_method() is a synonym for reflect() + +EXPLAIN EXTENDED SELECT java_method("java.lang.String", "valueOf", 1), java_method("java.lang.String", "isEmpty"), java_method("java.lang.Math", "max", 2, 3), diff --git ql/src/test/results/clientpositive/udf_length.q.out ql/src/test/results/clientpositive/udf_length.q.out index 29bbee8..945c382 100644 --- ql/src/test/results/clientpositive/udf_length.q.out +++ ql/src/test/results/clientpositive/udf_length.q.out @@ -161,9 +161,11 @@ POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@dest1 POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +PREHOOK: query: -- Test with non-ascii characters. +CREATE TABLE dest1(name STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +POSTHOOK: query: -- Test with non-ascii characters. +CREATE TABLE dest1(name STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/udf_map_keys.q.out ql/src/test/results/clientpositive/udf_map_keys.q.out index 09e20fc..5001686 100644 --- ql/src/test/results/clientpositive/udf_map_keys.q.out +++ ql/src/test/results/clientpositive/udf_map_keys.q.out @@ -2,9 +2,13 @@ PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE POSTHOOK: query: use default POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: DESCRIBE FUNCTION map_keys +PREHOOK: query: -- Test map_keys() UDF + +DESCRIBE FUNCTION map_keys PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION map_keys +POSTHOOK: query: -- Test map_keys() UDF + +DESCRIBE FUNCTION map_keys POSTHOOK: type: DESCFUNCTION map_keys(map) - Returns an unordered array containing the keys of the input map. PREHOOK: query: DESCRIBE FUNCTION EXTENDED map_keys @@ -12,20 +16,24 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED map_keys POSTHOOK: type: DESCFUNCTION map_keys(map) - Returns an unordered array containing the keys of the input map. -PREHOOK: query: SELECT map_keys(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 +PREHOOK: query: -- Evaluate function against INT valued keys +SELECT map_keys(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT map_keys(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 +POSTHOOK: query: -- Evaluate function against INT valued keys +SELECT map_keys(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### [1,2,3] -PREHOOK: query: SELECT map_keys(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 +PREHOOK: query: -- Evaluate function against STRING valued keys +SELECT map_keys(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT map_keys(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 +POSTHOOK: query: -- Evaluate function against STRING valued keys +SELECT map_keys(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/udf_map_values.q.out ql/src/test/results/clientpositive/udf_map_values.q.out index 9dfa5aa..8174112 100644 --- ql/src/test/results/clientpositive/udf_map_values.q.out +++ ql/src/test/results/clientpositive/udf_map_values.q.out @@ -2,9 +2,13 @@ PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE POSTHOOK: query: use default POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: DESCRIBE FUNCTION map_values +PREHOOK: query: -- Test map_values() UDF + +DESCRIBE FUNCTION map_values PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION map_values +POSTHOOK: query: -- Test map_values() UDF + +DESCRIBE FUNCTION map_values POSTHOOK: type: DESCFUNCTION map_values(map) - Returns an unordered array containing the values of the input map. PREHOOK: query: DESCRIBE FUNCTION EXTENDED map_values @@ -12,20 +16,24 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED map_values POSTHOOK: type: DESCFUNCTION map_values(map) - Returns an unordered array containing the values of the input map. -PREHOOK: query: SELECT map_values(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 +PREHOOK: query: -- Evaluate function against STRING valued values +SELECT map_values(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT map_values(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 +POSTHOOK: query: -- Evaluate function against STRING valued values +SELECT map_values(map(1, "a", 2, "b", 3, "c")) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### ["a","b","c"] -PREHOOK: query: SELECT map_values(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 +PREHOOK: query: -- Evaluate function against INT valued keys +SELECT map_values(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT map_values(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 +POSTHOOK: query: -- Evaluate function against INT valued keys +SELECT map_values(map("a", 1, "b", 2, "c", 3)) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/udf_negative.q.out ql/src/test/results/clientpositive/udf_negative.q.out index b366db2..7a29a9b 100644 --- ql/src/test/results/clientpositive/udf_negative.q.out +++ ql/src/test/results/clientpositive/udf_negative.q.out @@ -8,9 +8,11 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED negative POSTHOOK: type: DESCFUNCTION negative a - Returns -a -PREHOOK: query: DESCRIBE FUNCTION - +PREHOOK: query: -- synonym +DESCRIBE FUNCTION - PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION - +POSTHOOK: query: -- synonym +DESCRIBE FUNCTION - POSTHOOK: type: DESCFUNCTION a - b - Returns the difference a-b PREHOOK: query: DESCRIBE FUNCTION EXTENDED - diff --git ql/src/test/results/clientpositive/udf_not.q.out ql/src/test/results/clientpositive/udf_not.q.out index 961e996..b80d7b6 100644 --- ql/src/test/results/clientpositive/udf_not.q.out +++ ql/src/test/results/clientpositive/udf_not.q.out @@ -9,9 +9,11 @@ POSTHOOK: query: DESCRIBE FUNCTION EXTENDED not POSTHOOK: type: DESCFUNCTION not a - Logical not Synonyms: ! -PREHOOK: query: DESCRIBE FUNCTION ! +PREHOOK: query: -- synonym +DESCRIBE FUNCTION ! PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION ! +POSTHOOK: query: -- synonym +DESCRIBE FUNCTION ! POSTHOOK: type: DESCFUNCTION ! a - Logical not PREHOOK: query: DESCRIBE FUNCTION EXTENDED ! diff --git ql/src/test/results/clientpositive/udf_percentile.q.out ql/src/test/results/clientpositive/udf_percentile.q.out index a6daa0a..b963236 100644 --- ql/src/test/results/clientpositive/udf_percentile.q.out +++ ql/src/test/results/clientpositive/udf_percentile.q.out @@ -288,7 +288,8 @@ POSTHOOK: Input: default@src 47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0] 48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0] 49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0] -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, +PREHOOK: query: -- test null handling +SELECT CAST(key AS INT) DIV 10, percentile(NULL, 0.0), percentile(NULL, array(0.0, 0.5, 0.99, 1.0)) FROM src @@ -296,7 +297,8 @@ GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, +POSTHOOK: query: -- test null handling +SELECT CAST(key AS INT) DIV 10, percentile(NULL, 0.0), percentile(NULL, array(0.0, 0.5, 0.99, 1.0)) FROM src @@ -354,7 +356,8 @@ POSTHOOK: Input: default@src 47 NULL NULL 48 NULL NULL 49 NULL NULL -PREHOOK: query: SELECT CAST(key AS INT) DIV 10, +PREHOOK: query: -- test empty array handling +SELECT CAST(key AS INT) DIV 10, percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), array(0.0, 0.5, 0.99, 1.0)) FROM src @@ -362,7 +365,8 @@ GROUP BY CAST(key AS INT) DIV 10 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, +POSTHOOK: query: -- test empty array handling +SELECT CAST(key AS INT) DIV 10, percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5), percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), array(0.0, 0.5, 0.99, 1.0)) FROM src @@ -429,11 +433,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### NULL -PREHOOK: query: select percentile(cast(key as bigint), array()) from src where false +PREHOOK: query: -- test where percentile list is empty +select percentile(cast(key as bigint), array()) from src where false PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select percentile(cast(key as bigint), array()) from src where false +POSTHOOK: query: -- test where percentile list is empty +select percentile(cast(key as bigint), array()) from src where false POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/udf_positive.q.out ql/src/test/results/clientpositive/udf_positive.q.out index 8c352d7..4524950 100644 --- ql/src/test/results/clientpositive/udf_positive.q.out +++ ql/src/test/results/clientpositive/udf_positive.q.out @@ -8,9 +8,11 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED positive POSTHOOK: type: DESCFUNCTION positive a - Returns a -PREHOOK: query: DESCRIBE FUNCTION + +PREHOOK: query: -- synonym +DESCRIBE FUNCTION + PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION + +POSTHOOK: query: -- synonym +DESCRIBE FUNCTION + POSTHOOK: type: DESCFUNCTION a + b - Returns a+b PREHOOK: query: DESCRIBE FUNCTION EXTENDED + diff --git ql/src/test/results/clientpositive/udf_printf.q.out ql/src/test/results/clientpositive/udf_printf.q.out index 3970274..564d9ed 100644 --- ql/src/test/results/clientpositive/udf_printf.q.out +++ ql/src/test/results/clientpositive/udf_printf.q.out @@ -2,9 +2,13 @@ PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE POSTHOOK: query: use default POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: DESCRIBE FUNCTION printf +PREHOOK: query: -- Test printf() UDF + +DESCRIBE FUNCTION printf PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION printf +POSTHOOK: query: -- Test printf() UDF + +DESCRIBE FUNCTION printf POSTHOOK: type: DESCFUNCTION printf(String format, Obj... args) - function that can format strings according to printf-style format strings PREHOOK: query: DESCRIBE FUNCTION EXTENDED printf @@ -53,11 +57,13 @@ STAGE PLANS: limit: 1 -PREHOOK: query: SELECT printf("Hello World %d %s", 100, "days") FROM src LIMIT 1 +PREHOOK: query: -- Test Primitive Types +SELECT printf("Hello World %d %s", 100, "days") FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT printf("Hello World %d %s", 100, "days") FROM src LIMIT 1 +POSTHOOK: query: -- Test Primitive Types +SELECT printf("Hello World %d %s", 100, "days") FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -71,18 +77,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### All Type Test: false, A, 15000, 1.234000e+01, +27183.2401, 2300.41, 32, corret, 0x1.002p8 -PREHOOK: query: SELECT printf("Color %s, String Null: %s, number1 %d, number2 %05d, Integer Null: %d, hex %#x, float %5.2f Double Null: %f\n", "red", NULL, 123456, 89, NULL, 255, 3.14159, NULL) FROM src LIMIT 1 +PREHOOK: query: -- Test NULL Values +SELECT printf("Color %s, String Null: %s, number1 %d, number2 %05d, Integer Null: %d, hex %#x, float %5.2f Double Null: %f\n", "red", NULL, 123456, 89, NULL, 255, 3.14159, NULL) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT printf("Color %s, String Null: %s, number1 %d, number2 %05d, Integer Null: %d, hex %#x, float %5.2f Double Null: %f\n", "red", NULL, 123456, 89, NULL, 255, 3.14159, NULL) FROM src LIMIT 1 +POSTHOOK: query: -- Test NULL Values +SELECT printf("Color %s, String Null: %s, number1 %d, number2 %05d, Integer Null: %d, hex %#x, float %5.2f Double Null: %f\n", "red", NULL, 123456, 89, NULL, 255, 3.14159, NULL) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### Color red, String Null: null, number1 123456, number2 00089, Integer Null: null, hex 0xff, float 3.14 Double Null: null -PREHOOK: query: create table timestamp_udf (t timestamp) +PREHOOK: query: -- Test Timestamp +create table timestamp_udf (t timestamp) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table timestamp_udf (t timestamp) +POSTHOOK: query: -- Test Timestamp +create table timestamp_udf (t timestamp) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@timestamp_udf PREHOOK: query: from src @@ -117,12 +127,14 @@ POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@timestamp_udf POSTHOOK: Output: default@timestamp_udf POSTHOOK: Lineage: timestamp_udf.t EXPRESSION [] -PREHOOK: query: CREATE TABLE binay_udf(key binary, value int) +PREHOOK: query: -- Test Binary +CREATE TABLE binay_udf(key binary, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '9' STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE binay_udf(key binary, value int) +POSTHOOK: query: -- Test Binary +CREATE TABLE binay_udf(key binary, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '9' STORED AS TEXTFILE diff --git ql/src/test/results/clientpositive/udf_reverse.q.out ql/src/test/results/clientpositive/udf_reverse.q.out index ec75aa4..ed50f8d 100644 --- ql/src/test/results/clientpositive/udf_reverse.q.out +++ ql/src/test/results/clientpositive/udf_reverse.q.out @@ -161,9 +161,15 @@ POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@dest1 POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +PREHOOK: query: -- Test with non-ascii characters +-- kv4.txt contains the text 0xE982B5E993AE, which should be reversed to +-- 0xE993AEE982B5 +CREATE TABLE dest1(name STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +POSTHOOK: query: -- Test with non-ascii characters +-- kv4.txt contains the text 0xE982B5E993AE, which should be reversed to +-- 0xE993AEE982B5 +CREATE TABLE dest1(name STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/udf_round_2.q.out ql/src/test/results/clientpositive/udf_round_2.q.out index 67ff0cf..5dbbd2b 100644 --- ql/src/test/results/clientpositive/udf_round_2.q.out +++ ql/src/test/results/clientpositive/udf_round_2.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: create table tstTbl1(n double) +PREHOOK: query: -- test for NaN (not-a-number) +create table tstTbl1(n double) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tstTbl1(n double) +POSTHOOK: query: -- test for NaN (not-a-number) +create table tstTbl1(n double) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tstTbl1 PREHOOK: query: insert overwrite table tstTbl1 @@ -44,11 +46,13 @@ POSTHOOK: Input: default@tsttbl1 #### A masked pattern was here #### POSTHOOK: Lineage: tsttbl1.n EXPRESSION [] NaN -PREHOOK: query: select round(1/0), round(1/0, 2), round(1.0/0.0), round(1.0/0.0, 2) from src limit 1 +PREHOOK: query: -- test for Infinity +select round(1/0), round(1/0, 2), round(1.0/0.0), round(1.0/0.0, 2) from src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select round(1/0), round(1/0, 2), round(1.0/0.0), round(1.0/0.0, 2) from src limit 1 +POSTHOOK: query: -- test for Infinity +select round(1/0), round(1/0, 2), round(1.0/0.0), round(1.0/0.0, 2) from src limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/udf_round_3.q.out ql/src/test/results/clientpositive/udf_round_3.q.out index eb2588e..0b00d6a 100644 --- ql/src/test/results/clientpositive/udf_round_3.q.out +++ ql/src/test/results/clientpositive/udf_round_3.q.out @@ -1,44 +1,54 @@ -PREHOOK: query: select round(-128), round(127), round(0) from src limit 1 +PREHOOK: query: -- test for TINYINT +select round(-128), round(127), round(0) from src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select round(-128), round(127), round(0) from src limit 1 +POSTHOOK: query: -- test for TINYINT +select round(-128), round(127), round(0) from src limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -128 127 0 -PREHOOK: query: select round(-32768), round(32767), round(-129), round(128) from src limit 1 +PREHOOK: query: -- test for SMALLINT +select round(-32768), round(32767), round(-129), round(128) from src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select round(-32768), round(32767), round(-129), round(128) from src limit 1 +POSTHOOK: query: -- test for SMALLINT +select round(-32768), round(32767), round(-129), round(128) from src limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -32768 32767 -129 128 -PREHOOK: query: select round(cast(negative(pow(2, 31)) as INT)), round(cast((pow(2, 31) - 1) as INT)), round(-32769), round(32768) from src limit 1 +PREHOOK: query: -- test for INT +select round(cast(negative(pow(2, 31)) as INT)), round(cast((pow(2, 31) - 1) as INT)), round(-32769), round(32768) from src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select round(cast(negative(pow(2, 31)) as INT)), round(cast((pow(2, 31) - 1) as INT)), round(-32769), round(32768) from src limit 1 +POSTHOOK: query: -- test for INT +select round(cast(negative(pow(2, 31)) as INT)), round(cast((pow(2, 31) - 1) as INT)), round(-32769), round(32768) from src limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -2147483648 2147483647 -32769 32768 -PREHOOK: query: select round(cast(negative(pow(2, 63)) as BIGINT)), round(cast((pow(2, 63) - 1) as BIGINT)), round(cast(negative(pow(2, 31) + 1) as BIGINT)), round(cast(pow(2, 31) as BIGINT)) from src limit 1 +PREHOOK: query: -- test for BIGINT +select round(cast(negative(pow(2, 63)) as BIGINT)), round(cast((pow(2, 63) - 1) as BIGINT)), round(cast(negative(pow(2, 31) + 1) as BIGINT)), round(cast(pow(2, 31) as BIGINT)) from src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select round(cast(negative(pow(2, 63)) as BIGINT)), round(cast((pow(2, 63) - 1) as BIGINT)), round(cast(negative(pow(2, 31) + 1) as BIGINT)), round(cast(pow(2, 31) as BIGINT)) from src limit 1 +POSTHOOK: query: -- test for BIGINT +select round(cast(negative(pow(2, 63)) as BIGINT)), round(cast((pow(2, 63) - 1) as BIGINT)), round(cast(negative(pow(2, 31) + 1) as BIGINT)), round(cast(pow(2, 31) as BIGINT)) from src limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -9223372036854775808 9223372036854775807 -2147483649 2147483648 -PREHOOK: query: select round(126.1), round(126.7), round(32766.1), round(32766.7) from src limit 1 +PREHOOK: query: -- test for DOUBLE +select round(126.1), round(126.7), round(32766.1), round(32766.7) from src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select round(126.1), round(126.7), round(32766.1), round(32766.7) from src limit 1 +POSTHOOK: query: -- test for DOUBLE +select round(126.1), round(126.7), round(32766.1), round(32766.7) from src limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/udf_sort_array.q.out ql/src/test/results/clientpositive/udf_sort_array.q.out index 6bc2f8f..9e712db 100644 --- ql/src/test/results/clientpositive/udf_sort_array.q.out +++ ql/src/test/results/clientpositive/udf_sort_array.q.out @@ -2,9 +2,13 @@ PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE POSTHOOK: query: use default POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: DESCRIBE FUNCTION sort_array +PREHOOK: query: -- Test sort_array() UDF + +DESCRIBE FUNCTION sort_array PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION sort_array +POSTHOOK: query: -- Test sort_array() UDF + +DESCRIBE FUNCTION sort_array POSTHOOK: type: DESCFUNCTION sort_array(array(obj1, obj2,...)) - Sorts the input array in ascending order according to the natural ordering of the array elements. PREHOOK: query: DESCRIBE FUNCTION EXTENDED sort_array @@ -15,10 +19,12 @@ sort_array(array(obj1, obj2,...)) - Sorts the input array in ascending order acc Example: > SELECT sort_array(array('b', 'd', 'c', 'a')) FROM src LIMIT 1; 'a', 'b', 'c', 'd' -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Evaluate function against STRING valued keys +EXPLAIN SELECT sort_array(array("b", "d", "c", "a")) FROM src LIMIT 1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Evaluate function against STRING valued keys +EXPLAIN SELECT sort_array(array("b", "d", "c", "a")) FROM src LIMIT 1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: @@ -71,25 +77,30 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### ["enterprise databases","hadoop distributed file system","hadoop map-reduce"] -PREHOOK: query: SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src LIMIT 1 +PREHOOK: query: -- Evaluate function against INT valued keys +SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src LIMIT 1 +POSTHOOK: query: -- Evaluate function against INT valued keys +SELECT sort_array(array(2, 9, 7, 3, 5, 4, 1, 6, 8)) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### [1,2,3,4,5,6,7,8,9] -PREHOOK: query: SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src LIMIT 1 +PREHOOK: query: -- Evaluate function against FLOAT valued keys +SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src LIMIT 1 +POSTHOOK: query: -- Evaluate function against FLOAT valued keys +SELECT sort_array(sort_array(array(2.333, 9, 1.325, 2.003, 0.777, -3.445, 1))) FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### [-3.445,0.777,1.0,1.325,2.003,2.333,9.0] -PREHOOK: query: CREATE TABLE dest1 ( +PREHOOK: query: -- Test it against data in a table. +CREATE TABLE dest1 ( tinyints ARRAY, smallints ARRAY, ints ARRAY, @@ -101,7 +112,8 @@ PREHOOK: query: CREATE TABLE dest1 ( timestamps ARRAY ) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1 ( +POSTHOOK: query: -- Test it against data in a table. +CREATE TABLE dest1 ( tinyints ARRAY, smallints ARRAY, ints ARRAY, diff --git ql/src/test/results/clientpositive/udf_substr.q.out ql/src/test/results/clientpositive/udf_substr.q.out index 1892e26..712725e 100644 --- ql/src/test/results/clientpositive/udf_substr.q.out +++ ql/src/test/results/clientpositive/udf_substr.q.out @@ -118,7 +118,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### C C C C B BC BC BC A AB ABC ABC -PREHOOK: query: SELECT +PREHOOK: query: -- substring() is a synonim of substr(), so just perform some basic tests +SELECT substring('ABCDEFG', 3, 4), substring('ABCDEFG', -5, 4), substring('ABCDEFG', 3), substring('ABCDEFG', -5), substring('ABC', 0), substring('ABC', 1), substring('ABC', 2), substring('ABC', 3), @@ -128,7 +129,8 @@ FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- substring() is a synonim of substr(), so just perform some basic tests +SELECT substring('ABCDEFG', 3, 4), substring('ABCDEFG', -5, 4), substring('ABCDEFG', 3), substring('ABCDEFG', -5), substring('ABC', 0), substring('ABC', 1), substring('ABC', 2), substring('ABC', 3), @@ -139,7 +141,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### CDEF CDEF CDEFG CDEFG ABC ABC BC C ABC BC A A A -PREHOOK: query: SELECT +PREHOOK: query: -- test for binary substr +SELECT substr(null, 1), substr(null, 1, 1), substr(ABC, null), substr(ABC, null, 1), substr(ABC, 1, null), @@ -158,7 +161,8 @@ FROM ( PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- test for binary substr +SELECT substr(null, 1), substr(null, 1, 1), substr(ABC, null), substr(ABC, null, 1), substr(ABC, 1, null), @@ -178,7 +182,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### NULL NULL NULL NULL NULL A AB ABC ABC A AB ABC ABC B BC BC BC C C C C C C C C B BC BC BC A AB ABC ABC -PREHOOK: query: SELECT +PREHOOK: query: -- test UTF-8 substr +SELECT substr("玩", 1), substr("abc 玩", 5), substr("abc 玩玩玩 abc", 5), @@ -187,7 +192,8 @@ FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- test UTF-8 substr +SELECT substr("玩", 1), substr("abc 玩", 5), substr("abc 玩玩玩 abc", 5), diff --git ql/src/test/results/clientpositive/udf_substring.q.out ql/src/test/results/clientpositive/udf_substring.q.out index ae473a0..0f76298 100644 --- ql/src/test/results/clientpositive/udf_substring.q.out +++ ql/src/test/results/clientpositive/udf_substring.q.out @@ -1,6 +1,8 @@ -PREHOOK: query: DESCRIBE FUNCTION substring +PREHOOK: query: -- Synonym. See udf_substr.q +DESCRIBE FUNCTION substring PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION substring +POSTHOOK: query: -- Synonym. See udf_substr.q +DESCRIBE FUNCTION substring POSTHOOK: type: DESCFUNCTION substring(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstring(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len PREHOOK: query: DESCRIBE FUNCTION EXTENDED substring diff --git ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out index 0189e45..c91d594 100644 --- ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out +++ ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out @@ -80,9 +80,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@oneline #### A masked pattern was here #### random_string NULL -PREHOOK: query: explain select * from (select * from src) a where unix_timestamp(a.key) > 10 +PREHOOK: query: -- PPD +explain select * from (select * from src) a where unix_timestamp(a.key) > 10 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from (select * from src) a where unix_timestamp(a.key) > 10 +POSTHOOK: query: -- PPD +explain select * from (select * from src) a where unix_timestamp(a.key) > 10 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_FUNCTION unix_timestamp (. (TOK_TABLE_OR_COL a) key)) 10)))) diff --git ql/src/test/results/clientpositive/udf_translate.q.out ql/src/test/results/clientpositive/udf_translate.q.out index dc117f4..ab43c17 100644 --- ql/src/test/results/clientpositive/udf_translate.q.out +++ ql/src/test/results/clientpositive/udf_translate.q.out @@ -20,9 +20,11 @@ If the same character is present multiple times in the input string, the first o For example, translate('abcdef', 'ada', '192') returns '1bc9ef' replaces 'a' with '1' and 'd' with '9' ignoring the second occurence of 'a' in the from string mapping it to '2' -PREHOOK: query: CREATE TABLE table_input(input STRING) +PREHOOK: query: -- Create some tables to serve some input data +CREATE TABLE table_input(input STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE table_input(input STRING) +POSTHOOK: query: -- Create some tables to serve some input data +CREATE TABLE table_input(input STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@table_input PREHOOK: query: CREATE TABLE table_translate(input_string STRING, from_string STRING, to_string STRING) @@ -51,12 +53,14 @@ POSTHOOK: Lineage: table_input.input SIMPLE [] POSTHOOK: Lineage: table_translate.from_string SIMPLE [] POSTHOOK: Lineage: table_translate.input_string SIMPLE [] POSTHOOK: Lineage: table_translate.to_string SIMPLE [] -PREHOOK: query: SELECT translate('abcd', 'ab', '12'), +PREHOOK: query: -- Run some queries on constant input parameters +SELECT translate('abcd', 'ab', '12'), translate('abcd', 'abc', '12') FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT translate('abcd', 'ab', '12'), +POSTHOOK: query: -- Run some queries on constant input parameters +SELECT translate('abcd', 'ab', '12'), translate('abcd', 'abc', '12') FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -66,12 +70,14 @@ POSTHOOK: Lineage: table_translate.from_string SIMPLE [] POSTHOOK: Lineage: table_translate.input_string SIMPLE [] POSTHOOK: Lineage: table_translate.to_string SIMPLE [] 12cd 12d -PREHOOK: query: SELECT translate(table_input.input, 'ab', '12'), +PREHOOK: query: -- Run some queries where first parameter being a table column while the other two being constants +SELECT translate(table_input.input, 'ab', '12'), translate(table_input.input, 'abc', '12') FROM table_input LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@table_input #### A masked pattern was here #### -POSTHOOK: query: SELECT translate(table_input.input, 'ab', '12'), +POSTHOOK: query: -- Run some queries where first parameter being a table column while the other two being constants +SELECT translate(table_input.input, 'ab', '12'), translate(table_input.input, 'abc', '12') FROM table_input LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@table_input @@ -81,11 +87,13 @@ POSTHOOK: Lineage: table_translate.from_string SIMPLE [] POSTHOOK: Lineage: table_translate.input_string SIMPLE [] POSTHOOK: Lineage: table_translate.to_string SIMPLE [] 12cd 12d -PREHOOK: query: SELECT translate(input_string, from_string, to_string) FROM table_translate LIMIT 1 +PREHOOK: query: -- Run some queries where all parameters are coming from table columns +SELECT translate(input_string, from_string, to_string) FROM table_translate LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@table_translate #### A masked pattern was here #### -POSTHOOK: query: SELECT translate(input_string, from_string, to_string) FROM table_translate LIMIT 1 +POSTHOOK: query: -- Run some queries where all parameters are coming from table columns +SELECT translate(input_string, from_string, to_string) FROM table_translate LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@table_translate #### A masked pattern was here #### @@ -94,14 +102,16 @@ POSTHOOK: Lineage: table_translate.from_string SIMPLE [] POSTHOOK: Lineage: table_translate.input_string SIMPLE [] POSTHOOK: Lineage: table_translate.to_string SIMPLE [] 1bc -PREHOOK: query: SELECT translate(NULL, 'ab', '12'), +PREHOOK: query: -- Run some queries where some parameters are NULL +SELECT translate(NULL, 'ab', '12'), translate('abcd', NULL, '12'), translate('abcd', 'ab', NULL), translate(NULL, NULL, NULL) FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT translate(NULL, 'ab', '12'), +POSTHOOK: query: -- Run some queries where some parameters are NULL +SELECT translate(NULL, 'ab', '12'), translate('abcd', NULL, '12'), translate('abcd', 'ab', NULL), translate(NULL, NULL, NULL) FROM src LIMIT 1 @@ -113,12 +123,14 @@ POSTHOOK: Lineage: table_translate.from_string SIMPLE [] POSTHOOK: Lineage: table_translate.input_string SIMPLE [] POSTHOOK: Lineage: table_translate.to_string SIMPLE [] NULL NULL NULL NULL -PREHOOK: query: SELECT translate('abcd', 'aba', '123'), +PREHOOK: query: -- Run some queries where the same character appears several times in the from string (2nd argument) of the UDF +SELECT translate('abcd', 'aba', '123'), translate('abcd', 'aba', '12') FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT translate('abcd', 'aba', '123'), +POSTHOOK: query: -- Run some queries where the same character appears several times in the from string (2nd argument) of the UDF +SELECT translate('abcd', 'aba', '123'), translate('abcd', 'aba', '12') FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src @@ -128,11 +140,13 @@ POSTHOOK: Lineage: table_translate.from_string SIMPLE [] POSTHOOK: Lineage: table_translate.input_string SIMPLE [] POSTHOOK: Lineage: table_translate.to_string SIMPLE [] 12cd 12cd -PREHOOK: query: SELECT translate('abcd', 'abc', '1234') FROM src LIMIT 1 +PREHOOK: query: -- Run some queries for the ignorant case when the 3rd parameter has more characters than the second one +SELECT translate('abcd', 'abc', '1234') FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT translate('abcd', 'abc', '1234') FROM src LIMIT 1 +POSTHOOK: query: -- Run some queries for the ignorant case when the 3rd parameter has more characters than the second one +SELECT translate('abcd', 'abc', '1234') FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -141,11 +155,13 @@ POSTHOOK: Lineage: table_translate.from_string SIMPLE [] POSTHOOK: Lineage: table_translate.input_string SIMPLE [] POSTHOOK: Lineage: table_translate.to_string SIMPLE [] 123d -PREHOOK: query: SELECT translate('Àbcd', 'À', 'Ã') FROM src LIMIT 1 +PREHOOK: query: -- Test proper function over UTF-8 characters +SELECT translate('Àbcd', 'À', 'Ã') FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT translate('Àbcd', 'À', 'Ã') FROM src LIMIT 1 +POSTHOOK: query: -- Test proper function over UTF-8 characters +SELECT translate('Àbcd', 'À', 'Ã') FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/udf_unhex.q.out ql/src/test/results/clientpositive/udf_unhex.q.out index 57faa0b..09e7402 100644 --- ql/src/test/results/clientpositive/udf_unhex.q.out +++ ql/src/test/results/clientpositive/udf_unhex.q.out @@ -25,7 +25,9 @@ The characters in the argument string must be legal hexadecimal digits: '0' .. '9', 'A' .. 'F', 'a' .. 'f'. If UNHEX() encounters any nonhexadecimal digits in the argument, it returns NULL. Also, if there are an odd number of characters a leading 0 is appended. -PREHOOK: query: SELECT +PREHOOK: query: -- Good inputs + +SELECT unhex('4D7953514C'), unhex('31323637'), unhex('61'), @@ -35,7 +37,9 @@ FROM src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- Good inputs + +SELECT unhex('4D7953514C'), unhex('31323637'), unhex('61'), @@ -46,7 +50,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### MySQL 1267 a -4 -PREHOOK: query: SELECT +PREHOOK: query: -- Bad inputs +SELECT unhex('MySQL'), unhex('G123'), unhex('\0') @@ -54,7 +59,8 @@ FROM src limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT +POSTHOOK: query: -- Bad inputs +SELECT unhex('MySQL'), unhex('G123'), unhex('\0') diff --git ql/src/test/results/clientpositive/udtf_json_tuple.q.out ql/src/test/results/clientpositive/udtf_json_tuple.q.out index e2baafc..f40e1fb 100644 --- ql/src/test/results/clientpositive/udtf_json_tuple.q.out +++ ql/src/test/results/clientpositive/udtf_json_tuple.q.out @@ -571,9 +571,13 @@ POSTHOOK: Lineage: json_t.key EXPRESSION [] NULL 1 2 2 value2 1 -PREHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS RCFILE +PREHOOK: query: -- Verify that json_tuple can handle new lines in JSON values + +CREATE TABLE dest1(c1 STRING) STORED AS RCFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS RCFILE +POSTHOOK: query: -- Verify that json_tuple can handle new lines in JSON values + +CREATE TABLE dest1(c1 STRING) STORED AS RCFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: json_t.jstring EXPRESSION [] diff --git ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out index ca182fc..ec127db 100644 --- ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out +++ ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out @@ -353,10 +353,12 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL facebook.com /path1/p.php k1=v1&k2=v2 Ref1 http /path1/p.php?k1=v1&k2=v2 facebook.com NULL v1 sites.google.com /a/example.com/site/page NULL NULL ftp /a/example.com/site/page sites.google.com NULL NULL www.socs.uts.edu.au /MosaicDocs-old/url-primer.html k1=tps chapter1 https /MosaicDocs-old/url-primer.html?k1=tps www.socs.uts.edu.au:80 NULL tps -PREHOOK: query: explain +PREHOOK: query: -- should return null for 'host', 'query', 'QUERY:nonExistCol' +explain select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1', 'host', 'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi, au, us, qk1, err1, err2, err3 order by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- should return null for 'host', 'query', 'QUERY:nonExistCol' +explain select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1', 'host', 'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi, au, us, qk1, err1, err2, err3 order by a.key POSTHOOK: type: QUERY POSTHOOK: Lineage: url_t.fullurl EXPRESSION [] diff --git ql/src/test/results/clientpositive/union.q.out ql/src/test/results/clientpositive/union.q.out index 627788c..1e2b567 100644 --- ql/src/test/results/clientpositive/union.q.out +++ ql/src/test/results/clientpositive/union.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: -- union case: both subqueries are map jobs on same input, followed by filesink + +EXPLAIN FROM ( FROM src select src.key, src.value WHERE src.key < 100 UNION ALL @@ -6,7 +8,9 @@ FROM ( ) unioninput INSERT OVERWRITE DIRECTORY '../build/ql/test/data/warehouse/union.out' SELECT unioninput.* PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- union case: both subqueries are map jobs on same input, followed by filesink + +EXPLAIN FROM ( FROM src select src.key, src.value WHERE src.key < 100 UNION ALL diff --git ql/src/test/results/clientpositive/union10.q.out ql/src/test/results/clientpositive/union10.q.out index ca6f7f8..ad05b38 100644 --- ql/src/test/results/clientpositive/union10.q.out +++ ql/src/test/results/clientpositive/union10.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table tmptable(key string, value int) +PREHOOK: query: -- union case: all subqueries are a map-reduce jobs, 3 way union, same input for all sub-queries, followed by filesink + +create table tmptable(key string, value int) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tmptable(key string, value int) +POSTHOOK: query: -- union case: all subqueries are a map-reduce jobs, 3 way union, same input for all sub-queries, followed by filesink + +create table tmptable(key string, value int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmptable PREHOOK: query: explain diff --git ql/src/test/results/clientpositive/union11.q.out ql/src/test/results/clientpositive/union11.q.out index f3d03bb..94e8900 100644 --- ql/src/test/results/clientpositive/union11.q.out +++ ql/src/test/results/clientpositive/union11.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: explain +PREHOOK: query: -- union case: all subqueries are a map-reduce jobs, 3 way union, same input for all sub-queries, followed by reducesink + +explain select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) unionsrc group by unionsrc.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case: all subqueries are a map-reduce jobs, 3 way union, same input for all sub-queries, followed by reducesink + +explain select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2 diff --git ql/src/test/results/clientpositive/union12.q.out ql/src/test/results/clientpositive/union12.q.out index e40fcdf..4a1dea6 100644 --- ql/src/test/results/clientpositive/union12.q.out +++ ql/src/test/results/clientpositive/union12.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table tmptable(key string, value int) +PREHOOK: query: -- union case: all subqueries are a map-reduce jobs, 3 way union, different inputs for all sub-queries, followed by filesink + +create table tmptable(key string, value int) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tmptable(key string, value int) +POSTHOOK: query: -- union case: all subqueries are a map-reduce jobs, 3 way union, different inputs for all sub-queries, followed by filesink + +create table tmptable(key string, value int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmptable PREHOOK: query: explain diff --git ql/src/test/results/clientpositive/union13.q.out ql/src/test/results/clientpositive/union13.q.out index 73af75b..991b85d 100644 --- ql/src/test/results/clientpositive/union13.q.out +++ ql/src/test/results/clientpositive/union13.q.out @@ -1,8 +1,12 @@ -PREHOOK: query: explain +PREHOOK: query: -- union case: both subqueries are a map-only jobs, same input, followed by filesink + +explain select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case: both subqueries are a map-only jobs, same input, followed by filesink + +explain select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/union14.q.out ql/src/test/results/clientpositive/union14.q.out index bdd2d55..0f98a10 100644 --- ql/src/test/results/clientpositive/union14.q.out +++ ql/src/test/results/clientpositive/union14.q.out @@ -1,10 +1,14 @@ -PREHOOK: query: explain +PREHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink + +explain select unionsrc.key, count(1) FROM (select s2.key as key, s2.value as value from src1 s2 UNION ALL select 'tst1' as key, cast(count(1) as string) as value from src s1) unionsrc group by unionsrc.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink + +explain select unionsrc.key, count(1) FROM (select s2.key as key, s2.value as value from src1 s2 UNION ALL select 'tst1' as key, cast(count(1) as string) as value from src s1) diff --git ql/src/test/results/clientpositive/union15.q.out ql/src/test/results/clientpositive/union15.q.out index dae4438..0589a14 100644 --- ql/src/test/results/clientpositive/union15.q.out +++ ql/src/test/results/clientpositive/union15.q.out @@ -1,11 +1,15 @@ -PREHOOK: query: explain +PREHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink + +explain select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src1 s2 UNION ALL select s3.key as key, s3.value as value from src1 s3) unionsrc group by unionsrc.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink + +explain select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src1 s2 diff --git ql/src/test/results/clientpositive/union17.q.out ql/src/test/results/clientpositive/union17.q.out index 3cf7065..5fa6de3 100644 --- ql/src/test/results/clientpositive/union17.q.out +++ ql/src/test/results/clientpositive/union17.q.out @@ -8,14 +8,18 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE DEST2(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@DEST2 -PREHOOK: query: explain +PREHOOK: query: -- union case:map-reduce sub-queries followed by multi-table insert + +explain FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case:map-reduce sub-queries followed by multi-table insert + +explain FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc diff --git ql/src/test/results/clientpositive/union18.q.out ql/src/test/results/clientpositive/union18.q.out index 29cb728..c59389b 100644 --- ql/src/test/results/clientpositive/union18.q.out +++ ql/src/test/results/clientpositive/union18.q.out @@ -8,14 +8,18 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE DEST2(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@DEST2 -PREHOOK: query: explain +PREHOOK: query: -- union case:map-reduce sub-queries followed by multi-table insert + +explain FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, unionsrc.value INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, unionsrc.value PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case:map-reduce sub-queries followed by multi-table insert + +explain FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc diff --git ql/src/test/results/clientpositive/union19.q.out ql/src/test/results/clientpositive/union19.q.out index 0495e32..e3f33cb 100644 --- ql/src/test/results/clientpositive/union19.q.out +++ ql/src/test/results/clientpositive/union19.q.out @@ -8,14 +8,18 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE DEST2(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@DEST2 -PREHOOK: query: explain +PREHOOK: query: -- union case:map-reduce sub-queries followed by multi-table insert + +explain FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, count(unionsrc.value) group by unionsrc.key INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, unionsrc.value PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case:map-reduce sub-queries followed by multi-table insert + +explain FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc diff --git ql/src/test/results/clientpositive/union2.q.out ql/src/test/results/clientpositive/union2.q.out index 78320a3..2a5c074 100644 --- ql/src/test/results/clientpositive/union2.q.out +++ ql/src/test/results/clientpositive/union2.q.out @@ -1,8 +1,12 @@ -PREHOOK: query: explain +PREHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink + +explain select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink + +explain select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/union20.q.out ql/src/test/results/clientpositive/union20.q.out index 367aef2..3ae87b3 100644 --- ql/src/test/results/clientpositive/union20.q.out +++ ql/src/test/results/clientpositive/union20.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: explain +PREHOOK: query: -- union :map-reduce sub-queries followed by join + +explain SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL @@ -9,7 +11,9 @@ JOIN select s4.key as key, s4.value as value from src s4 where s4.key < 10) unionsrc2 ON (unionsrc1.key = unionsrc2.key) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union :map-reduce sub-queries followed by join + +explain SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git ql/src/test/results/clientpositive/union21.q.out ql/src/test/results/clientpositive/union21.q.out index ef923cb..171b57a 100644 --- ql/src/test/results/clientpositive/union21.q.out +++ ql/src/test/results/clientpositive/union21.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: explain +PREHOOK: query: -- union of constants, udf outputs, and columns from text table and thrift table + +explain SELECT key, count(1) FROM ( SELECT '1' as key from src @@ -13,7 +15,9 @@ FROM ( ) union_output GROUP BY key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union of constants, udf outputs, and columns from text table and thrift table + +explain SELECT key, count(1) FROM ( SELECT '1' as key from src diff --git ql/src/test/results/clientpositive/union22.q.out ql/src/test/results/clientpositive/union22.q.out index 762aba9..bec39f4 100644 --- ql/src/test/results/clientpositive/union22.q.out +++ ql/src/test/results/clientpositive/union22.q.out @@ -42,7 +42,9 @@ POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSc POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain extended +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +explain extended insert overwrite table dst_union22 partition (ds='2') select * from ( @@ -55,7 +57,9 @@ where a.k1 > 20 ) subq PREHOOK: type: QUERY -POSTHOOK: query: explain extended +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +explain extended insert overwrite table dst_union22 partition (ds='2') select * from ( diff --git ql/src/test/results/clientpositive/union32.q.out ql/src/test/results/clientpositive/union32.q.out index 3986783..2f84314 100644 --- ql/src/test/results/clientpositive/union32.q.out +++ ql/src/test/results/clientpositive/union32.q.out @@ -1,7 +1,13 @@ -PREHOOK: query: CREATE TABLE t1 AS SELECT * FROM src WHERE key < 10 +PREHOOK: query: -- This tests various union queries which have columns on one side of the query +-- being of double type and those on the other side another + +CREATE TABLE t1 AS SELECT * FROM src WHERE key < 10 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@src -POSTHOOK: query: CREATE TABLE t1 AS SELECT * FROM src WHERE key < 10 +POSTHOOK: query: -- This tests various union queries which have columns on one side of the query +-- being of double type and those on the other side another + +CREATE TABLE t1 AS SELECT * FROM src WHERE key < 10 POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: default@t1 @@ -12,14 +18,16 @@ POSTHOOK: query: CREATE TABLE t2 AS SELECT * FROM src WHERE key < 10 POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: default@t2 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Test simple union with double +EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION ALL SELECT CAST(key AS BIGINT) AS key FROM t2) a ORDER BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Test simple union with double +EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t1 UNION ALL @@ -135,14 +143,16 @@ POSTHOOK: Input: default@t2 8.0 9.0 9.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Test union with join on the left +EXPLAIN SELECT * FROM (SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key UNION ALL SELECT CAST(key AS DOUBLE) AS key FROM t2) a ORDER BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Test union with join on the left +EXPLAIN SELECT * FROM (SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key UNION ALL @@ -316,14 +326,16 @@ POSTHOOK: Input: default@t2 8.0 9.0 9.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Test union with join on the right +EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t2 UNION ALL SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a ORDER BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Test union with join on the right +EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key FROM t2 UNION ALL @@ -497,14 +509,16 @@ POSTHOOK: Input: default@t2 8.0 9.0 9.0 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Test union with join on the left selecting multiple columns +EXPLAIN SELECT * FROM (SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION ALL SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a ORDER BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Test union with join on the left selecting multiple columns +EXPLAIN SELECT * FROM (SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION ALL @@ -693,14 +707,16 @@ POSTHOOK: Input: default@t2 8.0 8.0 9.0 9.0 9.0 9 -PREHOOK: query: EXPLAIN +PREHOOK: query: -- Test union with join on the right selecting multiple columns +EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION ALL SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a ORDER BY key PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- Test union with join on the right selecting multiple columns +EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION ALL diff --git ql/src/test/results/clientpositive/union33.q.out ql/src/test/results/clientpositive/union33.q.out index 6e543b9..94c82aa 100644 --- ql/src/test/results/clientpositive/union33.q.out +++ ql/src/test/results/clientpositive/union33.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: CREATE TABLE test_src (key STRING, value STRING) +PREHOOK: query: -- This tests that a union all with a map only subquery on one side and a +-- subquery involving two map reduce jobs on the other runs correctly. + +CREATE TABLE test_src (key STRING, value STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE test_src (key STRING, value STRING) +POSTHOOK: query: -- This tests that a union all with a map only subquery on one side and a +-- subquery involving two map reduce jobs on the other runs correctly. + +CREATE TABLE test_src (key STRING, value STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@test_src PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src diff --git ql/src/test/results/clientpositive/union4.q.out ql/src/test/results/clientpositive/union4.q.out index dc2edc3..97d8223 100644 --- ql/src/test/results/clientpositive/union4.q.out +++ ql/src/test/results/clientpositive/union4.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: create table tmptable(key string, value int) +PREHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by filesink + + +create table tmptable(key string, value int) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tmptable(key string, value int) +POSTHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by filesink + + +create table tmptable(key string, value int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmptable PREHOOK: query: explain diff --git ql/src/test/results/clientpositive/union5.q.out ql/src/test/results/clientpositive/union5.q.out index f353906..482abe8 100644 --- ql/src/test/results/clientpositive/union5.q.out +++ ql/src/test/results/clientpositive/union5.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: explain +PREHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink + +explain select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink + +explain select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key diff --git ql/src/test/results/clientpositive/union6.q.out ql/src/test/results/clientpositive/union6.q.out index 18782bd..b07b562 100644 --- ql/src/test/results/clientpositive/union6.q.out +++ ql/src/test/results/clientpositive/union6.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: create table tmptable(key string, value string) +PREHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by filesink + + +create table tmptable(key string, value string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tmptable(key string, value string) +POSTHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by filesink + + +create table tmptable(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmptable PREHOOK: query: explain diff --git ql/src/test/results/clientpositive/union7.q.out ql/src/test/results/clientpositive/union7.q.out index 742d59b..d58b94d 100644 --- ql/src/test/results/clientpositive/union7.q.out +++ ql/src/test/results/clientpositive/union7.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: explain +PREHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink + +explain select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink + +explain select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key diff --git ql/src/test/results/clientpositive/union8.q.out ql/src/test/results/clientpositive/union8.q.out index 27124c4..fbfe03d 100644 --- ql/src/test/results/clientpositive/union8.q.out +++ ql/src/test/results/clientpositive/union8.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: explain +PREHOOK: query: -- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by filesink + +explain select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2 UNION ALL select s3.key as key, s3.value as value from src s3) unionsrc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by filesink + +explain select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2 UNION ALL select s3.key as key, s3.value as value from src s3) unionsrc diff --git ql/src/test/results/clientpositive/union9.q.out ql/src/test/results/clientpositive/union9.q.out index 424db51..a52da32 100644 --- ql/src/test/results/clientpositive/union9.q.out +++ ql/src/test/results/clientpositive/union9.q.out @@ -1,9 +1,13 @@ -PREHOOK: query: explain +PREHOOK: query: -- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by reducesink + +explain select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2 UNION ALL select s3.key as key, s3.value as value from src s3) unionsrc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by reducesink + +explain select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2 UNION ALL select s3.key as key, s3.value as value from src s3) unionsrc diff --git ql/src/test/results/clientpositive/union_null.q.out ql/src/test/results/clientpositive/union_null.q.out index 69e784e..361f6b2 100644 --- ql/src/test/results/clientpositive/union_null.q.out +++ ql/src/test/results/clientpositive/union_null.q.out @@ -1,8 +1,10 @@ -PREHOOK: query: select x from (select value as x from src union all select NULL as x from src)a limit 10 +PREHOOK: query: -- HIVE-2901 +select x from (select value as x from src union all select NULL as x from src)a limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select x from (select value as x from src union all select NULL as x from src)a limit 10 +POSTHOOK: query: -- HIVE-2901 +select x from (select value as x from src union all select NULL as x from src)a limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/union_remove_6.q.out ql/src/test/results/clientpositive/union_remove_6.q.out index dfe7051..1b04f93 100644 --- ql/src/test/results/clientpositive/union_remove_6.q.out +++ ql/src/test/results/clientpositive/union_remove_6.q.out @@ -1,6 +1,20 @@ -PREHOOK: query: create table inputTbl1(key string, val string) stored as textfile +PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +-- Union of 2 subqueries is performed (all of which are mapred queries) +-- followed by select star and a file sink in 2 output tables. +-- The optimiaztion does not take affect since it is a multi-table insert. +-- It does not matter, whether the output is merged or not. In this case, +-- merging is turned off + +create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE -POSTHOOK: query: create table inputTbl1(key string, val string) stored as textfile +POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +-- Union of 2 subqueries is performed (all of which are mapred queries) +-- followed by select star and a file sink in 2 output tables. +-- The optimiaztion does not take affect since it is a multi-table insert. +-- It does not matter, whether the output is merged or not. In this case, +-- merging is turned off + +create table inputTbl1(key string, val string) stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@inputTbl1 PREHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile diff --git ql/src/test/results/clientpositive/unset_table_view_property.q.out ql/src/test/results/clientpositive/unset_table_view_property.q.out index b68ea2b..d39f492 100644 --- ql/src/test/results/clientpositive/unset_table_view_property.q.out +++ ql/src/test/results/clientpositive/unset_table_view_property.q.out @@ -9,11 +9,13 @@ POSTHOOK: query: SHOW TBLPROPERTIES testTable POSTHOOK: type: SHOW_TBLPROPERTIES #### A masked pattern was here #### -PREHOOK: query: ALTER TABLE testTable SET TBLPROPERTIES ('a'='1', 'c'='3') +PREHOOK: query: -- UNSET TABLE PROPERTIES +ALTER TABLE testTable SET TBLPROPERTIES ('a'='1', 'c'='3') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@testtable PREHOOK: Output: default@testtable -POSTHOOK: query: ALTER TABLE testTable SET TBLPROPERTIES ('a'='1', 'c'='3') +POSTHOOK: query: -- UNSET TABLE PROPERTIES +ALTER TABLE testTable SET TBLPROPERTIES ('a'='1', 'c'='3') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@testtable POSTHOOK: Output: default@testtable @@ -27,11 +29,13 @@ c 3 #### A masked pattern was here #### a 1 #### A masked pattern was here #### -PREHOOK: query: ALTER TABLE testTable UNSET TBLPROPERTIES ('a', 'c') +PREHOOK: query: -- UNSET all the properties +ALTER TABLE testTable UNSET TBLPROPERTIES ('a', 'c') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@testtable PREHOOK: Output: default@testtable -POSTHOOK: query: ALTER TABLE testTable UNSET TBLPROPERTIES ('a', 'c') +POSTHOOK: query: -- UNSET all the properties +ALTER TABLE testTable UNSET TBLPROPERTIES ('a', 'c') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@testtable POSTHOOK: Output: default@testtable @@ -60,11 +64,13 @@ c 3 #### A masked pattern was here #### a 1 #### A masked pattern was here #### -PREHOOK: query: ALTER TABLE testTable UNSET TBLPROPERTIES ('a', 'd') +PREHOOK: query: -- UNSET a subset of the properties +ALTER TABLE testTable UNSET TBLPROPERTIES ('a', 'd') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@testtable PREHOOK: Output: default@testtable -POSTHOOK: query: ALTER TABLE testTable UNSET TBLPROPERTIES ('a', 'd') +POSTHOOK: query: -- UNSET a subset of the properties +ALTER TABLE testTable UNSET TBLPROPERTIES ('a', 'd') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@testtable POSTHOOK: Output: default@testtable @@ -76,11 +82,13 @@ POSTHOOK: type: SHOW_TBLPROPERTIES #### A masked pattern was here #### c 3 #### A masked pattern was here #### -PREHOOK: query: ALTER TABLE testTable UNSET TBLPROPERTIES ('c', 'c', 'c') +PREHOOK: query: -- the same property being UNSET multiple times +ALTER TABLE testTable UNSET TBLPROPERTIES ('c', 'c', 'c') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@testtable PREHOOK: Output: default@testtable -POSTHOOK: query: ALTER TABLE testTable UNSET TBLPROPERTIES ('c', 'c', 'c') +POSTHOOK: query: -- the same property being UNSET multiple times +ALTER TABLE testTable UNSET TBLPROPERTIES ('c', 'c', 'c') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@testtable POSTHOOK: Output: default@testtable @@ -110,11 +118,13 @@ c 3 #### A masked pattern was here #### a 1 #### A masked pattern was here #### -PREHOOK: query: ALTER TABLE testTable UNSET TBLPROPERTIES IF EXISTS ('b', 'd', 'b', 'f') +PREHOOK: query: -- UNSET a subset of the properties and some non-existed properties using IF EXISTS +ALTER TABLE testTable UNSET TBLPROPERTIES IF EXISTS ('b', 'd', 'b', 'f') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@testtable PREHOOK: Output: default@testtable -POSTHOOK: query: ALTER TABLE testTable UNSET TBLPROPERTIES IF EXISTS ('b', 'd', 'b', 'f') +POSTHOOK: query: -- UNSET a subset of the properties and some non-existed properties using IF EXISTS +ALTER TABLE testTable UNSET TBLPROPERTIES IF EXISTS ('b', 'd', 'b', 'f') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@testtable POSTHOOK: Output: default@testtable @@ -128,11 +138,13 @@ c 3 #### A masked pattern was here #### a 1 #### A masked pattern was here #### -PREHOOK: query: ALTER TABLE testTable UNSET TBLPROPERTIES IF EXISTS ('b', 'd', 'c', 'f', 'x', 'y', 'z') +PREHOOK: query: -- UNSET a subset of the properties and some non-existed properties using IF EXISTS +ALTER TABLE testTable UNSET TBLPROPERTIES IF EXISTS ('b', 'd', 'c', 'f', 'x', 'y', 'z') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@testtable PREHOOK: Output: default@testtable -POSTHOOK: query: ALTER TABLE testTable UNSET TBLPROPERTIES IF EXISTS ('b', 'd', 'c', 'f', 'x', 'y', 'z') +POSTHOOK: query: -- UNSET a subset of the properties and some non-existed properties using IF EXISTS +ALTER TABLE testTable UNSET TBLPROPERTIES IF EXISTS ('b', 'd', 'c', 'f', 'x', 'y', 'z') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@testtable POSTHOOK: Output: default@testtable @@ -144,9 +156,11 @@ POSTHOOK: type: SHOW_TBLPROPERTIES #### A masked pattern was here #### a 1 #### A masked pattern was here #### -PREHOOK: query: CREATE VIEW testView AS SELECT value FROM src WHERE key=86 +PREHOOK: query: -- UNSET VIEW PROPERTIES +CREATE VIEW testView AS SELECT value FROM src WHERE key=86 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW testView AS SELECT value FROM src WHERE key=86 +POSTHOOK: query: -- UNSET VIEW PROPERTIES +CREATE VIEW testView AS SELECT value FROM src WHERE key=86 POSTHOOK: type: CREATEVIEW POSTHOOK: Output: default@testView PREHOOK: query: ALTER VIEW testView SET TBLPROPERTIES ('propA'='100', 'propB'='200') @@ -166,11 +180,13 @@ POSTHOOK: type: SHOW_TBLPROPERTIES propA 100 propB 200 #### A masked pattern was here #### -PREHOOK: query: ALTER VIEW testView UNSET TBLPROPERTIES ('propA', 'propB') +PREHOOK: query: -- UNSET all the properties +ALTER VIEW testView UNSET TBLPROPERTIES ('propA', 'propB') PREHOOK: type: ALTERVIEW_PROPERTIES PREHOOK: Input: default@testview PREHOOK: Output: default@testview -POSTHOOK: query: ALTER VIEW testView UNSET TBLPROPERTIES ('propA', 'propB') +POSTHOOK: query: -- UNSET all the properties +ALTER VIEW testView UNSET TBLPROPERTIES ('propA', 'propB') POSTHOOK: type: ALTERVIEW_PROPERTIES POSTHOOK: Input: default@testview POSTHOOK: Output: default@testview @@ -198,11 +214,13 @@ propA 100 #### A masked pattern was here #### propD 400 propC 300 -PREHOOK: query: ALTER VIEW testView UNSET TBLPROPERTIES ('propA', 'propC') +PREHOOK: query: -- UNSET a subset of the properties +ALTER VIEW testView UNSET TBLPROPERTIES ('propA', 'propC') PREHOOK: type: ALTERVIEW_PROPERTIES PREHOOK: Input: default@testview PREHOOK: Output: default@testview -POSTHOOK: query: ALTER VIEW testView UNSET TBLPROPERTIES ('propA', 'propC') +POSTHOOK: query: -- UNSET a subset of the properties +ALTER VIEW testView UNSET TBLPROPERTIES ('propA', 'propC') POSTHOOK: type: ALTERVIEW_PROPERTIES POSTHOOK: Input: default@testview POSTHOOK: Output: default@testview @@ -213,11 +231,13 @@ POSTHOOK: type: SHOW_TBLPROPERTIES #### A masked pattern was here #### propD 400 -PREHOOK: query: ALTER VIEW testView UNSET TBLPROPERTIES ('propD', 'propD', 'propD') +PREHOOK: query: -- the same property being UNSET multiple times +ALTER VIEW testView UNSET TBLPROPERTIES ('propD', 'propD', 'propD') PREHOOK: type: ALTERVIEW_PROPERTIES PREHOOK: Input: default@testview PREHOOK: Output: default@testview -POSTHOOK: query: ALTER VIEW testView UNSET TBLPROPERTIES ('propD', 'propD', 'propD') +POSTHOOK: query: -- the same property being UNSET multiple times +ALTER VIEW testView UNSET TBLPROPERTIES ('propD', 'propD', 'propD') POSTHOOK: type: ALTERVIEW_PROPERTIES POSTHOOK: Input: default@testview POSTHOOK: Output: default@testview @@ -246,11 +266,13 @@ propB 200 #### A masked pattern was here #### propD 400 propC 300 -PREHOOK: query: ALTER VIEW testView UNSET TBLPROPERTIES IF EXISTS ('propC', 'propD', 'propD', 'propC', 'propZ') +PREHOOK: query: -- UNSET a subset of the properties and some non-existed properties using IF EXISTS +ALTER VIEW testView UNSET TBLPROPERTIES IF EXISTS ('propC', 'propD', 'propD', 'propC', 'propZ') PREHOOK: type: ALTERVIEW_PROPERTIES PREHOOK: Input: default@testview PREHOOK: Output: default@testview -POSTHOOK: query: ALTER VIEW testView UNSET TBLPROPERTIES IF EXISTS ('propC', 'propD', 'propD', 'propC', 'propZ') +POSTHOOK: query: -- UNSET a subset of the properties and some non-existed properties using IF EXISTS +ALTER VIEW testView UNSET TBLPROPERTIES IF EXISTS ('propC', 'propD', 'propD', 'propC', 'propZ') POSTHOOK: type: ALTERVIEW_PROPERTIES POSTHOOK: Input: default@testview POSTHOOK: Output: default@testview @@ -263,11 +285,13 @@ POSTHOOK: type: SHOW_TBLPROPERTIES propA 100 propB 200 #### A masked pattern was here #### -PREHOOK: query: ALTER VIEW testView UNSET TBLPROPERTIES IF EXISTS ('propB', 'propC', 'propD', 'propF') +PREHOOK: query: -- UNSET a subset of the properties and some non-existed properties using IF EXISTS +ALTER VIEW testView UNSET TBLPROPERTIES IF EXISTS ('propB', 'propC', 'propD', 'propF') PREHOOK: type: ALTERVIEW_PROPERTIES PREHOOK: Input: default@testview PREHOOK: Output: default@testview -POSTHOOK: query: ALTER VIEW testView UNSET TBLPROPERTIES IF EXISTS ('propB', 'propC', 'propD', 'propF') +POSTHOOK: query: -- UNSET a subset of the properties and some non-existed properties using IF EXISTS +ALTER VIEW testView UNSET TBLPROPERTIES IF EXISTS ('propB', 'propC', 'propD', 'propF') POSTHOOK: type: ALTERVIEW_PROPERTIES POSTHOOK: Input: default@testview POSTHOOK: Output: default@testview diff --git ql/src/test/results/clientpositive/view.q.out ql/src/test/results/clientpositive/view.q.out index 76a942f..30d5b88 100644 --- ql/src/test/results/clientpositive/view.q.out +++ ql/src/test/results/clientpositive/view.q.out @@ -36,44 +36,60 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv1.txt' OVERWRITE INTO TABLE table2 POSTHOOK: type: LOAD POSTHOOK: Output: db1@table2 -PREHOOK: query: CREATE VIEW v1 AS SELECT * FROM table1 +PREHOOK: query: -- relative reference, no alias +CREATE VIEW v1 AS SELECT * FROM table1 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW v1 AS SELECT * FROM table1 +POSTHOOK: query: -- relative reference, no alias +CREATE VIEW v1 AS SELECT * FROM table1 POSTHOOK: type: CREATEVIEW POSTHOOK: Output: db1@v1 -PREHOOK: query: CREATE VIEW v2 AS SELECT t1.* FROM table1 t1 +PREHOOK: query: -- relative reference, aliased +CREATE VIEW v2 AS SELECT t1.* FROM table1 t1 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW v2 AS SELECT t1.* FROM table1 t1 +POSTHOOK: query: -- relative reference, aliased +CREATE VIEW v2 AS SELECT t1.* FROM table1 t1 POSTHOOK: type: CREATEVIEW POSTHOOK: Output: db1@v2 -PREHOOK: query: CREATE VIEW v3 AS SELECT t1.*, t2.key k FROM table1 t1 JOIN table2 t2 ON t1.key = t2.key +PREHOOK: query: -- relative reference, multiple tables +CREATE VIEW v3 AS SELECT t1.*, t2.key k FROM table1 t1 JOIN table2 t2 ON t1.key = t2.key PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW v3 AS SELECT t1.*, t2.key k FROM table1 t1 JOIN table2 t2 ON t1.key = t2.key +POSTHOOK: query: -- relative reference, multiple tables +CREATE VIEW v3 AS SELECT t1.*, t2.key k FROM table1 t1 JOIN table2 t2 ON t1.key = t2.key POSTHOOK: type: CREATEVIEW POSTHOOK: Output: db1@v3 -PREHOOK: query: CREATE VIEW v4 AS SELECT * FROM db1.table1 +PREHOOK: query: -- absolute reference, no alias +CREATE VIEW v4 AS SELECT * FROM db1.table1 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW v4 AS SELECT * FROM db1.table1 +POSTHOOK: query: -- absolute reference, no alias +CREATE VIEW v4 AS SELECT * FROM db1.table1 POSTHOOK: type: CREATEVIEW POSTHOOK: Output: db1@v4 -PREHOOK: query: CREATE VIEW v5 AS SELECT t1.* FROM db1.table1 t1 +PREHOOK: query: -- absolute reference, aliased +CREATE VIEW v5 AS SELECT t1.* FROM db1.table1 t1 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW v5 AS SELECT t1.* FROM db1.table1 t1 +POSTHOOK: query: -- absolute reference, aliased +CREATE VIEW v5 AS SELECT t1.* FROM db1.table1 t1 POSTHOOK: type: CREATEVIEW POSTHOOK: Output: db1@v5 -PREHOOK: query: CREATE VIEW v6 AS SELECT t1.*, t2.key k FROM db1.table1 t1 JOIN db1.table2 t2 ON t1.key = t2.key +PREHOOK: query: -- absolute reference, multiple tables +CREATE VIEW v6 AS SELECT t1.*, t2.key k FROM db1.table1 t1 JOIN db1.table2 t2 ON t1.key = t2.key PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW v6 AS SELECT t1.*, t2.key k FROM db1.table1 t1 JOIN db1.table2 t2 ON t1.key = t2.key +POSTHOOK: query: -- absolute reference, multiple tables +CREATE VIEW v6 AS SELECT t1.*, t2.key k FROM db1.table1 t1 JOIN db1.table2 t2 ON t1.key = t2.key POSTHOOK: type: CREATEVIEW POSTHOOK: Output: db1@v6 -PREHOOK: query: CREATE VIEW v7 AS SELECT key from table1 +PREHOOK: query: -- relative reference, explicit column +CREATE VIEW v7 AS SELECT key from table1 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW v7 AS SELECT key from table1 +POSTHOOK: query: -- relative reference, explicit column +CREATE VIEW v7 AS SELECT key from table1 POSTHOOK: type: CREATEVIEW POSTHOOK: Output: db1@v7 -PREHOOK: query: CREATE VIEW v8 AS SELECT key from db1.table1 +PREHOOK: query: -- absolute reference, explicit column +CREATE VIEW v8 AS SELECT key from db1.table1 PREHOOK: type: CREATEVIEW -POSTHOOK: query: CREATE VIEW v8 AS SELECT key from db1.table1 +POSTHOOK: query: -- absolute reference, explicit column +CREATE VIEW v8 AS SELECT key from db1.table1 POSTHOOK: type: CREATEVIEW POSTHOOK: Output: db1@v8 PREHOOK: query: CREATE DATABASE db2 diff --git ql/src/test/results/clientpositive/windowing.q.out ql/src/test/results/clientpositive/windowing.q.out index 015a950..55fe4d0 100644 --- ql/src/test/results/clientpositive/windowing.q.out +++ ql/src/test/results/clientpositive/windowing.q.out @@ -2,7 +2,8 @@ PREHOOK: query: DROP TABLE part PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE part POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE part( +PREHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -14,7 +15,8 @@ PREHOOK: query: CREATE TABLE part( p_comment STRING ) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part( +POSTHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -33,7 +35,8 @@ PREHOOK: Output: default@part POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part POSTHOOK: type: LOAD POSTHOOK: Output: default@part -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 1. testWindowing +select p_mfgr, p_name, p_size, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 @@ -41,7 +44,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 1. testWindowing +select p_mfgr, p_name, p_size, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 @@ -75,7 +79,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 2. testGroupByWithPartitioning +select p_mfgr, p_name, p_size, min(p_retailprice), rank() over(distribute by p_mfgr sort by p_name)as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, @@ -85,7 +90,8 @@ group by p_mfgr, p_name, p_size PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 2. testGroupByWithPartitioning +select p_mfgr, p_name, p_size, min(p_retailprice), rank() over(distribute by p_mfgr sort by p_name)as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, @@ -120,7 +126,8 @@ Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23 -PREHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice), +PREHOOK: query: -- 3. testGroupByHavingWithSWQ +select p_mfgr, p_name, p_size, min(p_retailprice), rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz @@ -130,7 +137,8 @@ having p_size > 0 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice), +POSTHOOK: query: -- 3. testGroupByHavingWithSWQ +select p_mfgr, p_name, p_size, min(p_retailprice), rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz @@ -165,13 +173,15 @@ Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 4. testCount +select p_mfgr, p_name, count(p_size) over(distribute by p_mfgr sort by p_name) as cd from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 4. testCount +select p_mfgr, p_name, count(p_size) over(distribute by p_mfgr sort by p_name) as cd from part POSTHOOK: type: QUERY @@ -203,7 +213,8 @@ Manufacturer#5 almond antique medium spring khaki 2 Manufacturer#5 almond antique sky peru orange 3 Manufacturer#5 almond aquamarine dodger light gainsboro 4 Manufacturer#5 almond azure blanched chiffon midnight 5 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 5. testCountWithWindowingUDAF +select p_mfgr, p_name, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, count(p_size) over(distribute by p_mfgr sort by p_name) as cd, @@ -213,7 +224,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 5. testCountWithWindowingUDAF +select p_mfgr, p_name, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, count(p_size) over(distribute by p_mfgr sort by p_name) as cd, @@ -249,7 +261,8 @@ Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.35000000000 Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 -PREHOOK: query: select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +PREHOOK: query: -- 6. testCountInSubQ +select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz from (select p_mfgr, p_name, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, @@ -261,7 +274,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +POSTHOOK: query: -- 6. testCountInSubQ +select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz from (select p_mfgr, p_name, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, @@ -299,7 +313,8 @@ POSTHOOK: Input: default@part 3 3 3 5190.08 -4 4 4 4 6208.18 44 5 5 5 7672.66 -23 -PREHOOK: query: select abc.p_mfgr, abc.p_name, +PREHOOK: query: -- 7. testJoinWithWindowingAndPTF +select abc.p_mfgr, abc.p_name, rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, @@ -311,7 +326,8 @@ order by p_name PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select abc.p_mfgr, abc.p_name, +POSTHOOK: query: -- 7. testJoinWithWindowingAndPTF +select abc.p_mfgr, abc.p_name, rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, abc.p_retailprice, sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row) as s1, @@ -351,13 +367,15 @@ Manufacturer#5 almond antique medium spring khaki 2 2 1611.66 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 1464.48 7672.66 23 -23 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 8. testMixedCaseAlias +select p_mfgr, p_name, p_size, rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 8. testMixedCaseAlias +select p_mfgr, p_name, p_size, rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R from part POSTHOOK: type: QUERY @@ -389,7 +407,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 9. testHavingWithWindowingNoGBY +select p_mfgr, p_name, p_size, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 @@ -397,7 +416,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 9. testHavingWithWindowingNoGBY +select p_mfgr, p_name, p_size, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 @@ -431,7 +451,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 10. testHavingWithWindowingCondRankNoGBY +select p_mfgr, p_name, p_size, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 @@ -439,7 +460,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 10. testHavingWithWindowingCondRankNoGBY +select p_mfgr, p_name, p_size, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 @@ -473,7 +495,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 11. testFirstLast +select p_mfgr,p_name, p_size, sum(p_size) over (distribute by p_mfgr sort by p_mfgr rows between current row and current row) as s2, first_value(p_size) over w1 as f, last_value(p_size, false) over w1 as l @@ -482,7 +505,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr rows between 2 preceding and 2 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 11. testFirstLast +select p_mfgr,p_name, p_size, sum(p_size) over (distribute by p_mfgr sort by p_mfgr rows between current row and current row) as s2, first_value(p_size) over w1 as f, last_value(p_size, false) over w1 as l @@ -517,7 +541,8 @@ Manufacturer#5 almond antique medium spring khaki 6 6 31 46 Manufacturer#5 almond antique sky peru orange 2 2 31 23 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 6 23 Manufacturer#5 almond azure blanched chiffon midnight 23 23 2 23 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 12. testFirstLastWithWhere +select p_mfgr,p_name, p_size, rank() over(distribute by p_mfgr sort by p_mfgr) as r, sum(p_size) over (distribute by p_mfgr sort by p_mfgr rows between current row and current row) as s2, first_value(p_size) over w1 as f, @@ -528,7 +553,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr rows between 2 preceding and 2 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 12. testFirstLastWithWhere +select p_mfgr,p_name, p_size, rank() over(distribute by p_mfgr sort by p_mfgr) as r, sum(p_size) over (distribute by p_mfgr sort by p_mfgr rows between current row and current row) as s2, first_value(p_size) over w1 as f, @@ -544,7 +570,8 @@ Manufacturer#3 almond antique forest lavender goldenrod 14 1 14 17 1 Manufacturer#3 almond antique metallic orange dim 19 1 19 17 45 Manufacturer#3 almond antique misty red olive 1 1 1 14 45 Manufacturer#3 almond antique olive coral navajo 45 1 45 19 45 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 13. testSumWindow +select p_mfgr,p_name, p_size, sum(p_size) over w1 as s1, sum(p_size) over (distribute by p_mfgr sort by p_mfgr rows between current row and current row) as s2 from part @@ -552,7 +579,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr rows between 2 preceding and PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 13. testSumWindow +select p_mfgr,p_name, p_size, sum(p_size) over w1 as s1, sum(p_size) over (distribute by p_mfgr sort by p_mfgr rows between current row and current row) as s2 from part @@ -586,14 +614,16 @@ Manufacturer#5 almond antique medium spring khaki 6 85 6 Manufacturer#5 almond antique sky peru orange 2 108 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 46 Manufacturer#5 almond azure blanched chiffon midnight 23 71 23 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 14. testNoSortClause +select p_mfgr,p_name, p_size, rank() over(distribute by p_mfgr) as r, dense_rank() over(distribute by p_mfgr) as dr from part window w1 as (distribute by p_mfgr rows between 2 preceding and 2 following) PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 14. testNoSortClause +select p_mfgr,p_name, p_size, rank() over(distribute by p_mfgr) as r, dense_rank() over(distribute by p_mfgr) as dr from part window w1 as (distribute by p_mfgr rows between 2 preceding and 2 following) @@ -626,7 +656,8 @@ Manufacturer#5 almond antique medium spring khaki 6 1 1 Manufacturer#5 almond antique sky peru orange 2 1 1 Manufacturer#5 almond aquamarine dodger light gainsboro 46 1 1 Manufacturer#5 almond azure blanched chiffon midnight 23 1 1 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 15. testExpressions +select p_mfgr,p_name, p_size, rank() over(distribute by p_mfgr sort by p_mfgr) as r, dense_rank() over(distribute by p_mfgr sort by p_mfgr) as dr, cume_dist() over(distribute by p_mfgr sort by p_mfgr) as cud, @@ -643,7 +674,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 15. testExpressions +select p_mfgr,p_name, p_size, rank() over(distribute by p_mfgr sort by p_mfgr) as r, dense_rank() over(distribute by p_mfgr sort by p_mfgr) as dr, cume_dist() over(distribute by p_mfgr sort by p_mfgr) as cud, @@ -686,7 +718,8 @@ Manufacturer#5 almond antique medium spring khaki 6 1 1 1.0 0.0 1 5 21.6 16.2061 Manufacturer#5 almond antique sky peru orange 2 1 1 1.0 0.0 2 5 21.6 16.206171663906314 1 23 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 1 1 1.0 0.0 2 5 21.6 16.206171663906314 1 23 6 Manufacturer#5 almond azure blanched chiffon midnight 23 1 1 1.0 0.0 3 5 21.6 16.206171663906314 1 23 2 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 16. testMultipleWindows +select p_mfgr,p_name, p_size, rank() over(distribute by p_mfgr sort by p_mfgr) as r, dense_rank() over(distribute by p_mfgr sort by p_mfgr) as dr, cume_dist() over(distribute by p_mfgr sort by p_mfgr) as cud, @@ -698,7 +731,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 16. testMultipleWindows +select p_mfgr,p_name, p_size, rank() over(distribute by p_mfgr sort by p_mfgr) as r, dense_rank() over(distribute by p_mfgr sort by p_mfgr) as dr, cume_dist() over(distribute by p_mfgr sort by p_mfgr) as cud, @@ -736,7 +770,8 @@ Manufacturer#5 almond antique medium spring khaki 6 1 1 1.0 37 8 31 Manufacturer#5 almond azure blanched chiffon midnight 23 1 1 1.0 108 23 2 Manufacturer#5 almond antique blue firebrick mint 31 1 1 1.0 31 31 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 1 1 1.0 85 46 6 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 17. testCountStar +select p_mfgr,p_name, p_size, count(*) over(distribute by p_mfgr sort by p_mfgr ) as c, count(p_size) over(distribute by p_mfgr sort by p_mfgr) as ca, first_value(p_size) over w1 as fvW1 @@ -745,7 +780,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 17. testCountStar +select p_mfgr,p_name, p_size, count(*) over(distribute by p_mfgr sort by p_mfgr ) as c, count(p_size) over(distribute by p_mfgr sort by p_mfgr) as ca, first_value(p_size) over w1 as fvW1 @@ -780,7 +816,8 @@ Manufacturer#5 almond antique medium spring khaki 6 5 5 31 Manufacturer#5 almond antique sky peru orange 2 5 5 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 5 5 6 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 2 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 18. testUDAFs +select p_mfgr,p_name, p_size, sum(p_retailprice) over w1 as s, min(p_retailprice) over w1 as mi, max(p_retailprice) over w1 as ma, @@ -790,7 +827,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 18. testUDAFs +select p_mfgr,p_name, p_size, sum(p_retailprice) over w1 as s, min(p_retailprice) over w1 as mi, max(p_retailprice) over w1 as ma, @@ -826,7 +864,8 @@ Manufacturer#5 almond antique medium spring khaki 6 6208.18 1018.1 1789.69 1552. Manufacturer#5 almond antique sky peru orange 2 7672.66 1018.1 1789.69 1534.532 Manufacturer#5 almond aquamarine dodger light gainsboro 46 5882.970000000001 1018.1 1788.73 1470.7425000000003 Manufacturer#5 almond azure blanched chiffon midnight 23 4271.3099999999995 1018.1 1788.73 1423.7699999999998 -PREHOOK: query: select p_mfgr,p_name, p_size, p_retailprice, +PREHOOK: query: -- 19. testUDAFsWithGBY +select p_mfgr,p_name, p_size, p_retailprice, sum(p_retailprice) over w1 as s, min(p_retailprice) as mi , max(p_retailprice) as ma , @@ -837,7 +876,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, p_retailprice, +POSTHOOK: query: -- 19. testUDAFsWithGBY +select p_mfgr,p_name, p_size, p_retailprice, sum(p_retailprice) over w1 as s, min(p_retailprice) as mi , max(p_retailprice) as ma , @@ -873,7 +913,8 @@ Manufacturer#5 almond antique medium spring khaki 6 1611.66 6208.18 1611.66 1611 Manufacturer#5 almond antique sky peru orange 2 1788.73 7672.66 1788.73 1788.73 1534.532 Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 5882.970000000001 1018.1 1018.1 1470.7425000000003 Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 4271.3099999999995 1464.48 1464.48 1423.7699999999998 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 20. testSTATs +select p_mfgr,p_name, p_size, stddev(p_retailprice) over w1 as sdev, stddev_pop(p_retailprice) over w1 as sdev_pop, collect_set(p_size) over w1 as uniq_size, @@ -885,7 +926,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 20. testSTATs +select p_mfgr,p_name, p_size, stddev(p_retailprice) over w1 as sdev, stddev_pop(p_retailprice) over w1 as sdev_pop, collect_set(p_size) over w1 as uniq_size, @@ -923,7 +965,8 @@ Manufacturer#5 almond antique medium spring khaki 6 316.68049612345885 316.68049 Manufacturer#5 almond antique sky peru orange 2 285.40506298242155 285.40506298242155 [2,23,6,46,31] 81456.04997600002 -0.712858514567818 -3297.2011999999986 Manufacturer#5 almond aquamarine dodger light gainsboro 46 285.43749038756283 285.43749038756283 [2,23,6,46] 81474.56091875004 -0.984128787153391 -4871.028125000002 Manufacturer#5 almond azure blanched chiffon midnight 23 315.9225931564038 315.9225931564038 [2,23,46] 99807.08486666664 -0.9978877469246936 -5664.856666666666 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 21. testDISTs +select p_mfgr,p_name, p_size, histogram_numeric(p_retailprice, 5) over w1 as hist, percentile(p_partkey, 0.5) over w1 as per, row_number() over(distribute by p_mfgr sort by p_mfgr) as rn @@ -932,7 +975,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 21. testDISTs +select p_mfgr,p_name, p_size, histogram_numeric(p_retailprice, 5) over w1 as hist, percentile(p_partkey, 0.5) over w1 as per, row_number() over(distribute by p_mfgr sort by p_mfgr) as rn @@ -967,13 +1011,15 @@ Manufacturer#5 almond antique medium spring khaki 6 [{"x":1018.1,"y":1.0},{"x":1 Manufacturer#5 almond antique sky peru orange 2 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 78486.0 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0}] 60577.5 4 Manufacturer#5 almond azure blanched chiffon midnight 23 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1788.73,"y":1.0}] 78486.0 5 -PREHOOK: query: create view IF NOT EXISTS mfgr_price_view as +PREHOOK: query: -- 22. testViewAsTableInputWithWindowing +create view IF NOT EXISTS mfgr_price_view as select p_mfgr, p_brand, sum(p_retailprice) as s from part group by p_mfgr, p_brand PREHOOK: type: CREATEVIEW -POSTHOOK: query: create view IF NOT EXISTS mfgr_price_view as +POSTHOOK: query: -- 22. testViewAsTableInputWithWindowing +create view IF NOT EXISTS mfgr_price_view as select p_mfgr, p_brand, sum(p_retailprice) as s from part @@ -1012,13 +1058,15 @@ Manufacturer#4 Brand#42 2581.6800000000003 7337.620000000001 Manufacturer#5 Brand#51 1611.66 1611.66 Manufacturer#5 Brand#52 3254.17 4865.83 Manufacturer#5 Brand#53 2806.83 7672.66 -PREHOOK: query: create view IF NOT EXISTS mfgr_brand_price_view as +PREHOOK: query: -- 23. testCreateViewWithWindowingQuery +create view IF NOT EXISTS mfgr_brand_price_view as select p_mfgr, p_brand, sum(p_retailprice) over w1 as s from part window w1 as (distribute by p_mfgr sort by p_mfgr rows between 2 preceding and current row) PREHOOK: type: CREATEVIEW -POSTHOOK: query: create view IF NOT EXISTS mfgr_brand_price_view as +POSTHOOK: query: -- 23. testCreateViewWithWindowingQuery +create view IF NOT EXISTS mfgr_brand_price_view as select p_mfgr, p_brand, sum(p_retailprice) over w1 as s from part @@ -1061,7 +1109,8 @@ Manufacturer#5 Brand#51 3401.3500000000004 Manufacturer#5 Brand#53 5190.08 Manufacturer#5 Brand#53 4418.490000000001 Manufacturer#5 Brand#52 4271.3099999999995 -PREHOOK: query: select p_mfgr, p_name, +PREHOOK: query: -- 24. testLateralViews +select p_mfgr, p_name, lv_col, p_size, sum(p_size) over w1 as s from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p lateral view explode(arr) part_lv as lv_col @@ -1069,7 +1118,8 @@ window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and c PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, +POSTHOOK: query: -- 24. testLateralViews +select p_mfgr, p_name, lv_col, p_size, sum(p_size) over w1 as s from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p lateral view explode(arr) part_lv as lv_col @@ -1155,7 +1205,8 @@ Manufacturer#5 almond aquamarine dodger light gainsboro 3 46 138 Manufacturer#5 almond azure blanched chiffon midnight 1 23 115 Manufacturer#5 almond azure blanched chiffon midnight 2 23 92 Manufacturer#5 almond azure blanched chiffon midnight 3 23 69 -PREHOOK: query: CREATE TABLE part_1( +PREHOOK: query: -- 25. testMultipleInserts3SWQs +CREATE TABLE part_1( p_mfgr STRING, p_name STRING, p_size INT, @@ -1163,7 +1214,8 @@ r INT, dr INT, s DOUBLE) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part_1( +POSTHOOK: query: -- 25. testMultipleInserts3SWQs +CREATE TABLE part_1( p_mfgr STRING, p_name STRING, p_size INT, @@ -1450,7 +1502,8 @@ Manufacturer#5 almond antique medium spring khaki 6 5 5 31 Manufacturer#5 almond antique sky peru orange 2 5 5 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 5 5 6 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 2 -PREHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice) as mi, +PREHOOK: query: -- 26. testGroupByHavingWithSWQAndAlias +select p_mfgr, p_name, p_size, min(p_retailprice) as mi, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz @@ -1460,7 +1513,8 @@ having p_size > 0 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice) as mi, +POSTHOOK: query: -- 26. testGroupByHavingWithSWQAndAlias +select p_mfgr, p_name, p_size, min(p_retailprice) as mi, rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr, p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz @@ -1516,7 +1570,8 @@ Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23 -PREHOOK: query: select p_mfgr,p_name, p_size, +PREHOOK: query: -- 27. testMultipleRangeWindows +select p_mfgr,p_name, p_size, sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2, sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1 from part @@ -1524,7 +1579,8 @@ window w1 as (rows between 2 preceding and 2 following) PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr,p_name, p_size, +POSTHOOK: query: -- 27. testMultipleRangeWindows +select p_mfgr,p_name, p_size, sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2, sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1 from part @@ -1579,13 +1635,15 @@ Manufacturer#5 almond antique medium spring khaki 6 8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 54 Manufacturer#5 almond antique blue firebrick mint 31 54 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 46 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 28. testPartOrderInUDAFInvoke +select p_mfgr, p_name, p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 28. testPartOrderInUDAFInvoke +select p_mfgr, p_name, p_size, sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s from part POSTHOOK: type: QUERY @@ -1638,14 +1696,16 @@ Manufacturer#5 almond antique medium spring khaki 6 85 Manufacturer#5 almond antique sky peru orange 2 108 Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 Manufacturer#5 almond azure blanched chiffon midnight 23 71 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 29. testPartOrderInWdwDef +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s from part window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 29. testPartOrderInWdwDef +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s from part window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) @@ -1699,7 +1759,8 @@ Manufacturer#5 almond antique medium spring khaki 6 85 Manufacturer#5 almond antique sky peru orange 2 108 Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 Manufacturer#5 almond azure blanched chiffon midnight 23 71 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 30. testDefaultPartitioningSpecRules +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s, sum(p_size) over w2 as s2 from part @@ -1708,7 +1769,8 @@ window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 30. testDefaultPartitioningSpecRules +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s, sum(p_size) over w2 as s2 from part @@ -1764,7 +1826,8 @@ Manufacturer#5 almond antique medium spring khaki 6 85 37 Manufacturer#5 almond antique sky peru orange 2 108 39 Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 85 Manufacturer#5 almond azure blanched chiffon midnight 23 71 108 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 31. testWindowCrossReference +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s1, sum(p_size) over w2 as s2 from part @@ -1773,7 +1836,8 @@ window w1 as (partition by p_mfgr order by p_mfgr rows between 2 preceding and 2 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 31. testWindowCrossReference +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s1, sum(p_size) over w2 as s2 from part @@ -1829,7 +1893,8 @@ Manufacturer#5 almond antique medium spring khaki 6 85 85 Manufacturer#5 almond antique sky peru orange 2 108 108 Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 77 Manufacturer#5 almond azure blanched chiffon midnight 23 71 71 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 32. testWindowInheritance +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s1, sum(p_size) over w2 as s2 from part @@ -1838,7 +1903,8 @@ window w1 as (partition by p_mfgr order by p_mfgr rows between 2 preceding and 2 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 32. testWindowInheritance +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s1, sum(p_size) over w2 as s2 from part @@ -1894,7 +1960,8 @@ Manufacturer#5 almond antique medium spring khaki 6 85 37 Manufacturer#5 almond antique sky peru orange 2 108 39 Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 85 Manufacturer#5 almond azure blanched chiffon midnight 23 71 108 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 33. testWindowForwardReference +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s1, sum(p_size) over w2 as s2, sum(p_size) over w3 as s3 @@ -1905,7 +1972,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr rows between 2 preceding and 2 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 33. testWindowForwardReference +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s1, sum(p_size) over w2 as s2, sum(p_size) over w3 as s3 @@ -1963,7 +2031,8 @@ Manufacturer#5 almond antique medium spring khaki 6 85 37 37 Manufacturer#5 almond antique sky peru orange 2 108 39 39 Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 85 85 Manufacturer#5 almond azure blanched chiffon midnight 23 71 108 108 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 34. testWindowDefinitionPropagation +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s1, sum(p_size) over w2 as s2, sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 @@ -1974,7 +2043,8 @@ window w1 as (distribute by p_mfgr sort by p_mfgr rows between 2 preceding and 2 PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 34. testWindowDefinitionPropagation +select p_mfgr, p_name, p_size, sum(p_size) over w1 as s1, sum(p_size) over w2 as s2, sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 @@ -2032,14 +2102,16 @@ Manufacturer#5 almond antique medium spring khaki 6 85 37 85 Manufacturer#5 almond antique sky peru orange 2 108 39 108 Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 85 77 Manufacturer#5 almond azure blanched chiffon midnight 23 71 108 71 -PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size, +PREHOOK: query: -- 35. testDistinctWithWindowing +select DISTINCT p_mfgr, p_name, p_size, sum(p_size) over w1 as s from part window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size, +POSTHOOK: query: -- 35. testDistinctWithWindowing +select DISTINCT p_mfgr, p_name, p_size, sum(p_size) over w1 as s from part window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) @@ -2092,13 +2164,15 @@ Manufacturer#5 almond antique medium spring khaki 6 85 Manufacturer#5 almond antique sky peru orange 2 108 Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 Manufacturer#5 almond azure blanched chiffon midnight 23 71 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 36. testRankWithPartitioning +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name ) as r from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 36. testRankWithPartitioning +select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name ) as r from part POSTHOOK: type: QUERY @@ -2151,7 +2225,8 @@ Manufacturer#5 almond antique medium spring khaki 6 2 Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 37. testPartitioningVariousForms +select p_mfgr, p_name, p_size, sum(p_retailprice) over (partition by p_mfgr order by p_mfgr) as s1, min(p_retailprice) over (partition by p_mfgr) as s2, max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, @@ -2161,7 +2236,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 37. testPartitioningVariousForms +select p_mfgr, p_name, p_size, sum(p_retailprice) over (partition by p_mfgr order by p_mfgr) as s1, min(p_retailprice) over (partition by p_mfgr) as s2, max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, @@ -2218,7 +2294,8 @@ Manufacturer#5 almond antique medium spring khaki 6 7672.66 1018.1 1789.69 1534. Manufacturer#5 almond antique sky peru orange 2 7672.66 1018.1 1789.69 1534.532 5 Manufacturer#5 almond aquamarine dodger light gainsboro 46 7672.66 1018.1 1789.69 1534.532 5 Manufacturer#5 almond azure blanched chiffon midnight 23 7672.66 1018.1 1789.69 1534.532 5 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 38. testPartitioningVariousForms2 +select p_mfgr, p_name, p_size, sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row) as s1, min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 @@ -2226,7 +2303,8 @@ from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 38. testPartitioningVariousForms2 +select p_mfgr, p_name, p_size, sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row) as s1, min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 @@ -2281,13 +2359,15 @@ Manufacturer#5 almond antique medium spring khaki 6 1611.66 1611.66 1611.66 Manufacturer#5 almond antique sky peru orange 2 1788.73 1788.73 1788.73 Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 1018.1 1018.1 Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 1464.48 1464.48 -PREHOOK: query: select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +PREHOOK: query: -- 39. testUDFOnOrderCols +select p_mfgr, p_type, substr(p_type, 2) as short_ptype, rank() over (partition by p_mfgr order by substr(p_type, 2)) as r from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +POSTHOOK: query: -- 39. testUDFOnOrderCols +select p_mfgr, p_type, substr(p_type, 2) as short_ptype, rank() over (partition by p_mfgr order by substr(p_type, 2)) as r from part POSTHOOK: type: QUERY @@ -2340,13 +2420,15 @@ Manufacturer#5 ECONOMY BURNISHED STEEL CONOMY BURNISHED STEEL 2 Manufacturer#5 MEDIUM BURNISHED TIN EDIUM BURNISHED TIN 3 Manufacturer#5 SMALL PLATED BRASS MALL PLATED BRASS 4 Manufacturer#5 STANDARD BURNISHED TIN TANDARD BURNISHED TIN 5 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 40. testNoBetweenForRows +select p_mfgr, p_name, p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding) as s1 from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 40. testNoBetweenForRows +select p_mfgr, p_name, p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding) as s1 from part POSTHOOK: type: QUERY @@ -2399,13 +2481,15 @@ Manufacturer#5 almond antique medium spring khaki 6 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 7672.66 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 41. testNoBetweenForRange +select p_mfgr, p_name, p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding) as s1 from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 41. testNoBetweenForRange +select p_mfgr, p_name, p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding) as s1 from part POSTHOOK: type: QUERY @@ -2458,13 +2542,15 @@ Manufacturer#5 almond antique medium spring khaki 6 3400.3900000000003 Manufacturer#5 almond azure blanched chiffon midnight 23 4864.870000000001 Manufacturer#5 almond antique blue firebrick mint 31 6654.560000000001 Manufacturer#5 almond aquamarine dodger light gainsboro 46 7672.660000000002 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 42. testUnboundedFollowingForRows +select p_mfgr, p_name, p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following) as s1 from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 42. testUnboundedFollowingForRows +select p_mfgr, p_name, p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following) as s1 from part POSTHOOK: type: QUERY @@ -2517,13 +2603,15 @@ Manufacturer#5 almond antique medium spring khaki 6 5882.970000000001 Manufacturer#5 almond antique sky peru orange 2 4271.3099999999995 Manufacturer#5 almond aquamarine dodger light gainsboro 46 2482.58 Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 -PREHOOK: query: select p_mfgr, p_name, p_size, +PREHOOK: query: -- 43. testUnboundedFollowingForRange +select p_mfgr, p_name, p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following) as s1 from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size, +POSTHOOK: query: -- 43. testUnboundedFollowingForRange +select p_mfgr, p_name, p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following) as s1 from part POSTHOOK: type: QUERY @@ -2576,14 +2664,16 @@ Manufacturer#5 almond antique medium spring khaki 6 5883.93 Manufacturer#5 almond azure blanched chiffon midnight 23 4272.27 Manufacturer#5 almond antique blue firebrick mint 31 2807.79 Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 -PREHOOK: query: select p_name, p_retailprice, +PREHOOK: query: -- 44. testOverNoPartitionSingleAggregate +select p_name, p_retailprice, avg(p_retailprice) over() from part order by p_name PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_name, p_retailprice, +POSTHOOK: query: -- 44. testOverNoPartitionSingleAggregate +select p_name, p_retailprice, avg(p_retailprice) over() from part order by p_name diff --git ql/src/test/results/clientpositive/windowing_columnPruning.q.out ql/src/test/results/clientpositive/windowing_columnPruning.q.out index 2902b63..5856417 100644 --- ql/src/test/results/clientpositive/windowing_columnPruning.q.out +++ ql/src/test/results/clientpositive/windowing_columnPruning.q.out @@ -2,7 +2,8 @@ PREHOOK: query: DROP TABLE part PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE part POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE part( +PREHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -14,7 +15,8 @@ PREHOOK: query: CREATE TABLE part( p_comment STRING ) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part( +POSTHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -33,13 +35,15 @@ PREHOOK: Output: default@part POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part POSTHOOK: type: LOAD POSTHOOK: Output: default@part -PREHOOK: query: select p_size, +PREHOOK: query: -- 1. testQueryLevelPartitionColsNotInSelect +select p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_size, +POSTHOOK: query: -- 1. testQueryLevelPartitionColsNotInSelect +select p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 from part POSTHOOK: type: QUERY @@ -71,13 +75,15 @@ POSTHOOK: Input: default@part 2 5190.08 46 6208.18 23 7672.66 -PREHOOK: query: select p_size, +PREHOOK: query: -- 2. testWindowPartitionColsNotInSelect +select p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_size, +POSTHOOK: query: -- 2. testWindowPartitionColsNotInSelect +select p_size, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 from part POSTHOOK: type: QUERY @@ -109,13 +115,15 @@ POSTHOOK: Input: default@part 2 5190.08 46 6208.18 23 7672.66 -PREHOOK: query: select p_mfgr, +PREHOOK: query: -- 3. testHavingColNotInSelect +select p_mfgr, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 from part PREHOOK: type: QUERY PREHOOK: Input: default@part #### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, +POSTHOOK: query: -- 3. testHavingColNotInSelect +select p_mfgr, sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 from part POSTHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/windowing_expressions.q.out ql/src/test/results/clientpositive/windowing_expressions.q.out index 9401829..e510408 100644 --- ql/src/test/results/clientpositive/windowing_expressions.q.out +++ ql/src/test/results/clientpositive/windowing_expressions.q.out @@ -2,7 +2,8 @@ PREHOOK: query: DROP TABLE part PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE part POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE part( +PREHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -14,7 +15,8 @@ PREHOOK: query: CREATE TABLE part( p_comment STRING ) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE part( +POSTHOOK: query: -- data setup +CREATE TABLE part( p_partkey INT, p_name STRING, p_mfgr STRING, @@ -660,9 +662,11 @@ Manufacturer#5 1241.29 Manufacturer#5 1424.0900000000001 Manufacturer#5 1515.25 Manufacturer#5 1534.532 -PREHOOK: query: create table t1 (a1 int, b1 string) +PREHOOK: query: -- multi table insert test +create table t1 (a1 int, b1 string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table t1 (a1 int, b1 string) +POSTHOOK: query: -- multi table insert test +create table t1 (a1 int, b1 string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@t1 PREHOOK: query: create table t2 (a1 int, b1 string) diff --git ql/src/test/results/clientpositive/windowing_rank.q.out ql/src/test/results/clientpositive/windowing_rank.q.out index a38ccc0..f72e734 100644 --- ql/src/test/results/clientpositive/windowing_rank.q.out +++ ql/src/test/results/clientpositive/windowing_rank.q.out @@ -471,7 +471,9 @@ ulysses steinbeck 0.0 victor van buren 0.3333333333333333 sarah carson 0.6666666666666666 priscilla nixon 1.0 -PREHOOK: query: select ts, dec, rnk +PREHOOK: query: -- If following tests fail, look for the comments in class PTFPPD::process() + +select ts, dec, rnk from (select ts, dec, rank() over (partition by ts) as rnk @@ -485,7 +487,9 @@ where rnk = 1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@over10k #### A masked pattern was here #### -POSTHOOK: query: select ts, dec, rnk +POSTHOOK: query: -- If following tests fail, look for the comments in class PTFPPD::process() + +select ts, dec, rnk from (select ts, dec, rank() over (partition by ts) as rnk